VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@ 17641

Last change on this file since 17641 was 17641, checked in by vboxsync, 16 years ago

Exclude some unused code in RC.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 187.8 KB
Line 
1/* $Id: PGMAllBth.h 17641 2009-03-10 16:21:57Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * This file is a big challenge!
6 */
7
8/*
9 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
10 *
11 * This file is part of VirtualBox Open Source Edition (OSE), as
12 * available from http://www.virtualbox.org. This file is free software;
13 * you can redistribute it and/or modify it under the terms of the GNU
14 * General Public License (GPL) as published by the Free Software
15 * Foundation, in version 2 as it comes in the "COPYING" file of the
16 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
17 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
20 * Clara, CA 95054 USA or visit http://www.sun.com if you need
21 * additional information or have any questions.
22 */
23
24/*******************************************************************************
25* Internal Functions *
26*******************************************************************************/
27__BEGIN_DECLS
28PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault);
29PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage);
30PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
31PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
32PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
33PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR Addr, unsigned fPage, unsigned uErr);
34PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage);
35PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
36#ifdef VBOX_STRICT
37PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
38#endif
39#ifdef PGMPOOL_WITH_USER_TRACKING
40DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys);
41#endif
42PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3);
43PGM_BTH_DECL(int, UnmapCR3)(PVM pVM);
44__END_DECLS
45
46
47/* Filter out some illegal combinations of guest and shadow paging, so we can remove redundant checks inside functions. */
48#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
49# error "Invalid combination; PAE guest implies PAE shadow"
50#endif
51
52#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
53 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
54# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
55#endif
56
57#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
58 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
59# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
60#endif
61
62#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
63 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
64# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
65#endif
66
67#ifdef IN_RING0 /* no mappings in VT-x and AMD-V mode */
68# define PGM_WITHOUT_MAPPINGS
69#endif
70
71
72#ifndef IN_RING3
73/**
74 * #PF Handler for raw-mode guest execution.
75 *
76 * @returns VBox status code (appropriate for trap handling and GC return).
77 * @param pVM VM Handle.
78 * @param uErr The trap error code.
79 * @param pRegFrame Trap register frame.
80 * @param pvFault The fault address.
81 */
82PGM_BTH_DECL(int, Trap0eHandler)(PVM pVM, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault)
83{
84# if defined(IN_RC) && defined(VBOX_STRICT)
85 PGMDynCheckLocks(pVM);
86# endif
87
88# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
89 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
90 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
91
92# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
93 /*
94 * Hide the instruction fetch trap indicator for now.
95 */
96 /** @todo NXE will change this and we must fix NXE in the switcher too! */
97 if (uErr & X86_TRAP_PF_ID)
98 {
99 uErr &= ~X86_TRAP_PF_ID;
100 TRPMSetErrorCode(pVM, uErr);
101 }
102# endif
103
104 /*
105 * Get PDs.
106 */
107 int rc;
108# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
109# if PGM_GST_TYPE == PGM_TYPE_32BIT
110 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
111 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
112
113# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
114
115# if PGM_GST_TYPE == PGM_TYPE_PAE
116 unsigned iPDSrc;
117 X86PDPE PdpeSrc;
118 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, pvFault, &iPDSrc, &PdpeSrc);
119
120# elif PGM_GST_TYPE == PGM_TYPE_AMD64
121 unsigned iPDSrc;
122 PX86PML4E pPml4eSrc;
123 X86PDPE PdpeSrc;
124 PGSTPD pPDSrc;
125
126 pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
127 Assert(pPml4eSrc);
128# endif
129
130 /* Quick check for a valid guest trap. (PAE & AMD64) */
131 if (!pPDSrc)
132 {
133# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
134 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
135# else
136 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVM) & X86_CR3_PAGE_MASK));
137# endif
138 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
139 TRPMSetErrorCode(pVM, uErr);
140 return VINF_EM_RAW_GUEST_TRAP;
141 }
142# endif
143
144# else /* !PGM_WITH_PAGING */
145 PGSTPD pPDSrc = NULL;
146 const unsigned iPDSrc = 0;
147# endif /* !PGM_WITH_PAGING */
148
149
150# if PGM_SHW_TYPE == PGM_TYPE_32BIT
151 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
152 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
153
154# elif PGM_SHW_TYPE == PGM_TYPE_PAE
155 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
156
157 PX86PDPAE pPDDst;
158# if PGM_GST_TYPE != PGM_TYPE_PAE
159 X86PDPE PdpeSrc;
160
161 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
162 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
163# endif
164 rc = pgmShwSyncPaePDPtr(pVM, pvFault, &PdpeSrc, &pPDDst);
165 if (rc != VINF_SUCCESS)
166 {
167 AssertRC(rc);
168 return rc;
169 }
170 Assert(pPDDst);
171
172# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
173 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
174 PX86PDPAE pPDDst;
175# if PGM_GST_TYPE == PGM_TYPE_PROT
176 /* AMD-V nested paging */
177 X86PML4E Pml4eSrc;
178 X86PDPE PdpeSrc;
179 PX86PML4E pPml4eSrc = &Pml4eSrc;
180
181 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
182 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
183 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
184# endif
185
186 rc = pgmShwSyncLongModePDPtr(pVM, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
187 if (rc != VINF_SUCCESS)
188 {
189 AssertRC(rc);
190 return rc;
191 }
192 Assert(pPDDst);
193
194# elif PGM_SHW_TYPE == PGM_TYPE_EPT
195 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
196 PEPTPD pPDDst;
197
198 rc = pgmShwGetEPTPDPtr(pVM, pvFault, NULL, &pPDDst);
199 if (rc != VINF_SUCCESS)
200 {
201 AssertRC(rc);
202 return rc;
203 }
204 Assert(pPDDst);
205# endif
206
207# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
208 /*
209 * If we successfully correct the write protection fault due to dirty bit
210 * tracking, or this page fault is a genuine one, then return immediately.
211 */
212 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
213 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
214 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
215 if ( rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
216 || rc == VINF_EM_RAW_GUEST_TRAP)
217 {
218 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
219 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVM->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVM->pgm.s.StatRZTrap0eTime2GuestTrap; });
220 LogBird(("Trap0eHandler: returns %s\n", rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? "VINF_SUCCESS" : "VINF_EM_RAW_GUEST_TRAP"));
221 return rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? VINF_SUCCESS : rc;
222 }
223
224 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0ePD[iPDSrc]);
225# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
226
227 /*
228 * A common case is the not-present error caused by lazy page table syncing.
229 *
230 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
231 * so we can safely assume that the shadow PT is present when calling SyncPage later.
232 *
233 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
234 * of mapping conflict and defer to SyncCR3 in R3.
235 * (Again, we do NOT support access handlers for non-present guest pages.)
236 *
237 */
238# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
239 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
240# else
241 GSTPDE PdeSrc;
242 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
243 PdeSrc.n.u1Present = 1;
244 PdeSrc.n.u1Write = 1;
245 PdeSrc.n.u1Accessed = 1;
246 PdeSrc.n.u1User = 1;
247# endif
248 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
249 && !pPDDst->a[iPDDst].n.u1Present
250 && PdeSrc.n.u1Present
251 )
252
253 {
254 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2SyncPT; });
255 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
256 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
257 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, pvFault);
258 if (RT_SUCCESS(rc))
259 {
260 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
261 return rc;
262 }
263 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
264 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
265 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeSyncPT, f);
266 return VINF_PGM_SYNC_CR3;
267 }
268
269# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
270 /*
271 * Check if this address is within any of our mappings.
272 *
273 * This is *very* fast and it's gonna save us a bit of effort below and prevent
274 * us from screwing ourself with MMIO2 pages which have a GC Mapping (VRam).
275 * (BTW, it's impossible to have physical access handlers in a mapping.)
276 */
277 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
278 {
279 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
280 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
281 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
282 {
283 if (pvFault < pMapping->GCPtr)
284 break;
285 if (pvFault - pMapping->GCPtr < pMapping->cb)
286 {
287 /*
288 * The first thing we check is if we've got an undetected conflict.
289 */
290 if (!pVM->pgm.s.fMappingsFixed)
291 {
292 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
293 while (iPT-- > 0)
294 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
295 {
296 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eConflicts);
297 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
298 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3); /** @todo no need to do global sync,right? */
299 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
300 return VINF_PGM_SYNC_CR3;
301 }
302 }
303
304 /*
305 * Check if the fault address is in a virtual page access handler range.
306 */
307 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
308 if ( pCur
309 && pvFault - pCur->Core.Key < pCur->cb
310 && uErr & X86_TRAP_PF_RW)
311 {
312# ifdef IN_RC
313 STAM_PROFILE_START(&pCur->Stat, h);
314 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
315 STAM_PROFILE_STOP(&pCur->Stat, h);
316# else
317 AssertFailed();
318 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
319# endif
320 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersMapping);
321 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
322 return rc;
323 }
324
325 /*
326 * Pretend we're not here and let the guest handle the trap.
327 */
328 TRPMSetErrorCode(pVM, uErr & ~X86_TRAP_PF_P);
329 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFMapping);
330 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
331 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
332 return VINF_EM_RAW_GUEST_TRAP;
333 }
334 }
335 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeMapping, a);
336 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
337# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
338
339 /*
340 * Check if this fault address is flagged for special treatment,
341 * which means we'll have to figure out the physical address and
342 * check flags associated with it.
343 *
344 * ASSUME that we can limit any special access handling to pages
345 * in page tables which the guest believes to be present.
346 */
347 if (PdeSrc.n.u1Present)
348 {
349 RTGCPHYS GCPhys = NIL_RTGCPHYS;
350
351# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
352# if PGM_GST_TYPE == PGM_TYPE_AMD64
353 bool fBigPagesSupported = true;
354# else
355 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
356# endif
357 if ( PdeSrc.b.u1Size
358 && fBigPagesSupported)
359 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc)
360 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
361 else
362 {
363 PGSTPT pPTSrc;
364 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
365 if (RT_SUCCESS(rc))
366 {
367 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
368 if (pPTSrc->a[iPTESrc].n.u1Present)
369 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
370 }
371 }
372# else
373 /* No paging so the fault address is the physical address */
374 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
375# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
376
377 /*
378 * If we have a GC address we'll check if it has any flags set.
379 */
380 if (GCPhys != NIL_RTGCPHYS)
381 {
382 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
383
384 PPGMPAGE pPage;
385 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
386 if (RT_SUCCESS(rc)) /** just handle the failure immediate (it returns) and make things easier to read. */
387 {
388 if ( PGM_PAGE_HAS_ACTIVE_PHYSICAL_HANDLERS(pPage)
389 || PGM_PAGE_HAS_ACTIVE_VIRTUAL_HANDLERS(pPage))
390 {
391 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
392 {
393 /*
394 * Physical page access handler.
395 */
396 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
397 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
398 if (pCur)
399 {
400# ifdef PGM_SYNC_N_PAGES
401 /*
402 * If the region is write protected and we got a page not present fault, then sync
403 * the pages. If the fault was caused by a read, then restart the instruction.
404 * In case of write access continue to the GC write handler.
405 *
406 * ASSUMES that there is only one handler per page or that they have similar write properties.
407 */
408 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
409 && !(uErr & X86_TRAP_PF_P))
410 {
411 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
412 if ( RT_FAILURE(rc)
413 || !(uErr & X86_TRAP_PF_RW)
414 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
415 {
416 AssertRC(rc);
417 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
418 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
419 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
420 return rc;
421 }
422 }
423# endif
424
425 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
426 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
427 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
428
429# if defined(IN_RC) || defined(IN_RING0)
430 if (pCur->CTX_SUFF(pfnHandler))
431 {
432 STAM_PROFILE_START(&pCur->Stat, h);
433 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pCur->CTX_SUFF(pvUser));
434 STAM_PROFILE_STOP(&pCur->Stat, h);
435 }
436 else
437# endif
438 rc = VINF_EM_RAW_EMULATE_INSTR;
439 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersPhysical);
440 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
441 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndPhys; });
442 return rc;
443 }
444 }
445# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
446 else
447 {
448# ifdef PGM_SYNC_N_PAGES
449 /*
450 * If the region is write protected and we got a page not present fault, then sync
451 * the pages. If the fault was caused by a read, then restart the instruction.
452 * In case of write access continue to the GC write handler.
453 */
454 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
455 && !(uErr & X86_TRAP_PF_P))
456 {
457 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
458 if ( RT_FAILURE(rc)
459 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
460 || !(uErr & X86_TRAP_PF_RW))
461 {
462 AssertRC(rc);
463 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
464 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
465 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
466 return rc;
467 }
468 }
469# endif
470 /*
471 * Ok, it's an virtual page access handler.
472 *
473 * Since it's faster to search by address, we'll do that first
474 * and then retry by GCPhys if that fails.
475 */
476 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
477 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
478 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
479 */
480 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
481 if (pCur)
482 {
483 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
484 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
485 || !(uErr & X86_TRAP_PF_P)
486 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
487 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
488
489 if ( pvFault - pCur->Core.Key < pCur->cb
490 && ( uErr & X86_TRAP_PF_RW
491 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
492 {
493# ifdef IN_RC
494 STAM_PROFILE_START(&pCur->Stat, h);
495 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
496 STAM_PROFILE_STOP(&pCur->Stat, h);
497# else
498 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
499# endif
500 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtual);
501 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
502 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
503 return rc;
504 }
505 /* Unhandled part of a monitored page */
506 }
507 else
508 {
509 /* Check by physical address. */
510 PPGMVIRTHANDLER pCur;
511 unsigned iPage;
512 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
513 &pCur, &iPage);
514 Assert(RT_SUCCESS(rc) || !pCur);
515 if ( pCur
516 && ( uErr & X86_TRAP_PF_RW
517 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
518 {
519 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
520# ifdef IN_RC
521 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
522 Assert(off < pCur->cb);
523 STAM_PROFILE_START(&pCur->Stat, h);
524 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
525 STAM_PROFILE_STOP(&pCur->Stat, h);
526# else
527 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
528# endif
529 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
530 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
531 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
532 return rc;
533 }
534 }
535 }
536# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
537
538 /*
539 * There is a handled area of the page, but this fault doesn't belong to it.
540 * We must emulate the instruction.
541 *
542 * To avoid crashing (non-fatal) in the interpreter and go back to the recompiler
543 * we first check if this was a page-not-present fault for a page with only
544 * write access handlers. Restart the instruction if it wasn't a write access.
545 */
546 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersUnhandled);
547
548 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
549 && !(uErr & X86_TRAP_PF_P))
550 {
551 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
552 if ( RT_FAILURE(rc)
553 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
554 || !(uErr & X86_TRAP_PF_RW))
555 {
556 AssertRC(rc);
557 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersOutOfSync);
558 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
559 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
560 return rc;
561 }
562 }
563
564 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
565 * It's writing to an unhandled part of the LDT page several million times.
566 */
567 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
568 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
569 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
570 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndUnhandled; });
571 return rc;
572 } /* if any kind of handler */
573
574# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
575 if (uErr & X86_TRAP_PF_P)
576 {
577 /*
578 * The page isn't marked, but it might still be monitored by a virtual page access handler.
579 * (ASSUMES no temporary disabling of virtual handlers.)
580 */
581 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
582 * we should correct both the shadow page table and physical memory flags, and not only check for
583 * accesses within the handler region but for access to pages with virtual handlers. */
584 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
585 if (pCur)
586 {
587 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
588 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
589 || !(uErr & X86_TRAP_PF_P)
590 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
591 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
592
593 if ( pvFault - pCur->Core.Key < pCur->cb
594 && ( uErr & X86_TRAP_PF_RW
595 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
596 {
597# ifdef IN_RC
598 STAM_PROFILE_START(&pCur->Stat, h);
599 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
600 STAM_PROFILE_STOP(&pCur->Stat, h);
601# else
602 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
603# endif
604 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
605 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
606 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2HndVirt; });
607 return rc;
608 }
609 }
610 }
611# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
612 }
613 else
614 {
615 /*
616 * When the guest accesses invalid physical memory (e.g. probing
617 * of RAM or accessing a remapped MMIO range), then we'll fall
618 * back to the recompiler to emulate the instruction.
619 */
620 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
621 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eHandlersInvalid);
622 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
623 return VINF_EM_RAW_EMULATE_INSTR;
624 }
625
626 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeHandlers, b);
627
628# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
629 /*
630 * We are here only if page is present in Guest page tables and
631 * trap is not handled by our handlers.
632 *
633 * Check it for page out-of-sync situation.
634 */
635 STAM_PROFILE_START(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
636
637 if (!(uErr & X86_TRAP_PF_P))
638 {
639 /*
640 * Page is not present in our page tables.
641 * Try to sync it!
642 * BTW, fPageShw is invalid in this branch!
643 */
644 if (uErr & X86_TRAP_PF_US)
645 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
646 else /* supervisor */
647 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
648
649# if defined(LOG_ENABLED) && !defined(IN_RING0)
650 RTGCPHYS GCPhys;
651 uint64_t fPageGst;
652 PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
653 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst=%08llx GCPhys=%RGp scan=%d\n",
654 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst, GCPhys, CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)));
655# endif /* LOG_ENABLED */
656
657# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
658 if (CPUMGetGuestCPL(pVM, pRegFrame) == 0)
659 {
660 uint64_t fPageGst;
661 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
662 if ( RT_SUCCESS(rc)
663 && !(fPageGst & X86_PTE_US))
664 {
665 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
666 if ( pvFault == (RTGCPTR)pRegFrame->eip
667 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
668# ifdef CSAM_DETECT_NEW_CODE_PAGES
669 || ( !PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip)
670 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)pRegFrame->eip)) /* any new code we encounter here */
671# endif /* CSAM_DETECT_NEW_CODE_PAGES */
672 )
673 {
674 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
675 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
676 if (rc != VINF_SUCCESS)
677 {
678 /*
679 * CSAM needs to perform a job in ring 3.
680 *
681 * Sync the page before going to the host context; otherwise we'll end up in a loop if
682 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
683 */
684 LogFlow(("CSAM ring 3 job\n"));
685 int rc2 = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
686 AssertRC(rc2);
687
688 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
689 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2CSAM; });
690 return rc;
691 }
692 }
693# ifdef CSAM_DETECT_NEW_CODE_PAGES
694 else if ( uErr == X86_TRAP_PF_RW
695 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
696 && pRegFrame->ecx < 0x10000)
697 {
698 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
699 * to detect loading of new code pages.
700 */
701
702 /*
703 * Decode the instruction.
704 */
705 RTGCPTR PC;
706 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs, &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
707 if (rc == VINF_SUCCESS)
708 {
709 DISCPUSTATE Cpu;
710 uint32_t cbOp;
711 rc = EMInterpretDisasOneEx(pVM, PC, pRegFrame, &Cpu, &cbOp);
712
713 /* For now we'll restrict this to rep movsw/d instructions */
714 if ( rc == VINF_SUCCESS
715 && Cpu.pCurInstr->opcode == OP_MOVSWD
716 && (Cpu.prefix & PREFIX_REP))
717 {
718 CSAMMarkPossibleCodePage(pVM, pvFault);
719 }
720 }
721 }
722# endif /* CSAM_DETECT_NEW_CODE_PAGES */
723
724 /*
725 * Mark this page as safe.
726 */
727 /** @todo not correct for pages that contain both code and data!! */
728 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
729 CSAMMarkPage(pVM, (RTRCPTR)pvFault, true);
730 }
731 }
732# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
733 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
734 if (RT_SUCCESS(rc))
735 {
736 /* The page was successfully synced, return to the guest. */
737 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
738 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSync; });
739 return VINF_SUCCESS;
740 }
741 }
742 else /* uErr & X86_TRAP_PF_P: */
743 {
744 /*
745 * Write protected pages is make writable when the guest makes the first
746 * write to it. This happens for pages that are shared, write monitored
747 * and not yet allocated.
748 *
749 * Also, a side effect of not flushing global PDEs are out of sync pages due
750 * to physical monitored regions, that are no longer valid.
751 * Assume for now it only applies to the read/write flag.
752 */
753 if (RT_SUCCESS(rc) && (uErr & X86_TRAP_PF_RW))
754 {
755# ifdef VBOX_WITH_NEW_PHYS_CODE
756 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
757 {
758 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n",
759 GCPhys, pPage, pvFault, uErr));
760 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
761 if (rc != VINF_SUCCESS)
762 {
763 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
764 return rc;
765 }
766 }
767 /// @todo count the above case; else
768# endif /* VBOX_WITH_NEW_PHYS_CODE */
769 if (uErr & X86_TRAP_PF_US)
770 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
771 else /* supervisor */
772 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
773
774 /*
775 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
776 * page is not present, which is not true in this case.
777 */
778 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, pvFault, 1, uErr);
779 if (RT_SUCCESS(rc))
780 {
781 /*
782 * Page was successfully synced, return to guest.
783 */
784# ifdef VBOX_STRICT
785 RTGCPHYS GCPhys;
786 uint64_t fPageGst;
787 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, &GCPhys);
788 Assert(RT_SUCCESS(rc) && fPageGst & X86_PTE_RW);
789 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys, (uint64_t)fPageGst));
790
791 uint64_t fPageShw;
792 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
793 AssertMsg(RT_SUCCESS(rc) && fPageShw & X86_PTE_RW, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
794# endif /* VBOX_STRICT */
795 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
796 STAM_STATS({ pVM->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVM->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
797 return VINF_SUCCESS;
798 }
799
800 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
801 if ( CPUMGetGuestCPL(pVM, pRegFrame) == 0
802 && ((CPUMGetGuestCR0(pVM) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG)
803 && (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P))
804 {
805 uint64_t fPageGst;
806 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
807 if ( RT_SUCCESS(rc)
808 && !(fPageGst & X86_PTE_RW))
809 {
810 rc = PGMInterpretInstruction(pVM, pRegFrame, pvFault);
811 if (RT_SUCCESS(rc))
812 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulInRZ);
813 else
814 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eWPEmulToR3);
815 return rc;
816 }
817 AssertMsgFailed(("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
818 }
819 }
820
821# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
822# ifdef VBOX_STRICT
823 /*
824 * Check for VMM page flags vs. Guest page flags consistency.
825 * Currently only for debug purposes.
826 */
827 if (RT_SUCCESS(rc))
828 {
829 /* Get guest page flags. */
830 uint64_t fPageGst;
831 rc = PGMGstGetPage(pVM, pvFault, &fPageGst, NULL);
832 if (RT_SUCCESS(rc))
833 {
834 uint64_t fPageShw;
835 rc = PGMShwGetPage(pVM, pvFault, &fPageShw, NULL);
836
837 /*
838 * Compare page flags.
839 * Note: we have AVL, A, D bits desynched.
840 */
841 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
842 ("Page flags mismatch! pvFault=%RGv GCPhys=%RGp fPageShw=%08llx fPageGst=%08llx\n", pvFault, GCPhys, fPageShw, fPageGst));
843 }
844 else
845 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
846 }
847 else
848 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
849# endif /* VBOX_STRICT */
850# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
851 }
852 STAM_PROFILE_STOP(&pVM->pgm.s.StatRZTrap0eTimeOutOfSync, c);
853# endif /* PGM_OUT_OF_SYNC_IN_GC */
854 }
855 else /* GCPhys == NIL_RTGCPHYS */
856 {
857 /*
858 * Page not present in Guest OS or invalid page table address.
859 * This is potential virtual page access handler food.
860 *
861 * For the present we'll say that our access handlers don't
862 * work for this case - we've already discarded the page table
863 * not present case which is identical to this.
864 *
865 * When we perchance find we need this, we will probably have AVL
866 * trees (offset based) to operate on and we can measure their speed
867 * agains mapping a page table and probably rearrange this handling
868 * a bit. (Like, searching virtual ranges before checking the
869 * physical address.)
870 */
871 }
872 }
873 /* else: !present (guest) */
874
875
876# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
877 /*
878 * Conclusion, this is a guest trap.
879 */
880 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
881 STAM_COUNTER_INC(&pVM->pgm.s.StatRZTrap0eGuestPFUnh);
882 return VINF_EM_RAW_GUEST_TRAP;
883# else
884 /* present, but not a monitored page; perhaps the guest is probing physical memory */
885 return VINF_EM_RAW_EMULATE_INSTR;
886# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
887
888
889# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
890
891 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
892 return VERR_INTERNAL_ERROR;
893# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
894}
895#endif /* !IN_RING3 */
896
897
898/**
899 * Emulation of the invlpg instruction.
900 *
901 *
902 * @returns VBox status code.
903 *
904 * @param pVM VM handle.
905 * @param GCPtrPage Page to invalidate.
906 *
907 * @remark ASSUMES that the guest is updating before invalidating. This order
908 * isn't required by the CPU, so this is speculative and could cause
909 * trouble.
910 *
911 * @todo Flush page or page directory only if necessary!
912 * @todo Add a #define for simply invalidating the page.
913 */
914PGM_BTH_DECL(int, InvalidatePage)(PVM pVM, RTGCPTR GCPtrPage)
915{
916#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
917 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
918 && PGM_SHW_TYPE != PGM_TYPE_EPT
919 int rc;
920
921 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
922 /*
923 * Get the shadow PD entry and skip out if this PD isn't present.
924 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
925 */
926# if PGM_SHW_TYPE == PGM_TYPE_32BIT
927 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
928 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
929
930 /* Fetch the pgm pool shadow descriptor. */
931 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
932 Assert(pShwPde);
933
934# elif PGM_SHW_TYPE == PGM_TYPE_PAE
935 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
936 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
937
938 /* If the shadow PDPE isn't present, then skip the invalidate. */
939 if (!pPdptDst->a[iPdpt].n.u1Present)
940 {
941 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
942 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
943 return VINF_SUCCESS;
944 }
945
946 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
947 PPGMPOOLPAGE pShwPde;
948 PX86PDPAE pPDDst;
949
950 /* Fetch the pgm pool shadow descriptor. */
951 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
952 AssertRCSuccessReturn(rc, rc);
953 Assert(pShwPde);
954
955 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
956 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
957
958# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
959 /* PML4 */
960 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
961 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
962 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
963 PX86PDPAE pPDDst;
964 PX86PDPT pPdptDst;
965 PX86PML4E pPml4eDst;
966 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
967 if (rc != VINF_SUCCESS)
968 {
969 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
970 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
971 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
972 PGM_INVL_GUEST_TLBS();
973 return VINF_SUCCESS;
974 }
975 Assert(pPDDst);
976
977 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
978 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
979
980 if (!pPdpeDst->n.u1Present)
981 {
982 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
983 if (!VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
984 PGM_INVL_GUEST_TLBS();
985 return VINF_SUCCESS;
986 }
987
988# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
989
990 const SHWPDE PdeDst = *pPdeDst;
991 if (!PdeDst.n.u1Present)
992 {
993 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
994 return VINF_SUCCESS;
995 }
996
997 /*
998 * Get the guest PD entry and calc big page.
999 */
1000# if PGM_GST_TYPE == PGM_TYPE_32BIT
1001 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
1002 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1003 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1004# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1005 unsigned iPDSrc;
1006# if PGM_GST_TYPE == PGM_TYPE_PAE
1007 X86PDPE PdpeSrc;
1008 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
1009# else /* AMD64 */
1010 PX86PML4E pPml4eSrc;
1011 X86PDPE PdpeSrc;
1012 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1013# endif
1014 GSTPDE PdeSrc;
1015
1016 if (pPDSrc)
1017 PdeSrc = pPDSrc->a[iPDSrc];
1018 else
1019 PdeSrc.u = 0;
1020# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1021
1022# if PGM_GST_TYPE == PGM_TYPE_AMD64
1023 const bool fIsBigPage = PdeSrc.b.u1Size;
1024# else
1025 const bool fIsBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1026# endif
1027
1028# ifdef IN_RING3
1029 /*
1030 * If a CR3 Sync is pending we may ignore the invalidate page operation
1031 * depending on the kind of sync and if it's a global page or not.
1032 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1033 */
1034# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1035 if ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3)
1036 || ( VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL)
1037 && fIsBigPage
1038 && PdeSrc.b.u1Global
1039 )
1040 )
1041# else
1042 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1043# endif
1044 {
1045 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1046 return VINF_SUCCESS;
1047 }
1048# endif /* IN_RING3 */
1049
1050# if PGM_GST_TYPE == PGM_TYPE_AMD64
1051 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1052
1053 /* Fetch the pgm pool shadow descriptor. */
1054 PPGMPOOLPAGE pShwPdpt = pgmPoolGetPageByHCPhys(pVM, pPml4eDst->u & X86_PML4E_PG_MASK);
1055 Assert(pShwPdpt);
1056
1057 /* Fetch the pgm pool shadow descriptor. */
1058 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1059 Assert(pShwPde);
1060
1061 Assert(pPml4eDst->n.u1Present && (pPml4eDst->u & SHW_PDPT_MASK));
1062 RTGCPHYS GCPhysPdpt = pPml4eSrc->u & X86_PML4E_PG_MASK;
1063
1064 if ( !pPml4eSrc->n.u1Present
1065 || pShwPdpt->GCPhys != GCPhysPdpt)
1066 {
1067 LogFlow(("InvalidatePage: Out-of-sync PML4E (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1068 GCPtrPage, pShwPdpt->GCPhys, GCPhysPdpt, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1069 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1070 pPml4eDst->u = 0;
1071 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1072 PGM_INVL_GUEST_TLBS();
1073 return VINF_SUCCESS;
1074 }
1075 if ( pPml4eSrc->n.u1User != pPml4eDst->n.u1User
1076 || (!pPml4eSrc->n.u1Write && pPml4eDst->n.u1Write))
1077 {
1078 /*
1079 * Mark not present so we can resync the PML4E when it's used.
1080 */
1081 LogFlow(("InvalidatePage: Out-of-sync PML4E at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1082 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1083 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1084 pPml4eDst->u = 0;
1085 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1086 PGM_INVL_GUEST_TLBS();
1087 }
1088 else if (!pPml4eSrc->n.u1Accessed)
1089 {
1090 /*
1091 * Mark not present so we can set the accessed bit.
1092 */
1093 LogFlow(("InvalidatePage: Out-of-sync PML4E (A) at %RGv Pml4eSrc=%RX64 Pml4eDst=%RX64\n",
1094 GCPtrPage, (uint64_t)pPml4eSrc->u, (uint64_t)pPml4eDst->u));
1095 pgmPoolFreeByPage(pPool, pShwPdpt, pVM->pgm.s.CTX_SUFF(pShwPageCR3)->idx, iPml4);
1096 pPml4eDst->u = 0;
1097 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1098 PGM_INVL_GUEST_TLBS();
1099 }
1100
1101 /* Check if the PDPT entry has changed. */
1102 Assert(pPdpeDst->n.u1Present && pPdpeDst->u & SHW_PDPT_MASK);
1103 RTGCPHYS GCPhysPd = PdpeSrc.u & GST_PDPE_PG_MASK;
1104 if ( !PdpeSrc.n.u1Present
1105 || pShwPde->GCPhys != GCPhysPd)
1106 {
1107 LogFlow(("InvalidatePage: Out-of-sync PDPE (P/GCPhys) at %RGv GCPhys=%RGp vs %RGp PdpeSrc=%RX64 PdpeDst=%RX64\n",
1108 GCPtrPage, pShwPde->GCPhys, GCPhysPd, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1109 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1110 pPdpeDst->u = 0;
1111 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1112 PGM_INVL_GUEST_TLBS();
1113 return VINF_SUCCESS;
1114 }
1115 if ( PdpeSrc.lm.u1User != pPdpeDst->lm.u1User
1116 || (!PdpeSrc.lm.u1Write && pPdpeDst->lm.u1Write))
1117 {
1118 /*
1119 * Mark not present so we can resync the PDPTE when it's used.
1120 */
1121 LogFlow(("InvalidatePage: Out-of-sync PDPE at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1122 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1123 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1124 pPdpeDst->u = 0;
1125 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1126 PGM_INVL_GUEST_TLBS();
1127 }
1128 else if (!PdpeSrc.lm.u1Accessed)
1129 {
1130 /*
1131 * Mark not present so we can set the accessed bit.
1132 */
1133 LogFlow(("InvalidatePage: Out-of-sync PDPE (A) at %RGv PdpeSrc=%RX64 PdpeDst=%RX64\n",
1134 GCPtrPage, (uint64_t)PdpeSrc.u, (uint64_t)pPdpeDst->u));
1135 pgmPoolFreeByPage(pPool, pShwPde, pShwPdpt->idx, iPdpt);
1136 pPdpeDst->u = 0;
1137 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1138 PGM_INVL_GUEST_TLBS();
1139 }
1140# endif /* PGM_GST_TYPE == PGM_TYPE_AMD64 */
1141
1142
1143 /*
1144 * Deal with the Guest PDE.
1145 */
1146 rc = VINF_SUCCESS;
1147 if (PdeSrc.n.u1Present)
1148 {
1149 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1150 {
1151 /*
1152 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1153 */
1154 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1155 Assert(PGMGetGuestMode(pVM) <= PGMMODE_PAE);
1156 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
1157 }
1158 else if ( PdeSrc.n.u1User != PdeDst.n.u1User
1159 || (!PdeSrc.n.u1Write && PdeDst.n.u1Write))
1160 {
1161 /*
1162 * Mark not present so we can resync the PDE when it's used.
1163 */
1164 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1165 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1166 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1167 pPdeDst->u = 0;
1168 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1169 PGM_INVL_GUEST_TLBS();
1170 }
1171 else if (!PdeSrc.n.u1Accessed)
1172 {
1173 /*
1174 * Mark not present so we can set the accessed bit.
1175 */
1176 LogFlow(("InvalidatePage: Out-of-sync (A) at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1177 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1178 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1179 pPdeDst->u = 0;
1180 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNAs));
1181 PGM_INVL_GUEST_TLBS();
1182 }
1183 else if (!fIsBigPage)
1184 {
1185 /*
1186 * 4KB - page.
1187 */
1188 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1189 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1190# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1191 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1192 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1193# endif
1194 if (pShwPage->GCPhys == GCPhys)
1195 {
1196# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1197 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1198 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1199 if (pPT->a[iPTEDst].n.u1Present)
1200 {
1201# ifdef PGMPOOL_WITH_USER_TRACKING
1202 /* This is very unlikely with caching/monitoring enabled. */
1203 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK);
1204# endif
1205 pPT->a[iPTEDst].u = 0;
1206 }
1207# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1208 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
1209 if (RT_SUCCESS(rc))
1210 rc = VINF_SUCCESS;
1211# endif
1212 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1213 PGM_INVL_PG(GCPtrPage);
1214 }
1215 else
1216 {
1217 /*
1218 * The page table address changed.
1219 */
1220 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1221 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1222 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1223 pPdeDst->u = 0;
1224 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1225 PGM_INVL_GUEST_TLBS();
1226 }
1227 }
1228 else
1229 {
1230 /*
1231 * 2/4MB - page.
1232 */
1233 /* Before freeing the page, check if anything really changed. */
1234 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1235 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1236# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1237 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1238 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1239# endif
1240 if ( pShwPage->GCPhys == GCPhys
1241 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1242 {
1243 /* ASSUMES a the given bits are identical for 4M and normal PDEs */
1244 /** @todo PAT */
1245 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1246 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1247 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1248 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1249 {
1250 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1251 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1252 return VINF_SUCCESS;
1253 }
1254 }
1255
1256 /*
1257 * Ok, the page table is present and it's been changed in the guest.
1258 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1259 * We could do this for some flushes in GC too, but we need an algorithm for
1260 * deciding which 4MB pages containing code likely to be executed very soon.
1261 */
1262 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1263 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1264 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1265 pPdeDst->u = 0;
1266 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1267 PGM_INVL_BIG_PG(GCPtrPage);
1268 }
1269 }
1270 else
1271 {
1272 /*
1273 * Page directory is not present, mark shadow PDE not present.
1274 */
1275 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1276 {
1277 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1278 pPdeDst->u = 0;
1279 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1280 PGM_INVL_PG(GCPtrPage);
1281 }
1282 else
1283 {
1284 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1285 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1286 }
1287 }
1288
1289 return rc;
1290
1291#else /* guest real and protected mode */
1292 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1293 return VINF_SUCCESS;
1294#endif
1295}
1296
1297
1298#ifdef PGMPOOL_WITH_USER_TRACKING
1299/**
1300 * Update the tracking of shadowed pages.
1301 *
1302 * @param pVM The VM handle.
1303 * @param pShwPage The shadow page.
1304 * @param HCPhys The physical page we is being dereferenced.
1305 */
1306DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVM pVM, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys)
1307{
1308# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1309 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1310 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1311
1312 /** @todo If this turns out to be a bottle neck (*very* likely) two things can be done:
1313 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1314 * 2. write protect all shadowed pages. I.e. implement caching.
1315 */
1316 /*
1317 * Find the guest address.
1318 */
1319 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1320 pRam;
1321 pRam = pRam->CTX_SUFF(pNext))
1322 {
1323 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1324 while (iPage-- > 0)
1325 {
1326 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1327 {
1328 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1329 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage]);
1330 pShwPage->cPresent--;
1331 pPool->cPresent--;
1332 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1333 return;
1334 }
1335 }
1336 }
1337
1338 for (;;)
1339 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1340# else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1341 pShwPage->cPresent--;
1342 pVM->pgm.s.CTX_SUFF(pPool)->cPresent--;
1343# endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
1344}
1345
1346
1347/**
1348 * Update the tracking of shadowed pages.
1349 *
1350 * @param pVM The VM handle.
1351 * @param pShwPage The shadow page.
1352 * @param u16 The top 16-bit of the pPage->HCPhys.
1353 * @param pPage Pointer to the guest page. this will be modified.
1354 * @param iPTDst The index into the shadow table.
1355 */
1356DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVM pVM, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1357{
1358# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
1359 /*
1360 * Just deal with the simple first time here.
1361 */
1362 if (!u16)
1363 {
1364 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1365 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1366 }
1367 else
1368 u16 = pgmPoolTrackPhysExtAddref(pVM, u16, pShwPage->idx);
1369
1370 /* write back */
1371 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1372 PGM_PAGE_SET_TRACKING(pPage, u16);
1373
1374# endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
1375
1376 /* update statistics. */
1377 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1378 pShwPage->cPresent++;
1379 if (pShwPage->iFirstPresent > iPTDst)
1380 pShwPage->iFirstPresent = iPTDst;
1381}
1382#endif /* PGMPOOL_WITH_USER_TRACKING */
1383
1384
1385/**
1386 * Creates a 4K shadow page for a guest page.
1387 *
1388 * For 4M pages the caller must convert the PDE4M to a PTE, this includes adjusting the
1389 * physical address. The PdeSrc argument only the flags are used. No page structured
1390 * will be mapped in this function.
1391 *
1392 * @param pVM VM handle.
1393 * @param pPteDst Destination page table entry.
1394 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1395 * Can safely assume that only the flags are being used.
1396 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1397 * @param pShwPage Pointer to the shadow page.
1398 * @param iPTDst The index into the shadow table.
1399 *
1400 * @remark Not used for 2/4MB pages!
1401 */
1402DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVM pVM, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1403{
1404 if (PteSrc.n.u1Present)
1405 {
1406 /*
1407 * Find the ram range.
1408 */
1409 PPGMPAGE pPage;
1410 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1411 if (RT_SUCCESS(rc))
1412 {
1413#ifdef VBOX_WITH_NEW_PHYS_CODE
1414# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1415 /* Try make the page writable if necessary. */
1416 if ( PteSrc.n.u1Write
1417 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1418 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1419 {
1420 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1421 AssertRC(rc);
1422 }
1423# endif
1424#endif
1425
1426 /** @todo investiage PWT, PCD and PAT. */
1427 /*
1428 * Make page table entry.
1429 */
1430 SHWPTE PteDst;
1431 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1432 {
1433 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No. */
1434 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1435 {
1436#if PGM_SHW_TYPE == PGM_TYPE_EPT
1437 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1438 PteDst.n.u1Present = 1;
1439 PteDst.n.u1Execute = 1;
1440 PteDst.n.u1IgnorePAT = 1;
1441 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1442 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1443#else
1444 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1445 | PGM_PAGE_GET_HCPHYS(pPage);
1446#endif
1447 }
1448 else
1449 {
1450 LogFlow(("SyncPageWorker: monitored page (%RHp) -> mark not present\n", PGM_PAGE_GET_HCPHYS(pPage)));
1451 PteDst.u = 0;
1452 }
1453 /** @todo count these two kinds. */
1454 }
1455 else
1456 {
1457#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1458 /*
1459 * If the page or page directory entry is not marked accessed,
1460 * we mark the page not present.
1461 */
1462 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1463 {
1464 LogFlow(("SyncPageWorker: page and or page directory not accessed -> mark not present\n"));
1465 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1466 PteDst.u = 0;
1467 }
1468 else
1469 /*
1470 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1471 * when the page is modified.
1472 */
1473 if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1474 {
1475 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1476 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1477 | PGM_PAGE_GET_HCPHYS(pPage)
1478 | PGM_PTFLAGS_TRACK_DIRTY;
1479 }
1480 else
1481#endif
1482 {
1483 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1484#if PGM_SHW_TYPE == PGM_TYPE_EPT
1485 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1486 PteDst.n.u1Present = 1;
1487 PteDst.n.u1Write = 1;
1488 PteDst.n.u1Execute = 1;
1489 PteDst.n.u1IgnorePAT = 1;
1490 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1491 /* PteDst.n.u1Size = 0 */
1492#else
1493 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1494 | PGM_PAGE_GET_HCPHYS(pPage);
1495#endif
1496 }
1497 }
1498
1499#ifdef VBOX_WITH_NEW_PHYS_CODE
1500 /*
1501 * Make sure only allocated pages are mapped writable.
1502 */
1503 if ( PteDst.n.u1Write
1504 && PteDst.n.u1Present
1505 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1506 {
1507 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. */
1508 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1509 }
1510#endif
1511
1512#ifdef PGMPOOL_WITH_USER_TRACKING
1513 /*
1514 * Keep user track up to date.
1515 */
1516 if (PteDst.n.u1Present)
1517 {
1518 if (!pPteDst->n.u1Present)
1519 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1520 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1521 {
1522 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1523 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1524 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1525 }
1526 }
1527 else if (pPteDst->n.u1Present)
1528 {
1529 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1530 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1531 }
1532#endif /* PGMPOOL_WITH_USER_TRACKING */
1533
1534 /*
1535 * Update statistics and commit the entry.
1536 */
1537#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1538 if (!PteSrc.n.u1Global)
1539 pShwPage->fSeenNonGlobal = true;
1540#endif
1541 *pPteDst = PteDst;
1542 }
1543 /* else MMIO or invalid page, we must handle them manually in the #PF handler. */
1544 /** @todo count these. */
1545 }
1546 else
1547 {
1548 /*
1549 * Page not-present.
1550 */
1551 LogFlow(("SyncPageWorker: page not present in Pte\n"));
1552#ifdef PGMPOOL_WITH_USER_TRACKING
1553 /* Keep user track up to date. */
1554 if (pPteDst->n.u1Present)
1555 {
1556 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1557 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVM, pShwPage, pPteDst->u & SHW_PTE_PG_MASK);
1558 }
1559#endif /* PGMPOOL_WITH_USER_TRACKING */
1560 pPteDst->u = 0;
1561 /** @todo count these. */
1562 }
1563}
1564
1565
1566/**
1567 * Syncs a guest OS page.
1568 *
1569 * There are no conflicts at this point, neither is there any need for
1570 * page table allocations.
1571 *
1572 * @returns VBox status code.
1573 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1574 * @param pVM VM handle.
1575 * @param PdeSrc Page directory entry of the guest.
1576 * @param GCPtrPage Guest context page address.
1577 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1578 * @param uErr Fault error (X86_TRAP_PF_*).
1579 */
1580PGM_BTH_DECL(int, SyncPage)(PVM pVM, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1581{
1582 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1583
1584#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1585 || PGM_GST_TYPE == PGM_TYPE_PAE \
1586 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1587 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1588 && PGM_SHW_TYPE != PGM_TYPE_EPT
1589
1590# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1591 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
1592# endif
1593
1594 /*
1595 * Assert preconditions.
1596 */
1597 Assert(PdeSrc.n.u1Present);
1598 Assert(cPages);
1599 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1600
1601 /*
1602 * Get the shadow PDE, find the shadow page table in the pool.
1603 */
1604# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1605 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1606 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
1607
1608 /* Fetch the pgm pool shadow descriptor. */
1609 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
1610 Assert(pShwPde);
1611
1612# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1613 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1614 PPGMPOOLPAGE pShwPde;
1615 PX86PDPAE pPDDst;
1616
1617 /* Fetch the pgm pool shadow descriptor. */
1618 int rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
1619 AssertRCSuccessReturn(rc, rc);
1620 Assert(pShwPde);
1621
1622 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1623 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1624
1625# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1626 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1627 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1628 PX86PDPAE pPDDst;
1629 PX86PDPT pPdptDst;
1630
1631 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1632 AssertRCSuccessReturn(rc, rc);
1633 Assert(pPDDst && pPdptDst);
1634 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1635# endif
1636
1637 SHWPDE PdeDst = *pPdeDst;
1638 AssertMsg(PdeDst.n.u1Present, ("%p=%llx\n", pPdeDst, (uint64_t)PdeDst.u));
1639 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1640
1641# if PGM_GST_TYPE == PGM_TYPE_AMD64
1642 /* Fetch the pgm pool shadow descriptor. */
1643 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1644 Assert(pShwPde);
1645# endif
1646
1647 /*
1648 * Check that the page is present and that the shadow PDE isn't out of sync.
1649 */
1650# if PGM_GST_TYPE == PGM_TYPE_AMD64
1651 const bool fBigPage = PdeSrc.b.u1Size;
1652# else
1653 const bool fBigPage = PdeSrc.b.u1Size && (CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
1654# endif
1655 RTGCPHYS GCPhys;
1656 if (!fBigPage)
1657 {
1658 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1659# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1660 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1661 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1662# endif
1663 }
1664 else
1665 {
1666 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
1667# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1668 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1669 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1670# endif
1671 }
1672 if ( pShwPage->GCPhys == GCPhys
1673 && PdeSrc.n.u1Present
1674 && (PdeSrc.n.u1User == PdeDst.n.u1User)
1675 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1676# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1677 && (!fNoExecuteBitValid || PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute)
1678# endif
1679 )
1680 {
1681 /*
1682 * Check that the PDE is marked accessed already.
1683 * Since we set the accessed bit *before* getting here on a #PF, this
1684 * check is only meant for dealing with non-#PF'ing paths.
1685 */
1686 if (PdeSrc.n.u1Accessed)
1687 {
1688 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1689 if (!fBigPage)
1690 {
1691 /*
1692 * 4KB Page - Map the guest page table.
1693 */
1694 PGSTPT pPTSrc;
1695 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1696 if (RT_SUCCESS(rc))
1697 {
1698# ifdef PGM_SYNC_N_PAGES
1699 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1700 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1701 {
1702 /*
1703 * This code path is currently only taken when the caller is PGMTrap0eHandler
1704 * for non-present pages!
1705 *
1706 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1707 * deal with locality.
1708 */
1709 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1710# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1711 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1712 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1713# else
1714 const unsigned offPTSrc = 0;
1715# endif
1716 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1717 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1718 iPTDst = 0;
1719 else
1720 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1721 for (; iPTDst < iPTDstEnd; iPTDst++)
1722 {
1723 if (!pPTDst->a[iPTDst].n.u1Present)
1724 {
1725 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1726 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1727 NOREF(GCPtrCurPage);
1728#ifndef IN_RING0
1729 /*
1730 * Assuming kernel code will be marked as supervisor - and not as user level
1731 * and executed using a conforming code selector - And marked as readonly.
1732 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1733 */
1734 PPGMPAGE pPage;
1735 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1736 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1737 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)GCPtrCurPage)
1738 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1739 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1740 )
1741#endif /* else: CSAM not active */
1742 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1743 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1744 GCPtrCurPage, PteSrc.n.u1Present,
1745 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1746 PteSrc.n.u1User & PdeSrc.n.u1User,
1747 (uint64_t)PteSrc.u,
1748 (uint64_t)pPTDst->a[iPTDst].u,
1749 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1750 }
1751 }
1752 }
1753 else
1754# endif /* PGM_SYNC_N_PAGES */
1755 {
1756 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1757 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1758 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1759 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1760 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s\n",
1761 GCPtrPage, PteSrc.n.u1Present,
1762 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1763 PteSrc.n.u1User & PdeSrc.n.u1User,
1764 (uint64_t)PteSrc.u,
1765 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1766 }
1767 }
1768 else /* MMIO or invalid page: emulated in #PF handler. */
1769 {
1770 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1771 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1772 }
1773 }
1774 else
1775 {
1776 /*
1777 * 4/2MB page - lazy syncing shadow 4K pages.
1778 * (There are many causes of getting here, it's no longer only CSAM.)
1779 */
1780 /* Calculate the GC physical address of this 4KB shadow page. */
1781 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1782 /* Find ram range. */
1783 PPGMPAGE pPage;
1784 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1785 if (RT_SUCCESS(rc))
1786 {
1787# ifdef VBOX_WITH_NEW_PHYS_CODE
1788# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1789 /* Try make the page writable if necessary. */
1790 if ( PdeSrc.n.u1Write
1791 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1792 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
1793 {
1794 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
1795 AssertRC(rc);
1796 }
1797# endif
1798# endif
1799
1800 /*
1801 * Make shadow PTE entry.
1802 */
1803 SHWPTE PteDst;
1804 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1805 | PGM_PAGE_GET_HCPHYS(pPage);
1806 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1807 {
1808 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1809 PteDst.n.u1Write = 0;
1810 else
1811 PteDst.u = 0;
1812 }
1813 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1814# ifdef PGMPOOL_WITH_USER_TRACKING
1815 if (PteDst.n.u1Present && !pPTDst->a[iPTDst].n.u1Present)
1816 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1817# endif
1818# ifdef VBOX_WITH_NEW_PHYS_CODE
1819 /* Make sure only allocated pages are mapped writable. */
1820 if ( PteDst.n.u1Write
1821 && PteDst.n.u1Present
1822 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1823 {
1824 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1825 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1826 }
1827# endif
1828
1829 pPTDst->a[iPTDst] = PteDst;
1830
1831
1832 /*
1833 * If the page is not flagged as dirty and is writable, then make it read-only
1834 * at PD level, so we can set the dirty bit when the page is modified.
1835 *
1836 * ASSUMES that page access handlers are implemented on page table entry level.
1837 * Thus we will first catch the dirty access and set PDE.D and restart. If
1838 * there is an access handler, we'll trap again and let it work on the problem.
1839 */
1840 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1841 * As for invlpg, it simply frees the whole shadow PT.
1842 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1843 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
1844 {
1845 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1846 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1847 PdeDst.n.u1Write = 0;
1848 }
1849 else
1850 {
1851 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1852 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1853 }
1854 *pPdeDst = PdeDst;
1855 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1856 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1857 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1858 }
1859 else
1860 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1861 }
1862 return VINF_SUCCESS;
1863 }
1864 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
1865 }
1866 else
1867 {
1868 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
1869 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
1870 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
1871 }
1872
1873 /*
1874 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
1875 * Yea, I'm lazy.
1876 */
1877 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1878 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
1879
1880 pPdeDst->u = 0;
1881 PGM_INVL_GUEST_TLBS();
1882 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
1883
1884#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
1885 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1886 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
1887 && !defined(IN_RC)
1888
1889# ifdef PGM_SYNC_N_PAGES
1890 /*
1891 * Get the shadow PDE, find the shadow page table in the pool.
1892 */
1893# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1894 X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
1895
1896# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1897 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVM->pgm.s, GCPtrPage);
1898
1899# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1900 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1901 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
1902 PX86PDPAE pPDDst;
1903 X86PDEPAE PdeDst;
1904 PX86PDPT pPdptDst;
1905
1906 int rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1907 AssertRCSuccessReturn(rc, rc);
1908 Assert(pPDDst && pPdptDst);
1909 PdeDst = pPDDst->a[iPDDst];
1910# elif PGM_SHW_TYPE == PGM_TYPE_EPT
1911 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
1912 PEPTPD pPDDst;
1913 EPTPDE PdeDst;
1914
1915 int rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, NULL, &pPDDst);
1916 if (rc != VINF_SUCCESS)
1917 {
1918 AssertRC(rc);
1919 return rc;
1920 }
1921 Assert(pPDDst);
1922 PdeDst = pPDDst->a[iPDDst];
1923# endif
1924 AssertMsg(PdeDst.n.u1Present, ("%#llx\n", (uint64_t)PdeDst.u));
1925 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, PdeDst.u & SHW_PDE_PG_MASK);
1926 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1927
1928 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1929 if (cPages > 1 && !(uErr & X86_TRAP_PF_P))
1930 {
1931 /*
1932 * This code path is currently only taken when the caller is PGMTrap0eHandler
1933 * for non-present pages!
1934 *
1935 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1936 * deal with locality.
1937 */
1938 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1939 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1940 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1941 iPTDst = 0;
1942 else
1943 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1944 for (; iPTDst < iPTDstEnd; iPTDst++)
1945 {
1946 if (!pPTDst->a[iPTDst].n.u1Present)
1947 {
1948 GSTPTE PteSrc;
1949
1950 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
1951
1952 /* Fake the page table entry */
1953 PteSrc.u = GCPtrCurPage;
1954 PteSrc.n.u1Present = 1;
1955 PteSrc.n.u1Dirty = 1;
1956 PteSrc.n.u1Accessed = 1;
1957 PteSrc.n.u1Write = 1;
1958 PteSrc.n.u1User = 1;
1959
1960 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1961
1962 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1963 GCPtrCurPage, PteSrc.n.u1Present,
1964 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1965 PteSrc.n.u1User & PdeSrc.n.u1User,
1966 (uint64_t)PteSrc.u,
1967 (uint64_t)pPTDst->a[iPTDst].u,
1968 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1969 }
1970 else
1971 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
1972 }
1973 }
1974 else
1975# endif /* PGM_SYNC_N_PAGES */
1976 {
1977 GSTPTE PteSrc;
1978 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1979 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
1980
1981 /* Fake the page table entry */
1982 PteSrc.u = GCPtrCurPage;
1983 PteSrc.n.u1Present = 1;
1984 PteSrc.n.u1Dirty = 1;
1985 PteSrc.n.u1Accessed = 1;
1986 PteSrc.n.u1Write = 1;
1987 PteSrc.n.u1User = 1;
1988 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1989
1990 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
1991 GCPtrPage, PteSrc.n.u1Present,
1992 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1993 PteSrc.n.u1User & PdeSrc.n.u1User,
1994 (uint64_t)PteSrc.u,
1995 (uint64_t)pPTDst->a[iPTDst].u,
1996 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1997 }
1998 return VINF_SUCCESS;
1999
2000#else
2001 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2002 return VERR_INTERNAL_ERROR;
2003#endif
2004}
2005
2006
2007#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2008/**
2009 * Investigate page fault and handle write protection page faults caused by
2010 * dirty bit tracking.
2011 *
2012 * @returns VBox status code.
2013 * @param pVM VM handle.
2014 * @param uErr Page fault error code.
2015 * @param pPdeDst Shadow page directory entry.
2016 * @param pPdeSrc Guest page directory entry.
2017 * @param GCPtrPage Guest context page address.
2018 */
2019PGM_BTH_DECL(int, CheckPageFault)(PVM pVM, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2020{
2021 bool fWriteProtect = !!(CPUMGetGuestCR0(pVM) & X86_CR0_WP);
2022 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2023 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2024# if PGM_GST_TYPE == PGM_TYPE_AMD64
2025 bool fBigPagesSupported = true;
2026# else
2027 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2028# endif
2029# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2030 bool fNoExecuteBitValid = !!(CPUMGetGuestEFER(pVM) & MSR_K6_EFER_NXE);
2031# endif
2032 unsigned uPageFaultLevel;
2033 int rc;
2034
2035 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2036 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2037
2038# if PGM_GST_TYPE == PGM_TYPE_PAE \
2039 || PGM_GST_TYPE == PGM_TYPE_AMD64
2040
2041# if PGM_GST_TYPE == PGM_TYPE_AMD64
2042 PX86PML4E pPml4eSrc;
2043 PX86PDPE pPdpeSrc;
2044
2045 pPdpeSrc = pgmGstGetLongModePDPTPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc);
2046 Assert(pPml4eSrc);
2047
2048 /*
2049 * Real page fault? (PML4E level)
2050 */
2051 if ( (uErr & X86_TRAP_PF_RSVD)
2052 || !pPml4eSrc->n.u1Present
2053 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPml4eSrc->n.u1NoExecute)
2054 || (fWriteFault && !pPml4eSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2055 || (fUserLevelFault && !pPml4eSrc->n.u1User)
2056 )
2057 {
2058 uPageFaultLevel = 0;
2059 goto l_UpperLevelPageFault;
2060 }
2061 Assert(pPdpeSrc);
2062
2063# else /* PAE */
2064 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(&pVM->pgm.s, GCPtrPage);
2065# endif /* PAE */
2066
2067 /*
2068 * Real page fault? (PDPE level)
2069 */
2070 if ( (uErr & X86_TRAP_PF_RSVD)
2071 || !pPdpeSrc->n.u1Present
2072# if PGM_GST_TYPE == PGM_TYPE_AMD64 /* NX, r/w, u/s bits in the PDPE are long mode only */
2073 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdpeSrc->lm.u1NoExecute)
2074 || (fWriteFault && !pPdpeSrc->lm.u1Write && (fUserLevelFault || fWriteProtect))
2075 || (fUserLevelFault && !pPdpeSrc->lm.u1User)
2076# endif
2077 )
2078 {
2079 uPageFaultLevel = 1;
2080 goto l_UpperLevelPageFault;
2081 }
2082# endif
2083
2084 /*
2085 * Real page fault? (PDE level)
2086 */
2087 if ( (uErr & X86_TRAP_PF_RSVD)
2088 || !pPdeSrc->n.u1Present
2089# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2090 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && pPdeSrc->n.u1NoExecute)
2091# endif
2092 || (fWriteFault && !pPdeSrc->n.u1Write && (fUserLevelFault || fWriteProtect))
2093 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2094 {
2095 uPageFaultLevel = 2;
2096 goto l_UpperLevelPageFault;
2097 }
2098
2099 /*
2100 * First check the easy case where the page directory has been marked read-only to track
2101 * the dirty bit of an emulated BIG page
2102 */
2103 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2104 {
2105 /* Mark guest page directory as accessed */
2106# if PGM_GST_TYPE == PGM_TYPE_AMD64
2107 pPml4eSrc->n.u1Accessed = 1;
2108 pPdpeSrc->lm.u1Accessed = 1;
2109# endif
2110 pPdeSrc->b.u1Accessed = 1;
2111
2112 /*
2113 * Only write protection page faults are relevant here.
2114 */
2115 if (fWriteFault)
2116 {
2117 /* Mark guest page directory as dirty (BIG page only). */
2118 pPdeSrc->b.u1Dirty = 1;
2119
2120 if (pPdeDst->n.u1Present && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2121 {
2122 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2123
2124 Assert(pPdeSrc->b.u1Write);
2125
2126 pPdeDst->n.u1Write = 1;
2127 pPdeDst->n.u1Accessed = 1;
2128 pPdeDst->au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2129 PGM_INVL_BIG_PG(GCPtrPage);
2130 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2131 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2132 }
2133 }
2134 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2135 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2136 }
2137 /* else: 4KB page table */
2138
2139 /*
2140 * Map the guest page table.
2141 */
2142 PGSTPT pPTSrc;
2143 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2144 if (RT_SUCCESS(rc))
2145 {
2146 /*
2147 * Real page fault?
2148 */
2149 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2150 const GSTPTE PteSrc = *pPteSrc;
2151 if ( !PteSrc.n.u1Present
2152# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2153 || (fNoExecuteBitValid && (uErr & X86_TRAP_PF_ID) && PteSrc.n.u1NoExecute)
2154# endif
2155 || (fWriteFault && !PteSrc.n.u1Write && (fUserLevelFault || fWriteProtect))
2156 || (fUserLevelFault && !PteSrc.n.u1User)
2157 )
2158 {
2159 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2160 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2161 LogFlow(("CheckPageFault: real page fault at %RGv PteSrc.u=%08x (2)\n", GCPtrPage, PteSrc.u));
2162
2163 /* Check the present bit as the shadow tables can cause different error codes by being out of sync.
2164 * See the 2nd case above as well.
2165 */
2166 if (pPdeSrc->n.u1Present && pPteSrc->n.u1Present)
2167 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2168
2169 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2170 return VINF_EM_RAW_GUEST_TRAP;
2171 }
2172 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2173
2174 /*
2175 * Set the accessed bits in the page directory and the page table.
2176 */
2177# if PGM_GST_TYPE == PGM_TYPE_AMD64
2178 pPml4eSrc->n.u1Accessed = 1;
2179 pPdpeSrc->lm.u1Accessed = 1;
2180# endif
2181 pPdeSrc->n.u1Accessed = 1;
2182 pPteSrc->n.u1Accessed = 1;
2183
2184 /*
2185 * Only write protection page faults are relevant here.
2186 */
2187 if (fWriteFault)
2188 {
2189 /* Write access, so mark guest entry as dirty. */
2190# ifdef VBOX_WITH_STATISTICS
2191 if (!pPteSrc->n.u1Dirty)
2192 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2193 else
2194 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2195# endif
2196
2197 pPteSrc->n.u1Dirty = 1;
2198
2199 if (pPdeDst->n.u1Present)
2200 {
2201#ifndef IN_RING0
2202 /* Bail out here as pgmPoolGetPageByHCPhys will return NULL and we'll crash below.
2203 * Our individual shadow handlers will provide more information and force a fatal exit.
2204 */
2205 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2206 {
2207 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2208 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2209 return VINF_SUCCESS;
2210 }
2211#endif
2212 /*
2213 * Map shadow page table.
2214 */
2215 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2216 if (pShwPage)
2217 {
2218 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2219 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2220 if ( pPteDst->n.u1Present /** @todo Optimize accessed bit emulation? */
2221 && (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY))
2222 {
2223 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2224# ifdef VBOX_STRICT
2225 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2226 if (pPage)
2227 AssertMsg(!PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage),
2228 ("Unexpected dirty bit tracking on monitored page %RGv (phys %RGp)!!!!!!\n", GCPtrPage, pPteSrc->u & X86_PTE_PAE_PG_MASK));
2229# endif
2230 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2231
2232 Assert(pPteSrc->n.u1Write);
2233
2234 pPteDst->n.u1Write = 1;
2235 pPteDst->n.u1Dirty = 1;
2236 pPteDst->n.u1Accessed = 1;
2237 pPteDst->au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2238 PGM_INVL_PG(GCPtrPage);
2239
2240 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2241 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT;
2242 }
2243 }
2244 else
2245 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2246 }
2247 }
2248/** @todo Optimize accessed bit emulation? */
2249# ifdef VBOX_STRICT
2250 /*
2251 * Sanity check.
2252 */
2253 else if ( !pPteSrc->n.u1Dirty
2254 && (pPdeSrc->n.u1Write & pPteSrc->n.u1Write)
2255 && pPdeDst->n.u1Present)
2256 {
2257 PPGMPOOLPAGE pShwPage = pgmPoolGetPageByHCPhys(pVM, pPdeDst->u & SHW_PDE_PG_MASK);
2258 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2259 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2260 if ( pPteDst->n.u1Present
2261 && pPteDst->n.u1Write)
2262 LogFlow(("Writable present page %RGv not marked for dirty bit tracking!!!\n", GCPtrPage));
2263 }
2264# endif /* VBOX_STRICT */
2265 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2266 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2267 }
2268 AssertRC(rc);
2269 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2270 return rc;
2271
2272
2273l_UpperLevelPageFault:
2274 /*
2275 * Pagefault detected while checking the PML4E, PDPE or PDE.
2276 * Single exit handler to get rid of duplicate code paths.
2277 */
2278 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2279 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
2280 Log(("CheckPageFault: real page fault at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2281
2282 if (
2283# if PGM_GST_TYPE == PGM_TYPE_AMD64
2284 pPml4eSrc->n.u1Present &&
2285# endif
2286# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
2287 pPdpeSrc->n.u1Present &&
2288# endif
2289 pPdeSrc->n.u1Present)
2290 {
2291 /* Check the present bit as the shadow tables can cause different error codes by being out of sync. */
2292 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2293 {
2294 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2295 }
2296 else
2297 {
2298 /*
2299 * Map the guest page table.
2300 */
2301 PGSTPT pPTSrc;
2302 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2303 if (RT_SUCCESS(rc))
2304 {
2305 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2306 const GSTPTE PteSrc = *pPteSrc;
2307 if (pPteSrc->n.u1Present)
2308 TRPMSetErrorCode(pVM, uErr | X86_TRAP_PF_P); /* page-level protection violation */
2309 }
2310 AssertRC(rc);
2311 }
2312 }
2313 return VINF_EM_RAW_GUEST_TRAP;
2314}
2315#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2316
2317
2318/**
2319 * Sync a shadow page table.
2320 *
2321 * The shadow page table is not present. This includes the case where
2322 * there is a conflict with a mapping.
2323 *
2324 * @returns VBox status code.
2325 * @param pVM VM handle.
2326 * @param iPD Page directory index.
2327 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2328 * Assume this is a temporary mapping.
2329 * @param GCPtrPage GC Pointer of the page that caused the fault
2330 */
2331PGM_BTH_DECL(int, SyncPT)(PVM pVM, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2332{
2333 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2334 STAM_COUNTER_INC(&pVM->pgm.s.StatSyncPtPD[iPDSrc]);
2335 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2336
2337#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2338 || PGM_GST_TYPE == PGM_TYPE_PAE \
2339 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2340 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2341 && PGM_SHW_TYPE != PGM_TYPE_EPT
2342
2343 int rc = VINF_SUCCESS;
2344
2345 /*
2346 * Validate input a little bit.
2347 */
2348 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2349# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2350 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2351 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2352
2353 /* Fetch the pgm pool shadow descriptor. */
2354 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2355 Assert(pShwPde);
2356
2357# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2358 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2359 PPGMPOOLPAGE pShwPde;
2360 PX86PDPAE pPDDst;
2361 PSHWPDE pPdeDst;
2362
2363 /* Fetch the pgm pool shadow descriptor. */
2364 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2365 AssertRCSuccessReturn(rc, rc);
2366 Assert(pShwPde);
2367
2368 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2369 pPdeDst = &pPDDst->a[iPDDst];
2370
2371# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2372 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2373 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2374 PX86PDPAE pPDDst;
2375 PX86PDPT pPdptDst;
2376 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2377 AssertRCSuccessReturn(rc, rc);
2378 Assert(pPDDst);
2379 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2380# endif
2381 SHWPDE PdeDst = *pPdeDst;
2382
2383# if PGM_GST_TYPE == PGM_TYPE_AMD64
2384 /* Fetch the pgm pool shadow descriptor. */
2385 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2386 Assert(pShwPde);
2387# endif
2388
2389# ifndef PGM_WITHOUT_MAPPINGS
2390 /*
2391 * Check for conflicts.
2392 * GC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2393 * HC: Simply resolve the conflict.
2394 */
2395 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2396 {
2397 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2398# ifndef IN_RING3
2399 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2400 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2401 return VERR_ADDRESS_CONFLICT;
2402# else
2403 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2404 Assert(pMapping);
2405# if PGM_GST_TYPE == PGM_TYPE_32BIT
2406 int rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2407# elif PGM_GST_TYPE == PGM_TYPE_PAE
2408 int rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2409# else
2410 AssertFailed(); /* can't happen for amd64 */
2411# endif
2412 if (RT_FAILURE(rc))
2413 {
2414 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2415 return rc;
2416 }
2417 PdeDst = *pPdeDst;
2418# endif
2419 }
2420# else /* PGM_WITHOUT_MAPPINGS */
2421 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
2422# endif /* PGM_WITHOUT_MAPPINGS */
2423 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2424
2425# if defined(IN_RC)
2426 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2427 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2428# endif
2429
2430 /*
2431 * Sync page directory entry.
2432 */
2433 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2434 if (PdeSrc.n.u1Present)
2435 {
2436 /*
2437 * Allocate & map the page table.
2438 */
2439 PSHWPT pPTDst;
2440# if PGM_GST_TYPE == PGM_TYPE_AMD64
2441 const bool fPageTable = !PdeSrc.b.u1Size;
2442# else
2443 const bool fPageTable = !PdeSrc.b.u1Size || !(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
2444# endif
2445 PPGMPOOLPAGE pShwPage;
2446 RTGCPHYS GCPhys;
2447 if (fPageTable)
2448 {
2449 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2450# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2451 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2452 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2453# endif
2454 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2455 }
2456 else
2457 {
2458 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
2459# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2460 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2461 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2462# endif
2463 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, pShwPde->idx, iPDDst, &pShwPage);
2464 }
2465 if (rc == VINF_SUCCESS)
2466 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2467 else if (rc == VINF_PGM_CACHED_PAGE)
2468 {
2469 /*
2470 * The PT was cached, just hook it up.
2471 */
2472 if (fPageTable)
2473 PdeDst.u = pShwPage->Core.Key
2474 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2475 else
2476 {
2477 PdeDst.u = pShwPage->Core.Key
2478 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2479 /* (see explanation and assumptions further down.) */
2480 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2481 {
2482 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2483 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2484 PdeDst.b.u1Write = 0;
2485 }
2486 }
2487 *pPdeDst = PdeDst;
2488# if defined(IN_RC)
2489 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2490# endif
2491 return VINF_SUCCESS;
2492 }
2493 else if (rc == VERR_PGM_POOL_FLUSHED)
2494 {
2495 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
2496# if defined(IN_RC)
2497 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2498# endif
2499 return VINF_PGM_SYNC_CR3;
2500 }
2501 else
2502 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2503 PdeDst.u &= X86_PDE_AVL_MASK;
2504 PdeDst.u |= pShwPage->Core.Key;
2505
2506 /*
2507 * Page directory has been accessed (this is a fault situation, remember).
2508 */
2509 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2510 if (fPageTable)
2511 {
2512 /*
2513 * Page table - 4KB.
2514 *
2515 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2516 */
2517 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2518 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2519 PGSTPT pPTSrc;
2520 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2521 if (RT_SUCCESS(rc))
2522 {
2523 /*
2524 * Start by syncing the page directory entry so CSAM's TLB trick works.
2525 */
2526 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2527 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2528 *pPdeDst = PdeDst;
2529# if defined(IN_RC)
2530 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2531# endif
2532
2533 /*
2534 * Directory/page user or supervisor privilege: (same goes for read/write)
2535 *
2536 * Directory Page Combined
2537 * U/S U/S U/S
2538 * 0 0 0
2539 * 0 1 0
2540 * 1 0 0
2541 * 1 1 1
2542 *
2543 * Simple AND operation. Table listed for completeness.
2544 *
2545 */
2546 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2547# ifdef PGM_SYNC_N_PAGES
2548 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2549 unsigned iPTDst = iPTBase;
2550 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2551 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2552 iPTDst = 0;
2553 else
2554 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2555# else /* !PGM_SYNC_N_PAGES */
2556 unsigned iPTDst = 0;
2557 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2558# endif /* !PGM_SYNC_N_PAGES */
2559# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2560 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2561 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2562# else
2563 const unsigned offPTSrc = 0;
2564# endif
2565 for (; iPTDst < iPTDstEnd; iPTDst++)
2566 {
2567 const unsigned iPTSrc = iPTDst + offPTSrc;
2568 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2569
2570 if (PteSrc.n.u1Present) /* we've already cleared it above */
2571 {
2572# ifndef IN_RING0
2573 /*
2574 * Assuming kernel code will be marked as supervisor - and not as user level
2575 * and executed using a conforming code selector - And marked as readonly.
2576 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2577 */
2578 PPGMPAGE pPage;
2579 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2580 || !CSAMDoesPageNeedScanning(pVM, (RTRCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)))
2581 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2582 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2583 )
2584# endif
2585 PGM_BTH_NAME(SyncPageWorker)(pVM, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2586 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2587 (RTGCPTR)((iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT)),
2588 PteSrc.n.u1Present,
2589 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2590 PteSrc.n.u1User & PdeSrc.n.u1User,
2591 (uint64_t)PteSrc.u,
2592 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2593 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2594 }
2595 } /* for PTEs */
2596 }
2597 }
2598 else
2599 {
2600 /*
2601 * Big page - 2/4MB.
2602 *
2603 * We'll walk the ram range list in parallel and optimize lookups.
2604 * We will only sync on shadow page table at a time.
2605 */
2606 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2607
2608 /**
2609 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2610 */
2611
2612 /*
2613 * Start by syncing the page directory entry.
2614 */
2615 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2616 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2617
2618 /*
2619 * If the page is not flagged as dirty and is writable, then make it read-only
2620 * at PD level, so we can set the dirty bit when the page is modified.
2621 *
2622 * ASSUMES that page access handlers are implemented on page table entry level.
2623 * Thus we will first catch the dirty access and set PDE.D and restart. If
2624 * there is an access handler, we'll trap again and let it work on the problem.
2625 */
2626 /** @todo move the above stuff to a section in the PGM documentation. */
2627 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2628 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
2629 {
2630 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2631 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2632 PdeDst.b.u1Write = 0;
2633 }
2634 *pPdeDst = PdeDst;
2635# if defined(IN_RC)
2636 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2637# endif
2638
2639 /*
2640 * Fill the shadow page table.
2641 */
2642 /* Get address and flags from the source PDE. */
2643 SHWPTE PteDstBase;
2644 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
2645
2646 /* Loop thru the entries in the shadow PT. */
2647 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2648 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2649 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2650 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2651 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2652 unsigned iPTDst = 0;
2653 while (iPTDst < RT_ELEMENTS(pPTDst->a))
2654 {
2655 /* Advance ram range list. */
2656 while (pRam && GCPhys > pRam->GCPhysLast)
2657 pRam = pRam->CTX_SUFF(pNext);
2658 if (pRam && GCPhys >= pRam->GCPhys)
2659 {
2660 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
2661 do
2662 {
2663 /* Make shadow PTE. */
2664 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2665 SHWPTE PteDst;
2666
2667# ifdef VBOX_WITH_NEW_PHYS_CODE
2668# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2669 /* Try make the page writable if necessary. */
2670 if ( PteDstBase.n.u1Write
2671 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2672 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2673 {
2674 rc = pgmPhysPageMakeWritableUnlocked(pVM, pPage, GCPhys);
2675 AssertRCReturn(rc, rc);
2676 }
2677# endif
2678# else /* !VBOX_WITH_NEW_PHYS_CODE */
2679 /* Make sure the RAM has already been allocated. */
2680 if (pRam->fFlags & MM_RAM_FLAGS_DYNAMIC_ALLOC) /** @todo PAGE FLAGS */
2681 {
2682 if (RT_UNLIKELY(!PGM_PAGE_GET_HCPHYS(pPage)))
2683 {
2684# ifdef IN_RING3
2685 int rc = pgmr3PhysGrowRange(pVM, GCPhys);
2686# else
2687 int rc = CTXALLMID(VMM, CallHost)(pVM, VMMCALLHOST_PGM_RAM_GROW_RANGE, GCPhys);
2688# endif
2689 if (rc != VINF_SUCCESS)
2690 return rc;
2691 }
2692 }
2693# endif /* !VBOX_WITH_NEW_PHYS_CODE */
2694
2695 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2696 {
2697 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
2698 {
2699 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2700 PteDst.n.u1Write = 0;
2701 }
2702 else
2703 PteDst.u = 0;
2704 }
2705# ifndef IN_RING0
2706 /*
2707 * Assuming kernel code will be marked as supervisor and not as user level and executed
2708 * using a conforming code selector. Don't check for readonly, as that implies the whole
2709 * 4MB can be code or readonly data. Linux enables write access for its large pages.
2710 */
2711 else if ( !PdeSrc.n.u1User
2712 && CSAMDoesPageNeedScanning(pVM, (RTRCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))))
2713 PteDst.u = 0;
2714# endif
2715 else
2716 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
2717
2718# ifdef VBOX_WITH_NEW_PHYS_CODE
2719 /* Only map writable pages writable. */
2720 if ( PteDst.n.u1Write
2721 && PteDst.n.u1Present
2722 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2723 {
2724 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2725 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2726 }
2727# endif
2728
2729# ifdef PGMPOOL_WITH_USER_TRACKING
2730 if (PteDst.n.u1Present)
2731 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVM, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2732# endif
2733 /* commit it */
2734 pPTDst->a[iPTDst] = PteDst;
2735 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2736 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
2737 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2738
2739 /* advance */
2740 GCPhys += PAGE_SIZE;
2741 iHCPage++;
2742 iPTDst++;
2743 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2744 && GCPhys <= pRam->GCPhysLast);
2745 }
2746 else if (pRam)
2747 {
2748 Log(("Invalid pages at %RGp\n", GCPhys));
2749 do
2750 {
2751 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2752 GCPhys += PAGE_SIZE;
2753 iPTDst++;
2754 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2755 && GCPhys < pRam->GCPhys);
2756 }
2757 else
2758 {
2759 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2760 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2761 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
2762 }
2763 } /* while more PTEs */
2764 } /* 4KB / 4MB */
2765 }
2766 else
2767 AssertRelease(!PdeDst.n.u1Present);
2768
2769 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2770 if (RT_FAILURE(rc))
2771 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
2772 return rc;
2773
2774#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2775 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2776 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2777 && !defined(IN_RC)
2778
2779 /*
2780 * Validate input a little bit.
2781 */
2782 int rc = VINF_SUCCESS;
2783# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2784 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2785 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
2786
2787 /* Fetch the pgm pool shadow descriptor. */
2788 PPGMPOOLPAGE pShwPde = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
2789 Assert(pShwPde);
2790
2791# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2792 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2793 PPGMPOOLPAGE pShwPde;
2794 PX86PDPAE pPDDst;
2795 PSHWPDE pPdeDst;
2796
2797 /* Fetch the pgm pool shadow descriptor. */
2798 rc = pgmShwGetPaePoolPagePD(&pVM->pgm.s, GCPtrPage, &pShwPde);
2799 AssertRCSuccessReturn(rc, rc);
2800 Assert(pShwPde);
2801
2802 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2803 pPdeDst = &pPDDst->a[iPDDst];
2804
2805# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2806 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2807 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2808 PX86PDPAE pPDDst;
2809 PX86PDPT pPdptDst;
2810 rc = pgmShwGetLongModePDPtr(pVM, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2811 AssertRCSuccessReturn(rc, rc);
2812 Assert(pPDDst);
2813 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2814
2815 /* Fetch the pgm pool shadow descriptor. */
2816 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2817 Assert(pShwPde);
2818
2819# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2820 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
2821 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2822 PEPTPD pPDDst;
2823 PEPTPDPT pPdptDst;
2824
2825 rc = pgmShwGetEPTPDPtr(pVM, GCPtrPage, &pPdptDst, &pPDDst);
2826 if (rc != VINF_SUCCESS)
2827 {
2828 AssertRC(rc);
2829 return rc;
2830 }
2831 Assert(pPDDst);
2832 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2833
2834 /* Fetch the pgm pool shadow descriptor. */
2835 PPGMPOOLPAGE pShwPde = pgmPoolGetPageByHCPhys(pVM, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
2836 Assert(pShwPde);
2837# endif
2838 SHWPDE PdeDst = *pPdeDst;
2839
2840 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
2841 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2842
2843 GSTPDE PdeSrc;
2844 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2845 PdeSrc.n.u1Present = 1;
2846 PdeSrc.n.u1Write = 1;
2847 PdeSrc.n.u1Accessed = 1;
2848 PdeSrc.n.u1User = 1;
2849
2850 /*
2851 * Allocate & map the page table.
2852 */
2853 PSHWPT pPTDst;
2854 PPGMPOOLPAGE pShwPage;
2855 RTGCPHYS GCPhys;
2856
2857 /* Virtual address = physical address */
2858 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
2859 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2860
2861 if ( rc == VINF_SUCCESS
2862 || rc == VINF_PGM_CACHED_PAGE)
2863 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2864 else
2865 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2866
2867 PdeDst.u &= X86_PDE_AVL_MASK;
2868 PdeDst.u |= pShwPage->Core.Key;
2869 PdeDst.n.u1Present = 1;
2870 PdeDst.n.u1Write = 1;
2871# if PGM_SHW_TYPE == PGM_TYPE_EPT
2872 PdeDst.n.u1Execute = 1;
2873# else
2874 PdeDst.n.u1User = 1;
2875 PdeDst.n.u1Accessed = 1;
2876# endif
2877 *pPdeDst = PdeDst;
2878
2879 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
2880 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2881 return rc;
2882
2883#else
2884 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
2885 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2886 return VERR_INTERNAL_ERROR;
2887#endif
2888}
2889
2890
2891
2892/**
2893 * Prefetch a page/set of pages.
2894 *
2895 * Typically used to sync commonly used pages before entering raw mode
2896 * after a CR3 reload.
2897 *
2898 * @returns VBox status code.
2899 * @param pVM VM handle.
2900 * @param GCPtrPage Page to invalidate.
2901 */
2902PGM_BTH_DECL(int, PrefetchPage)(PVM pVM, RTGCPTR GCPtrPage)
2903{
2904#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2905 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
2906 /*
2907 * Check that all Guest levels thru the PDE are present, getting the
2908 * PD and PDE in the processes.
2909 */
2910 int rc = VINF_SUCCESS;
2911# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2912# if PGM_GST_TYPE == PGM_TYPE_32BIT
2913 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
2914 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
2915# elif PGM_GST_TYPE == PGM_TYPE_PAE
2916 unsigned iPDSrc;
2917 X86PDPE PdpeSrc;
2918 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
2919 if (!pPDSrc)
2920 return VINF_SUCCESS; /* not present */
2921# elif PGM_GST_TYPE == PGM_TYPE_AMD64
2922 unsigned iPDSrc;
2923 PX86PML4E pPml4eSrc;
2924 X86PDPE PdpeSrc;
2925 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
2926 if (!pPDSrc)
2927 return VINF_SUCCESS; /* not present */
2928# endif
2929 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2930# else
2931 PGSTPD pPDSrc = NULL;
2932 const unsigned iPDSrc = 0;
2933 GSTPDE PdeSrc;
2934
2935 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
2936 PdeSrc.n.u1Present = 1;
2937 PdeSrc.n.u1Write = 1;
2938 PdeSrc.n.u1Accessed = 1;
2939 PdeSrc.n.u1User = 1;
2940# endif
2941
2942 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
2943 {
2944# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2945 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVM->pgm.s, GCPtrPage);
2946# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2947 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2948 PX86PDPAE pPDDst;
2949 X86PDEPAE PdeDst;
2950# if PGM_GST_TYPE != PGM_TYPE_PAE
2951 X86PDPE PdpeSrc;
2952
2953 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
2954 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
2955# endif
2956 int rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
2957 if (rc != VINF_SUCCESS)
2958 {
2959 AssertRC(rc);
2960 return rc;
2961 }
2962 Assert(pPDDst);
2963 PdeDst = pPDDst->a[iPDDst];
2964
2965# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2966 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2967 PX86PDPAE pPDDst;
2968 X86PDEPAE PdeDst;
2969
2970# if PGM_GST_TYPE == PGM_TYPE_PROT
2971 /* AMD-V nested paging */
2972 X86PML4E Pml4eSrc;
2973 X86PDPE PdpeSrc;
2974 PX86PML4E pPml4eSrc = &Pml4eSrc;
2975
2976 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
2977 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
2978 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
2979# endif
2980
2981 int rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
2982 if (rc != VINF_SUCCESS)
2983 {
2984 AssertRC(rc);
2985 return rc;
2986 }
2987 Assert(pPDDst);
2988 PdeDst = pPDDst->a[iPDDst];
2989# endif
2990 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
2991 {
2992 if (!PdeDst.n.u1Present)
2993 /** r=bird: This guy will set the A bit on the PDE, probably harmless. */
2994 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
2995 else
2996 {
2997 /** @note We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
2998 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
2999 * makes no sense to prefetch more than one page.
3000 */
3001 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3002 if (RT_SUCCESS(rc))
3003 rc = VINF_SUCCESS;
3004 }
3005 }
3006 }
3007 return rc;
3008
3009#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3010 return VINF_SUCCESS; /* ignore */
3011#endif
3012}
3013
3014
3015
3016
3017/**
3018 * Syncs a page during a PGMVerifyAccess() call.
3019 *
3020 * @returns VBox status code (informational included).
3021 * @param GCPtrPage The address of the page to sync.
3022 * @param fPage The effective guest page flags.
3023 * @param uErr The trap error code.
3024 */
3025PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVM pVM, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3026{
3027 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3028
3029 Assert(!HWACCMIsNestedPagingActive(pVM));
3030#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_TYPE_AMD64) \
3031 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3032
3033# ifndef IN_RING0
3034 if (!(fPage & X86_PTE_US))
3035 {
3036 /*
3037 * Mark this page as safe.
3038 */
3039 /** @todo not correct for pages that contain both code and data!! */
3040 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3041 CSAMMarkPage(pVM, (RTRCPTR)GCPtrPage, true);
3042 }
3043# endif
3044
3045 /*
3046 * Get guest PD and index.
3047 */
3048# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3049# if PGM_GST_TYPE == PGM_TYPE_32BIT
3050 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3051 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3052# elif PGM_GST_TYPE == PGM_TYPE_PAE
3053 unsigned iPDSrc;
3054 X86PDPE PdpeSrc;
3055 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtrPage, &iPDSrc, &PdpeSrc);
3056
3057 if (pPDSrc)
3058 {
3059 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3060 return VINF_EM_RAW_GUEST_TRAP;
3061 }
3062# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3063 unsigned iPDSrc;
3064 PX86PML4E pPml4eSrc;
3065 X86PDPE PdpeSrc;
3066 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3067 if (!pPDSrc)
3068 {
3069 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3070 return VINF_EM_RAW_GUEST_TRAP;
3071 }
3072# endif
3073# else
3074 PGSTPD pPDSrc = NULL;
3075 const unsigned iPDSrc = 0;
3076# endif
3077 int rc = VINF_SUCCESS;
3078
3079 /*
3080 * First check if the shadow pd is present.
3081 */
3082# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3083 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVM->pgm.s, GCPtrPage);
3084# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3085 PX86PDEPAE pPdeDst;
3086 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3087 PX86PDPAE pPDDst;
3088# if PGM_GST_TYPE != PGM_TYPE_PAE
3089 X86PDPE PdpeSrc;
3090
3091 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3092 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3093# endif
3094 rc = pgmShwSyncPaePDPtr(pVM, GCPtrPage, &PdpeSrc, &pPDDst);
3095 if (rc != VINF_SUCCESS)
3096 {
3097 AssertRC(rc);
3098 return rc;
3099 }
3100 Assert(pPDDst);
3101 pPdeDst = &pPDDst->a[iPDDst];
3102
3103# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3104 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3105 PX86PDPAE pPDDst;
3106 PX86PDEPAE pPdeDst;
3107
3108# if PGM_GST_TYPE == PGM_TYPE_PROT
3109 /* AMD-V nested paging */
3110 X86PML4E Pml4eSrc;
3111 X86PDPE PdpeSrc;
3112 PX86PML4E pPml4eSrc = &Pml4eSrc;
3113
3114 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3115 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_NX | X86_PML4E_A;
3116 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_NX | X86_PDPE_A;
3117# endif
3118
3119 rc = pgmShwSyncLongModePDPtr(pVM, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3120 if (rc != VINF_SUCCESS)
3121 {
3122 AssertRC(rc);
3123 return rc;
3124 }
3125 Assert(pPDDst);
3126 pPdeDst = &pPDDst->a[iPDDst];
3127# endif
3128 if (!pPdeDst->n.u1Present)
3129 {
3130 rc = PGM_BTH_NAME(SyncPT)(pVM, iPDSrc, pPDSrc, GCPtrPage);
3131 AssertRC(rc);
3132 if (rc != VINF_SUCCESS)
3133 return rc;
3134 }
3135
3136# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3137 /* Check for dirty bit fault */
3138 rc = PGM_BTH_NAME(CheckPageFault)(pVM, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3139 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3140 Log(("PGMVerifyAccess: success (dirty)\n"));
3141 else
3142 {
3143 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3144#else
3145 {
3146 GSTPDE PdeSrc;
3147 PdeSrc.au32[0] = 0; /* faked so we don't have to #ifdef everything */
3148 PdeSrc.n.u1Present = 1;
3149 PdeSrc.n.u1Write = 1;
3150 PdeSrc.n.u1Accessed = 1;
3151 PdeSrc.n.u1User = 1;
3152
3153#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
3154 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3155 if (uErr & X86_TRAP_PF_US)
3156 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3157 else /* supervisor */
3158 STAM_COUNTER_INC(&pVM->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3159
3160 rc = PGM_BTH_NAME(SyncPage)(pVM, PdeSrc, GCPtrPage, 1, 0);
3161 if (RT_SUCCESS(rc))
3162 {
3163 /* Page was successfully synced */
3164 Log2(("PGMVerifyAccess: success (sync)\n"));
3165 rc = VINF_SUCCESS;
3166 }
3167 else
3168 {
3169 Log(("PGMVerifyAccess: access violation for %RGv rc=%d\n", GCPtrPage, rc));
3170 return VINF_EM_RAW_GUEST_TRAP;
3171 }
3172 }
3173 return rc;
3174
3175#else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3176
3177 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
3178 return VERR_INTERNAL_ERROR;
3179#endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
3180}
3181
3182
3183#if PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
3184# if PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
3185/**
3186 * Figures out which kind of shadow page this guest PDE warrants.
3187 *
3188 * @returns Shadow page kind.
3189 * @param pPdeSrc The guest PDE in question.
3190 * @param cr4 The current guest cr4 value.
3191 */
3192DECLINLINE(PGMPOOLKIND) PGM_BTH_NAME(CalcPageKind)(const GSTPDE *pPdeSrc, uint32_t cr4)
3193{
3194# if PMG_GST_TYPE == PGM_TYPE_AMD64
3195 if (!pPdeSrc->n.u1Size)
3196# else
3197 if (!pPdeSrc->n.u1Size || !(cr4 & X86_CR4_PSE))
3198# endif
3199 return BTH_PGMPOOLKIND_PT_FOR_PT;
3200 //switch (pPdeSrc->u & (X86_PDE4M_RW | X86_PDE4M_US /*| X86_PDE4M_PAE_NX*/))
3201 //{
3202 // case 0:
3203 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RO;
3204 // case X86_PDE4M_RW:
3205 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW;
3206 // case X86_PDE4M_US:
3207 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US;
3208 // case X86_PDE4M_RW | X86_PDE4M_US:
3209 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US;
3210# if 0
3211 // case X86_PDE4M_PAE_NX:
3212 // return BTH_PGMPOOLKIND_PT_FOR_BIG_NX;
3213 // case X86_PDE4M_RW | X86_PDE4M_PAE_NX:
3214 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_NX;
3215 // case X86_PDE4M_US | X86_PDE4M_PAE_NX:
3216 // return BTH_PGMPOOLKIND_PT_FOR_BIG_US_NX;
3217 // case X86_PDE4M_RW | X86_PDE4M_US | X86_PDE4M_PAE_NX:
3218 // return BTH_PGMPOOLKIND_PT_FOR_BIG_RW_US_NX;
3219# endif
3220 return BTH_PGMPOOLKIND_PT_FOR_BIG;
3221 //}
3222}
3223# endif
3224#endif
3225
3226#undef MY_STAM_COUNTER_INC
3227#define MY_STAM_COUNTER_INC(a) do { } while (0)
3228
3229
3230/**
3231 * Syncs the paging hierarchy starting at CR3.
3232 *
3233 * @returns VBox status code, no specials.
3234 * @param pVM The virtual machine.
3235 * @param cr0 Guest context CR0 register
3236 * @param cr3 Guest context CR3 register
3237 * @param cr4 Guest context CR4 register
3238 * @param fGlobal Including global page directories or not
3239 */
3240PGM_BTH_DECL(int, SyncCR3)(PVM pVM, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3241{
3242 if (VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
3243 fGlobal = true; /* Change this CR3 reload to be a global one. */
3244
3245 LogFlow(("SyncCR3 %d\n", fGlobal));
3246
3247#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3248 /*
3249 * Update page access handlers.
3250 * The virtual are always flushed, while the physical are only on demand.
3251 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3252 * have to look into that later because it will have a bad influence on the performance.
3253 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3254 * bird: Yes, but that won't work for aliases.
3255 */
3256 /** @todo this MUST go away. See #1557. */
3257 STAM_PROFILE_START(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3258 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3259 STAM_PROFILE_STOP(&pVM->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3260#endif
3261
3262#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3263 /*
3264 * Nested / EPT - almost no work.
3265 */
3266 /** @todo check if this is really necessary; the call does it as well... */
3267 HWACCMFlushTLB(pVM);
3268 return VINF_SUCCESS;
3269
3270#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3271 /*
3272 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3273 * out the shadow parts when the guest modifies its tables.
3274 */
3275 return VINF_SUCCESS;
3276
3277#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3278
3279# ifdef PGM_WITHOUT_MAPPINGS
3280 Assert(pVM->pgm.s.fMappingsFixed);
3281 return VINF_SUCCESS;
3282# else
3283 /* Nothing to do when mappings are fixed. */
3284 if (pVM->pgm.s.fMappingsFixed)
3285 return VINF_SUCCESS;
3286
3287 int rc = PGMMapResolveConflicts(pVM);
3288 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3289 if (rc == VINF_PGM_SYNC_CR3)
3290 {
3291 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3292 return VINF_PGM_SYNC_CR3;
3293 }
3294# endif
3295 return VINF_SUCCESS;
3296#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3297}
3298
3299
3300
3301
3302#ifdef VBOX_STRICT
3303#ifdef IN_RC
3304# undef AssertMsgFailed
3305# define AssertMsgFailed Log
3306#endif
3307#ifdef IN_RING3
3308# include <VBox/dbgf.h>
3309
3310/**
3311 * Dumps a page table hierarchy use only physical addresses and cr4/lm flags.
3312 *
3313 * @returns VBox status code (VINF_SUCCESS).
3314 * @param pVM The VM handle.
3315 * @param cr3 The root of the hierarchy.
3316 * @param crr The cr4, only PAE and PSE is currently used.
3317 * @param fLongMode Set if long mode, false if not long mode.
3318 * @param cMaxDepth Number of levels to dump.
3319 * @param pHlp Pointer to the output functions.
3320 */
3321__BEGIN_DECLS
3322VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3323__END_DECLS
3324
3325#endif
3326
3327/**
3328 * Checks that the shadow page table is in sync with the guest one.
3329 *
3330 * @returns The number of errors.
3331 * @param pVM The virtual machine.
3332 * @param cr3 Guest context CR3 register
3333 * @param cr4 Guest context CR4 register
3334 * @param GCPtr Where to start. Defaults to 0.
3335 * @param cb How much to check. Defaults to everything.
3336 */
3337PGM_BTH_DECL(unsigned, AssertCR3)(PVM pVM, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3338{
3339#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3340 return 0;
3341#else
3342 unsigned cErrors = 0;
3343
3344#if PGM_GST_TYPE == PGM_TYPE_PAE
3345 /** @todo currently broken; crashes below somewhere */
3346 AssertFailed();
3347#endif
3348
3349#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3350 || PGM_GST_TYPE == PGM_TYPE_PAE \
3351 || PGM_GST_TYPE == PGM_TYPE_AMD64
3352
3353# if PGM_GST_TYPE == PGM_TYPE_AMD64
3354 bool fBigPagesSupported = true;
3355# else
3356 bool fBigPagesSupported = !!(CPUMGetGuestCR4(pVM) & X86_CR4_PSE);
3357# endif
3358 PPGM pPGM = &pVM->pgm.s;
3359 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3360 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3361# ifndef IN_RING0
3362 RTHCPHYS HCPhys; /* general usage. */
3363# endif
3364 int rc;
3365
3366 /*
3367 * Check that the Guest CR3 and all its mappings are correct.
3368 */
3369 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3370 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3371 false);
3372# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3373# if PGM_GST_TYPE == PGM_TYPE_32BIT
3374 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3375# else
3376 rc = PGMShwGetPage(pVM, (RTGCPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3377# endif
3378 AssertRCReturn(rc, 1);
3379 HCPhys = NIL_RTHCPHYS;
3380 rc = pgmRamGCPhys2HCPhys(pPGM, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3381 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3382# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3383 RTGCPHYS GCPhys;
3384 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3385 AssertRCReturn(rc, 1);
3386 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3387# endif
3388# endif /* !IN_RING0 */
3389
3390 /*
3391 * Get and check the Shadow CR3.
3392 */
3393# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3394 unsigned cPDEs = X86_PG_ENTRIES;
3395 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3396# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3397# if PGM_GST_TYPE == PGM_TYPE_32BIT
3398 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3399# else
3400 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3401# endif
3402 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3403# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3404 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3405 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3406# endif
3407 if (cb != ~(RTGCPTR)0)
3408 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3409
3410/** @todo call the other two PGMAssert*() functions. */
3411
3412# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3413 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3414# endif
3415
3416# if PGM_GST_TYPE == PGM_TYPE_AMD64
3417 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3418
3419 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3420 {
3421 PPGMPOOLPAGE pShwPdpt = NULL;
3422 PX86PML4E pPml4eSrc;
3423 PX86PML4E pPml4eDst;
3424 RTGCPHYS GCPhysPdptSrc;
3425
3426 pPml4eSrc = pgmGstGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3427 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVM->pgm.s, iPml4);
3428
3429 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3430 if (!pPml4eDst->n.u1Present)
3431 {
3432 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3433 continue;
3434 }
3435
3436 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3437 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3438
3439 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3440 {
3441 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3442 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3443 cErrors++;
3444 continue;
3445 }
3446
3447 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3448 {
3449 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3450 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3451 cErrors++;
3452 continue;
3453 }
3454
3455 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3456 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3457 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3458 {
3459 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3460 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3461 cErrors++;
3462 continue;
3463 }
3464# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3465 {
3466# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3467
3468# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3469 /*
3470 * Check the PDPTEs too.
3471 */
3472 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3473
3474 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3475 {
3476 unsigned iPDSrc;
3477 PPGMPOOLPAGE pShwPde = NULL;
3478 PX86PDPE pPdpeDst;
3479 RTGCPHYS GCPhysPdeSrc;
3480# if PGM_GST_TYPE == PGM_TYPE_PAE
3481 X86PDPE PdpeSrc;
3482 PGSTPD pPDSrc = pgmGstGetPaePDPtr(&pVM->pgm.s, GCPtr, &iPDSrc, &PdpeSrc);
3483 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVM->pgm.s);
3484# else
3485 PX86PML4E pPml4eSrc;
3486 X86PDPE PdpeSrc;
3487 PX86PDPT pPdptDst;
3488 PX86PDPAE pPDDst;
3489 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(&pVM->pgm.s, GCPtr, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3490
3491 rc = pgmShwGetLongModePDPtr(pVM, GCPtr, NULL, &pPdptDst, &pPDDst);
3492 if (rc != VINF_SUCCESS)
3493 {
3494 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3495 GCPtr += 512 * _2M;
3496 continue; /* next PDPTE */
3497 }
3498 Assert(pPDDst);
3499# endif
3500 Assert(iPDSrc == 0);
3501
3502 pPdpeDst = &pPdptDst->a[iPdpt];
3503
3504 if (!pPdpeDst->n.u1Present)
3505 {
3506 GCPtr += 512 * _2M;
3507 continue; /* next PDPTE */
3508 }
3509
3510 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3511 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3512
3513 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3514 {
3515 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3516 GCPtr += 512 * _2M;
3517 cErrors++;
3518 continue;
3519 }
3520
3521 if (GCPhysPdeSrc != pShwPde->GCPhys)
3522 {
3523# if PGM_GST_TYPE == PGM_TYPE_AMD64
3524 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3525# else
3526 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3527# endif
3528 GCPtr += 512 * _2M;
3529 cErrors++;
3530 continue;
3531 }
3532
3533# if PGM_GST_TYPE == PGM_TYPE_AMD64
3534 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3535 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3536 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3537 {
3538 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3539 GCPtr += 512 * _2M;
3540 cErrors++;
3541 continue;
3542 }
3543# endif
3544
3545# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3546 {
3547# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3548# if PGM_GST_TYPE == PGM_TYPE_32BIT
3549 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(&pVM->pgm.s);
3550# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3551 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVM->pgm.s);
3552# endif
3553# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3554 /*
3555 * Iterate the shadow page directory.
3556 */
3557 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3558 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3559
3560 for (;
3561 iPDDst < cPDEs;
3562 iPDDst++, GCPtr += cIncrement)
3563 {
3564# if PGM_SHW_TYPE == PGM_TYPE_PAE
3565 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3566# else
3567 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3568# endif
3569 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3570 {
3571 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3572 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3573 {
3574 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
3575 cErrors++;
3576 continue;
3577 }
3578 }
3579 else if ( (PdeDst.u & X86_PDE_P)
3580 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
3581 )
3582 {
3583 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3584 PPGMPOOLPAGE pPoolPage = pgmPoolGetPageByHCPhys(pVM, HCPhysShw);
3585 if (!pPoolPage)
3586 {
3587 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3588 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3589 cErrors++;
3590 continue;
3591 }
3592 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
3593
3594 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3595 {
3596 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3597 GCPtr, (uint64_t)PdeDst.u));
3598 cErrors++;
3599 }
3600
3601 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3602 {
3603 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3604 GCPtr, (uint64_t)PdeDst.u));
3605 cErrors++;
3606 }
3607
3608 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3609 if (!PdeSrc.n.u1Present)
3610 {
3611 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3612 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3613 cErrors++;
3614 continue;
3615 }
3616
3617 if ( !PdeSrc.b.u1Size
3618 || !fBigPagesSupported)
3619 {
3620 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
3621# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3622 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
3623# endif
3624 }
3625 else
3626 {
3627# if PGM_GST_TYPE == PGM_TYPE_32BIT
3628 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3629 {
3630 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3631 GCPtr, (uint64_t)PdeSrc.u));
3632 cErrors++;
3633 continue;
3634 }
3635# endif
3636 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(PdeSrc);
3637# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3638 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
3639# endif
3640 }
3641
3642 if ( pPoolPage->enmKind
3643 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3644 {
3645 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3646 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3647 cErrors++;
3648 }
3649
3650 PPGMPAGE pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3651 if (!pPhysPage)
3652 {
3653 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3654 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3655 cErrors++;
3656 continue;
3657 }
3658
3659 if (GCPhysGst != pPoolPage->GCPhys)
3660 {
3661 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3662 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3663 cErrors++;
3664 continue;
3665 }
3666
3667 if ( !PdeSrc.b.u1Size
3668 || !fBigPagesSupported)
3669 {
3670 /*
3671 * Page Table.
3672 */
3673 const GSTPT *pPTSrc;
3674 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
3675 if (RT_FAILURE(rc))
3676 {
3677 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3678 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3679 cErrors++;
3680 continue;
3681 }
3682 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3683 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3684 {
3685 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3686 // (This problem will go away when/if we shadow multiple CR3s.)
3687 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3688 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3689 cErrors++;
3690 continue;
3691 }
3692 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3693 {
3694 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3695 GCPtr, (uint64_t)PdeDst.u));
3696 cErrors++;
3697 continue;
3698 }
3699
3700 /* iterate the page table. */
3701# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3702 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3703 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3704# else
3705 const unsigned offPTSrc = 0;
3706# endif
3707 for (unsigned iPT = 0, off = 0;
3708 iPT < RT_ELEMENTS(pPTDst->a);
3709 iPT++, off += PAGE_SIZE)
3710 {
3711 const SHWPTE PteDst = pPTDst->a[iPT];
3712
3713 /* skip not-present entries. */
3714 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3715 continue;
3716 Assert(PteDst.n.u1Present);
3717
3718 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3719 if (!PteSrc.n.u1Present)
3720 {
3721# ifdef IN_RING3
3722 PGMAssertHandlerAndFlagsInSync(pVM);
3723 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
3724# endif
3725 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3726 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3727 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
3728 cErrors++;
3729 continue;
3730 }
3731
3732 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3733# if 1 /** @todo sync accessed bit properly... */
3734 fIgnoreFlags |= X86_PTE_A;
3735# endif
3736
3737 /* match the physical addresses */
3738 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
3739 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
3740
3741# ifdef IN_RING3
3742 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3743 if (RT_FAILURE(rc))
3744 {
3745 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3746 {
3747 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3748 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3749 cErrors++;
3750 continue;
3751 }
3752 }
3753 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3754 {
3755 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3756 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3757 cErrors++;
3758 continue;
3759 }
3760# endif
3761
3762 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3763 if (!pPhysPage)
3764 {
3765# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3766 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3767 {
3768 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3769 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3770 cErrors++;
3771 continue;
3772 }
3773# endif
3774 if (PteDst.n.u1Write)
3775 {
3776 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3777 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3778 cErrors++;
3779 }
3780 fIgnoreFlags |= X86_PTE_RW;
3781 }
3782 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
3783 {
3784 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3785 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3786 cErrors++;
3787 continue;
3788 }
3789
3790 /* flags */
3791 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
3792 {
3793 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
3794 {
3795 if (PteDst.n.u1Write)
3796 {
3797 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3798 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3799 cErrors++;
3800 continue;
3801 }
3802 fIgnoreFlags |= X86_PTE_RW;
3803 }
3804 else
3805 {
3806 if (PteDst.n.u1Present)
3807 {
3808 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
3809 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3810 cErrors++;
3811 continue;
3812 }
3813 fIgnoreFlags |= X86_PTE_P;
3814 }
3815 }
3816 else
3817 {
3818 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
3819 {
3820 if (PteDst.n.u1Write)
3821 {
3822 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
3823 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3824 cErrors++;
3825 continue;
3826 }
3827 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
3828 {
3829 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3830 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3831 cErrors++;
3832 continue;
3833 }
3834 if (PteDst.n.u1Dirty)
3835 {
3836 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3837 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3838 cErrors++;
3839 }
3840# if 0 /** @todo sync access bit properly... */
3841 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
3842 {
3843 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3844 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3845 cErrors++;
3846 }
3847 fIgnoreFlags |= X86_PTE_RW;
3848# else
3849 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3850# endif
3851 }
3852 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3853 {
3854 /* access bit emulation (not implemented). */
3855 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
3856 {
3857 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
3858 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3859 cErrors++;
3860 continue;
3861 }
3862 if (!PteDst.n.u1Accessed)
3863 {
3864 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
3865 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3866 cErrors++;
3867 }
3868 fIgnoreFlags |= X86_PTE_P;
3869 }
3870# ifdef DEBUG_sandervl
3871 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
3872# endif
3873 }
3874
3875 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
3876 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
3877 )
3878 {
3879 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
3880 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
3881 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
3882 cErrors++;
3883 continue;
3884 }
3885 } /* foreach PTE */
3886 }
3887 else
3888 {
3889 /*
3890 * Big Page.
3891 */
3892 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
3893 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
3894 {
3895 if (PdeDst.n.u1Write)
3896 {
3897 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3898 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3899 cErrors++;
3900 continue;
3901 }
3902 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
3903 {
3904 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
3905 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3906 cErrors++;
3907 continue;
3908 }
3909# if 0 /** @todo sync access bit properly... */
3910 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
3911 {
3912 AssertMsgFailed(("!DIRTY page at %RGv is has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
3913 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3914 cErrors++;
3915 }
3916 fIgnoreFlags |= X86_PTE_RW;
3917# else
3918 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
3919# endif
3920 }
3921 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3922 {
3923 /* access bit emulation (not implemented). */
3924 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
3925 {
3926 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3927 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3928 cErrors++;
3929 continue;
3930 }
3931 if (!PdeDst.n.u1Accessed)
3932 {
3933 AssertMsgFailed(("!ACCESSED page at %RGv is has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3934 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3935 cErrors++;
3936 }
3937 fIgnoreFlags |= X86_PTE_P;
3938 }
3939
3940 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
3941 {
3942 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
3943 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
3944 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3945 cErrors++;
3946 }
3947
3948 /* iterate the page table. */
3949 for (unsigned iPT = 0, off = 0;
3950 iPT < RT_ELEMENTS(pPTDst->a);
3951 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
3952 {
3953 const SHWPTE PteDst = pPTDst->a[iPT];
3954
3955 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
3956 {
3957 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
3958 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3959 cErrors++;
3960 }
3961
3962 /* skip not-present entries. */
3963 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
3964 continue;
3965
3966 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
3967
3968 /* match the physical addresses */
3969 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
3970
3971# ifdef IN_RING3
3972 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3973 if (RT_FAILURE(rc))
3974 {
3975 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3976 {
3977 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3978 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3979 cErrors++;
3980 }
3981 }
3982 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
3983 {
3984 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
3985 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3986 cErrors++;
3987 continue;
3988 }
3989# endif
3990 pPhysPage = pgmPhysGetPage(pPGM, GCPhysGst);
3991 if (!pPhysPage)
3992 {
3993# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
3994 if (HCPhysShw != MMR3PageDummyHCPhys(pVM))
3995 {
3996 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
3997 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
3998 cErrors++;
3999 continue;
4000 }
4001# endif
4002 if (PteDst.n.u1Write)
4003 {
4004 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4005 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4006 cErrors++;
4007 }
4008 fIgnoreFlags |= X86_PTE_RW;
4009 }
4010 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4011 {
4012 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4013 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4014 cErrors++;
4015 continue;
4016 }
4017
4018 /* flags */
4019 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4020 {
4021 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4022 {
4023 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4024 {
4025 if (PteDst.n.u1Write)
4026 {
4027 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4028 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4029 cErrors++;
4030 continue;
4031 }
4032 fIgnoreFlags |= X86_PTE_RW;
4033 }
4034 }
4035 else
4036 {
4037 if (PteDst.n.u1Present)
4038 {
4039 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4040 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4041 cErrors++;
4042 continue;
4043 }
4044 fIgnoreFlags |= X86_PTE_P;
4045 }
4046 }
4047
4048 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4049 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4050 )
4051 {
4052 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4053 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4054 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4055 cErrors++;
4056 continue;
4057 }
4058 } /* for each PTE */
4059 }
4060 }
4061 /* not present */
4062
4063 } /* for each PDE */
4064
4065 } /* for each PDPTE */
4066
4067 } /* for each PML4E */
4068
4069# ifdef DEBUG
4070 if (cErrors)
4071 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4072# endif
4073
4074#endif /* GST == 32BIT, PAE or AMD64 */
4075 return cErrors;
4076
4077#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4078}
4079#endif /* VBOX_STRICT */
4080
4081
4082/**
4083 * Sets up the CR3 for shadow paging
4084 *
4085 * @returns Strict VBox status code.
4086 * @retval VINF_SUCCESS.
4087 *
4088 * @param pVM VM handle.
4089 * @param GCPhysCR3 The physical address in the CR3 register.
4090 */
4091PGM_BTH_DECL(int, MapCR3)(PVM pVM, RTGCPHYS GCPhysCR3)
4092{
4093 /* Update guest paging info. */
4094#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4095 || PGM_GST_TYPE == PGM_TYPE_PAE \
4096 || PGM_GST_TYPE == PGM_TYPE_AMD64
4097
4098 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4099
4100 /*
4101 * Map the page CR3 points at.
4102 */
4103 RTHCPTR HCPtrGuestCR3;
4104 RTHCPHYS HCPhysGuestCR3;
4105# ifdef VBOX_WITH_NEW_PHYS_CODE
4106 /** @todo this needs some reworking. current code is just a big hack. */
4107# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4108# if 1 /* temp hack */
4109 VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
4110 return VINF_PGM_SYNC_CR3;
4111# else
4112 AssertFailedReturn(VERR_INTERNAL_ERROR);
4113# endif
4114 int rc = VERR_INTERNAL_ERROR;
4115# else
4116 pgmLock(pVM);
4117 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4118 AssertReturn(pPage, VERR_INTERNAL_ERROR);
4119 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3);
4120 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPage);
4121 pgmUnlock(pVM);
4122# endif
4123# else /* !VBOX_WITH_NEW_PHYS_CODE */
4124 int rc = pgmRamGCPhys2HCPtrAndHCPhys(&pVM->pgm.s, GCPhysCR3 & GST_CR3_PAGE_MASK, &HCPtrGuestCR3, &HCPhysGuestCR3);
4125# endif /* !VBOX_WITH_NEW_PHYS_CODE */
4126 if (RT_SUCCESS(rc))
4127 {
4128 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4129 if (RT_SUCCESS(rc))
4130 {
4131# ifdef IN_RC
4132 PGM_INVL_PG(pVM->pgm.s.GCPtrCR3Mapping);
4133# endif
4134# if PGM_GST_TYPE == PGM_TYPE_32BIT
4135 pVM->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4136# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4137 pVM->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4138# endif
4139 pVM->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))pVM->pgm.s.GCPtrCR3Mapping;
4140
4141# elif PGM_GST_TYPE == PGM_TYPE_PAE
4142 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4143 pVM->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4144# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4145 pVM->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4146# endif
4147 pVM->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RCPTRTYPE(uint8_t *))pVM->pgm.s.GCPtrCR3Mapping + off);
4148 Log(("Cached mapping %RRv\n", pVM->pgm.s.pGstPaePdptRC));
4149
4150 /*
4151 * Map the 4 PDs too.
4152 */
4153 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(&pVM->pgm.s);
4154 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4155 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4156 {
4157 if (pGuestPDPT->a[i].n.u1Present)
4158 {
4159 RTHCPTR HCPtr;
4160 RTHCPHYS HCPhys;
4161 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4162# ifdef VBOX_WITH_NEW_PHYS_CODE
4163# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4164 AssertFailedReturn(VERR_INTERNAL_ERROR);
4165 int rc2 = VERR_INTERNAL_ERROR;
4166# else
4167 pgmLock(pVM);
4168 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4169 AssertReturn(pPage, VERR_INTERNAL_ERROR);
4170 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4171 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4172 pgmUnlock(pVM);
4173# endif
4174# else /* !VBOX_WITH_NEW_PHYS_CODE */
4175 int rc2 = pgmRamGCPhys2HCPtrAndHCPhys(&pVM->pgm.s, GCPhys, &HCPtr, &HCPhys);
4176# endif /* !VBOX_WITH_NEW_PHYS_CODE */
4177 if (RT_SUCCESS(rc2))
4178 {
4179 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4180 AssertRCReturn(rc, rc);
4181
4182 pVM->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4183# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4184 pVM->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4185# endif
4186 pVM->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))GCPtr;
4187 pVM->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4188 PGM_INVL_PG(GCPtr); /** @todo This ends up calling HWACCMInvalidatePage, is that correct? */
4189 continue;
4190 }
4191 AssertMsgFailed(("pgmR3Gst32BitMapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4192 }
4193
4194 pVM->pgm.s.apGstPaePDsR3[i] = 0;
4195# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4196 pVM->pgm.s.apGstPaePDsR0[i] = 0;
4197# endif
4198 pVM->pgm.s.apGstPaePDsRC[i] = 0;
4199 pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4200 PGM_INVL_PG(GCPtr); /** @todo this shouldn't be necessary? */
4201 }
4202
4203# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4204 pVM->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4205# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4206 pVM->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4207# endif
4208# endif
4209 }
4210 else
4211 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4212 }
4213 else
4214 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4215
4216#else /* prot/real stub */
4217 int rc = VINF_SUCCESS;
4218#endif
4219
4220 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4221# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4222 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4223 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4224 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4225 && PGM_GST_TYPE != PGM_TYPE_PROT))
4226
4227 Assert(!HWACCMIsNestedPagingActive(pVM));
4228
4229 /*
4230 * Update the shadow root page as well since that's not fixed.
4231 */
4232 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4233 PPGMPOOLPAGE pOldShwPageCR3 = pVM->pgm.s.CTX_SUFF(pShwPageCR3);
4234 uint32_t iOldShwUserTable = pVM->pgm.s.iShwUserTable;
4235 uint32_t iOldShwUser = pVM->pgm.s.iShwUser;
4236 PPGMPOOLPAGE pNewShwPageCR3;
4237
4238 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4239 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3);
4240 if (rc == VERR_PGM_POOL_FLUSHED)
4241 {
4242 Log(("MapCR3: PGM pool flushed -> signal sync cr3\n"));
4243 Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
4244 return VINF_PGM_SYNC_CR3;
4245 }
4246 AssertRCReturn(rc, rc);
4247 rc = VINF_SUCCESS;
4248
4249 /* Mark the page as locked; disallow flushing. */
4250 pgmPoolLockPage(pPool, pNewShwPageCR3);
4251
4252# ifdef IN_RC
4253 /** NOTE: We can't deal with jumps to ring 3 here as we're now in an inconsistent state! */
4254 bool fLog = VMMGCLogDisable(pVM);
4255# endif
4256
4257 pVM->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4258 pVM->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4259 pVM->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4260# ifdef IN_RING0
4261 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4262 pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4263# elif defined(IN_RC)
4264 pVM->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4265 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4266# else
4267 pVM->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4268 pVM->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4269# endif
4270
4271# ifndef PGM_WITHOUT_MAPPINGS
4272 /* Apply all hypervisor mappings to the new CR3.
4273 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4274 * make sure we check for conflicts in the new CR3 root.
4275 */
4276# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4277 Assert(VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3_NON_GLOBAL) || VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
4278# endif
4279 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4280 AssertRCReturn(rc, rc);
4281# endif
4282
4283 /* Set the current hypervisor CR3. */
4284 CPUMSetHyperCR3(pVM, PGMGetHyperCR3(pVM));
4285 SELMShadowCR3Changed(pVM);
4286
4287# ifdef IN_RC
4288 VMMGCLogRestore(pVM, fLog);
4289# endif
4290
4291 /* Clean up the old CR3 root. */
4292 if (pOldShwPageCR3)
4293 {
4294 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4295# ifndef PGM_WITHOUT_MAPPINGS
4296 /* Remove the hypervisor mappings from the shadow page table. */
4297 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4298# endif
4299 /* Mark the page as unlocked; allow flushing again. */
4300 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4301
4302 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4303 }
4304
4305# endif
4306
4307 return rc;
4308}
4309
4310/**
4311 * Unmaps the shadow CR3.
4312 *
4313 * @returns VBox status, no specials.
4314 * @param pVM VM handle.
4315 */
4316PGM_BTH_DECL(int, UnmapCR3)(PVM pVM)
4317{
4318 LogFlow(("UnmapCR3\n"));
4319
4320 int rc = VINF_SUCCESS;
4321
4322 /* Update guest paging info. */
4323#if PGM_GST_TYPE == PGM_TYPE_32BIT
4324 pVM->pgm.s.pGst32BitPdR3 = 0;
4325#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4326 pVM->pgm.s.pGst32BitPdR0 = 0;
4327#endif
4328 pVM->pgm.s.pGst32BitPdRC = 0;
4329
4330#elif PGM_GST_TYPE == PGM_TYPE_PAE
4331 pVM->pgm.s.pGstPaePdptR3 = 0;
4332# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4333 pVM->pgm.s.pGstPaePdptR0 = 0;
4334# endif
4335 pVM->pgm.s.pGstPaePdptRC = 0;
4336 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4337 {
4338 pVM->pgm.s.apGstPaePDsR3[i] = 0;
4339# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4340 pVM->pgm.s.apGstPaePDsR0[i] = 0;
4341# endif
4342 pVM->pgm.s.apGstPaePDsRC[i] = 0;
4343 pVM->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4344 }
4345
4346#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4347 pVM->pgm.s.pGstAmd64Pml4R3 = 0;
4348# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4349 pVM->pgm.s.pGstAmd64Pml4R0 = 0;
4350# endif
4351
4352#else /* prot/real mode stub */
4353 /* nothing to do */
4354#endif
4355
4356#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4357 /* Update shadow paging info. */
4358# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4359 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4360 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4361
4362# if PGM_GST_TYPE != PGM_TYPE_REAL
4363 Assert(!HWACCMIsNestedPagingActive(pVM));
4364# endif
4365
4366# ifndef PGM_WITHOUT_MAPPINGS
4367 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4368 /* Remove the hypervisor mappings from the shadow page table. */
4369 pgmMapDeactivateCR3(pVM, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4370# endif
4371
4372 if (pVM->pgm.s.CTX_SUFF(pShwPageCR3))
4373 {
4374 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4375
4376 Assert(pVM->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4377
4378 /* Mark the page as unlocked; allow flushing again. */
4379 pgmPoolUnlockPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3));
4380
4381 pgmPoolFreeByPage(pPool, pVM->pgm.s.CTX_SUFF(pShwPageCR3), pVM->pgm.s.iShwUser, pVM->pgm.s.iShwUserTable);
4382 pVM->pgm.s.pShwPageCR3R3 = 0;
4383 pVM->pgm.s.pShwPageCR3R0 = 0;
4384 pVM->pgm.s.pShwPageCR3RC = 0;
4385 pVM->pgm.s.iShwUser = 0;
4386 pVM->pgm.s.iShwUserTable = 0;
4387 }
4388# endif
4389#endif /* !IN_RC*/
4390
4391 return rc;
4392}
4393
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette