VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h @ 30891

Last change on this file since 30891 was 30891, checked in by vboxsync, 15 years ago

PGM: Cleanups related to pending MMIO/#PF optimizations. Risky.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 207.7 KB
1/* $Id: PGMAllBth.h 30891 2010-07-17 01:58:30Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks The nested page tables on AMD makes use of PGM_SHW_TYPE in
6 * {PGM_TYPE_AMD64, PGM_TYPE_PAE and PGM_TYPE_32BIT} and PGM_GST_TYPE
7 * set to PGM_TYPE_PROT. Half of the code in this file is not
8 * exercised with PGM_SHW_TYPE set to PGM_TYPE_NESTED.
9 *
10 * @remarks Extended page tables (intel) are built with PGM_GST_TYPE set to
11 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
12 *
13 * @remarks This file is one big \#ifdef-orgy!
14 *
15 */
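/*
 * Illustrative sketch of how this template gets instantiated (an assumption
 * added for the reader's benefit; the actual macro definitions live outside
 * this file, e.g. in PGMInternal.h): the header is included once per
 * guest/shadow paging mode pair, roughly like
 *
 *      #define PGM_GST_TYPE    PGM_TYPE_32BIT
 *      #define PGM_SHW_TYPE    PGM_TYPE_32BIT
 *      #include "PGMAllBth.h"      // one instance per mode pair
 *
 * and PGM_BTH_NAME()/PGM_BTH_DECL() mangle each function name with that pair
 * so all the instances can coexist in one binary.
 */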
16
17/*
18 * Copyright (C) 2006-2010 Oracle Corporation
19 *
20 * This file is part of VirtualBox Open Source Edition (OSE), as
21 * available from http://www.virtualbox.org. This file is free software;
22 * you can redistribute it and/or modify it under the terms of the GNU
23 * General Public License (GPL) as published by the Free Software
24 * Foundation, in version 2 as it comes in the "COPYING" file of the
25 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
26 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
27 */
28
29
30/*******************************************************************************
31* Internal Functions *
32*******************************************************************************/
33RT_C_DECLS_BEGIN
34PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
35PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
37PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
38PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
39PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
40PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
41PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
42PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
43#ifdef VBOX_STRICT
44PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
45#endif
46DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte);
47PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
48PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
49RT_C_DECLS_END
50
51
52/*
53 * Filter out some illegal combinations of guest and shadow paging, so we can
54 * remove redundant checks inside functions.
55 */
56#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
57# error "Invalid combination; PAE guest implies PAE shadow"
58#endif
59
60#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
61 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
62# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
63#endif
64
65#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
66 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
67# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
68#endif
69
70#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
71 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
72# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
73#endif
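/*
 * Reading aid derived from the #error filters above: the guest/shadow
 * combinations that survive them are
 *
 *      Guest \ Shadow   32BIT  PAE  AMD64  NESTED  EPT
 *      REAL               X     X     -      X      X
 *      PROT               X     X     X      X      X
 *      32BIT              X     X     -      X      X
 *      PAE                -     X     -      X      X
 *      AMD64              -     -     X      X      X
 *
 * (Per the remarks at the top of the file, EPT shadow paging is built with
 * PGM_TYPE_PROT guests, and much of this file is not exercised when
 * PGM_SHW_TYPE is PGM_TYPE_NESTED.)
 */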
74
75
76#ifndef IN_RING3
77/**
78 * #PF Handler for raw-mode guest execution.
79 *
80 * @returns VBox status code (appropriate for trap handling and GC return).
81 *
82 * @param pVCpu VMCPU Handle.
83 * @param uErr The trap error code.
84 * @param pRegFrame Trap register frame.
85 * @param pvFault The fault address.
86 * @param pfLockTaken PGM lock taken here or not (out)
87 */
88PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
89{
90 PVM pVM = pVCpu->CTX_SUFF(pVM);
91
92 *pfLockTaken = false;
93
94# if defined(IN_RC) && defined(VBOX_STRICT)
95 PGMDynCheckLocks(pVM);
96# endif
97
98# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
99 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
100 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
101 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
102
103# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
104 /*
105 * Hide the instruction fetch trap indicator for now.
106 */
107 /** @todo NXE will change this and we must fix NXE in the switcher too! */
108 if (uErr & X86_TRAP_PF_ID)
109 {
110 uErr &= ~X86_TRAP_PF_ID;
111 TRPMSetErrorCode(pVCpu, uErr);
112 }
113# endif
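 /*
  * Quick reference for the uErr bits tested throughout this function (these
  * are the architectural x86 #PF error code bits; only the names already
  * used in this file are listed):
  *      X86_TRAP_PF_P   - 0: the page was not present, 1: protection violation
  *      X86_TRAP_PF_RW  - the faulting access was a write
  *      X86_TRAP_PF_US  - the access originated in user mode (CPL 3)
  *      X86_TRAP_PF_ID  - the fault was caused by an instruction fetch
  */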
114
115 /*
116 * Get PDs.
117 */
118 int rc;
119# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
120# if PGM_GST_TYPE == PGM_TYPE_32BIT
121 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
122 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
123
124# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
125
126# if PGM_GST_TYPE == PGM_TYPE_PAE
127 unsigned iPDSrc = 0; /* initialized to shut up gcc */
128 X86PDPE PdpeSrc;
129 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, pvFault, &iPDSrc, &PdpeSrc);
130
131# elif PGM_GST_TYPE == PGM_TYPE_AMD64
132 unsigned iPDSrc = 0; /* initialized to shut up gcc */
133 PX86PML4E pPml4eSrc = NULL; /* ditto */
134 X86PDPE PdpeSrc;
135 PGSTPD pPDSrc;
136
137 pPDSrc = pgmGstGetLongModePDPtr(pVCpu, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
138 Assert(pPml4eSrc);
139# endif
140
141 /* Quick check for a valid guest trap. (PAE & AMD64) */
142 if (!pPDSrc)
143 {
144# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
145 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
146# else
147 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
148# endif
149 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
150 TRPMSetErrorCode(pVCpu, uErr);
151 return VINF_EM_RAW_GUEST_TRAP;
152 }
153# endif
154
155# else /* !PGM_WITH_PAGING */
156 PGSTPD pPDSrc = NULL;
157 const unsigned iPDSrc = 0;
158# endif /* !PGM_WITH_PAGING */
159
160# if !defined(PGM_WITHOUT_MAPPINGS) && ((PGM_GST_TYPE == PGM_TYPE_32BIT) || (PGM_GST_TYPE == PGM_TYPE_PAE))
161 /*
162 * Check for write conflicts with our hypervisor mapping early on. If the guest happens to access a non-present page,
163 * where our hypervisor is currently mapped, then we'll create a #PF storm in the guest.
164 */
165 if ( (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW)
166 && MMHyperIsInsideArea(pVM, pvFault))
167 {
168 /* Force a CR3 sync to check for conflicts and emulate the instruction. */
169 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
170 return VINF_EM_RAW_EMULATE_INSTR;
171 }
172# endif
173
174 /*
175 * First check for a genuine guest page fault.
176 */
177 /** @todo This duplicates the page table walk we're doing below. Need to
178 * find some way to avoid this double work, probably by caching
179 * the data. */
180# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
181 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
182 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDSrc->a[iPDSrc], pvFault);
183 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
184 if (rc == VINF_EM_RAW_GUEST_TRAP)
185 {
186 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
187 return rc;
188 }
189# endif /* PGM_WITH_PAGING */
190
191 /* Take the big lock now. */
192 *pfLockTaken = true;
193 pgmLock(pVM);
194
195 /*
196 * Fetch the guest PDE, PDPE and PML4E.
197 */
198# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
199 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
200# else
201 GSTPDE PdeSrc;
202 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
203 PdeSrc.n.u1Present = 1;
204 PdeSrc.n.u1Write = 1;
205 PdeSrc.n.u1Accessed = 1;
206 PdeSrc.n.u1User = 1;
207# endif
208
209# if PGM_SHW_TYPE == PGM_TYPE_32BIT
210 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
211 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
212
213# elif PGM_SHW_TYPE == PGM_TYPE_PAE
214 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
215
216 PX86PDPAE pPDDst;
217# if PGM_GST_TYPE != PGM_TYPE_PAE
218 X86PDPE PdpeSrc;
219
220 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
221 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
222# endif
223 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
224 if (rc != VINF_SUCCESS)
225 {
226 AssertRC(rc);
227 return rc;
228 }
229 Assert(pPDDst);
230
231# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
232 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
233 PX86PDPAE pPDDst;
234# if PGM_GST_TYPE == PGM_TYPE_PROT
235 /* AMD-V nested paging */
236 X86PML4E Pml4eSrc;
237 X86PDPE PdpeSrc;
238 PX86PML4E pPml4eSrc = &Pml4eSrc;
239
240 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
241 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
242 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
243# endif
244
245 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
246 if (rc != VINF_SUCCESS)
247 {
248 AssertRC(rc);
249 return rc;
250 }
251 Assert(pPDDst);
252
253# elif PGM_SHW_TYPE == PGM_TYPE_EPT
254 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
255 PEPTPD pPDDst;
256
257 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
258 if (rc != VINF_SUCCESS)
259 {
260 AssertRC(rc);
261 return rc;
262 }
263 Assert(pPDDst);
264# endif
265
266# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
267 /* Dirty page handling. */
268 if (uErr & X86_TRAP_PF_RW) /* write fault? */
269 {
270 /*
271 * If we successfully correct the write protection fault due to dirty bit
272 * tracking, then return immediately.
273 */
274 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
275 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
276 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
277 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
278 {
279 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
280 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
281 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
282 return VINF_SUCCESS;
283 }
284 }
285
286# if 0 /* rarely useful; leave for debugging. */
287 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
288# endif
289# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
290
291 /*
292 * A common case is the not-present error caused by lazy page table syncing.
293 *
294 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
295 * so we can safely assume that the shadow PT is present when calling SyncPage later.
296 *
297 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
298 * of mapping conflict and defer to SyncCR3 in R3.
299 * (Again, we do NOT support access handlers for non-present guest pages.)
300 *
301 */
302 Assert(PdeSrc.n.u1Present);
303 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
304 && !pPDDst->a[iPDDst].n.u1Present
305 )
306 {
307 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
308 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
309 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
310 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
311 if (RT_SUCCESS(rc))
312 {
313 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
314 return rc;
315 }
316 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
317 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
318 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
319 return VINF_PGM_SYNC_CR3;
320 }
321
322# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
323 /*
324 * Check if this address is within any of our mappings.
325 *
326 * This is *very* fast and it's gonna save us a bit of effort below and prevent
327 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
328 * (BTW, it's impossible to have physical access handlers in a mapping.)
329 */
330 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
331 {
332 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
333 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
334 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
335 {
336 if (pvFault < pMapping->GCPtr)
337 break;
338 if (pvFault - pMapping->GCPtr < pMapping->cb)
339 {
340 /*
341 * The first thing we check is if we've got an undetected conflict.
342 */
343 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
344 {
345 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
346 while (iPT-- > 0)
347 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
348 {
349 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
350 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
351 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
352 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
353 return VINF_PGM_SYNC_CR3;
354 }
355 }
356
357 /*
358 * Check if the fault address is in a virtual page access handler range.
359 */
360 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
361 if ( pCur
362 && pvFault - pCur->Core.Key < pCur->cb
363 && uErr & X86_TRAP_PF_RW)
364 {
365# ifdef IN_RC
366 STAM_PROFILE_START(&pCur->Stat, h);
367 pgmUnlock(pVM);
368 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
369 pgmLock(pVM);
370 STAM_PROFILE_STOP(&pCur->Stat, h);
371# else
372 AssertFailed();
373 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
374# endif
375 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
376 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
377 return rc;
378 }
379
380 /*
381 * Pretend we're not here and let the guest handle the trap.
382 */
383 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
384 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
385 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
386 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
387 return VINF_EM_RAW_GUEST_TRAP;
388 }
389 }
390 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
391 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
392# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
393
394 /*
395 * Check if this fault address is flagged for special treatment,
396 * which means we'll have to figure out the physical address and
397 * check flags associated with it.
398 *
399 * ASSUME that we can limit any special access handling to pages
400 * in page tables which the guest believes to be present.
401 */
402 Assert(PdeSrc.n.u1Present);
403 {
404 RTGCPHYS GCPhys = NIL_RTGCPHYS;
405
406# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
407 if ( PdeSrc.b.u1Size
408# if PGM_GST_TYPE == PGM_TYPE_32BIT
409 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
410# endif
411 )
412 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc)
413 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
414 else
415 {
416 PGSTPT pPTSrc;
417 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
418 if (RT_SUCCESS(rc))
419 {
420 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
421 if (pPTSrc->a[iPTESrc].n.u1Present)
422 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
423 }
424 }
425# else
426 /* No paging so the fault address is the physical address */
427 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
428# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
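 /*
  * Worked example for the big-page branch above, assuming a 32-bit guest
  * with a 4MB page (GST_BIG_PAGE_OFFSET_MASK = 0x3fffff, PAGE_OFFSET_MASK
  * = 0xfff; the masks differ for PAE/AMD64 2MB pages):
  *
  *      GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK = 0x003ff000
  *
  * so GCPhys ends up as the big-page frame base ORed with bits 21:12 of
  * pvFault, i.e. the page-aligned guest-physical address of the 4KB page
  * inside the big page that actually faulted.
  */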
429
430 /*
431 * If we have a GC address we'll check if it has any flags set.
432 */
433 if (GCPhys != NIL_RTGCPHYS)
434 {
435 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
436
437 PPGMPAGE pPage;
438 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
439 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
440 {
441 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
442 {
443 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
444 {
445 /*
446 * Physical page access handler.
447 */
448 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
449 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
450 if (pCur)
451 {
452# ifdef PGM_SYNC_N_PAGES
453 /*
454 * If the region is write protected and we got a page not present fault, then sync
455 * the pages. If the fault was caused by a read, then restart the instruction.
456 * In case of write access continue to the GC write handler.
457 *
458 * ASSUMES that there is only one handler per page or that they have similar write properties.
459 */
460 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
461 && !(uErr & X86_TRAP_PF_P))
462 {
463 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
464 if ( RT_FAILURE(rc)
465 || !(uErr & X86_TRAP_PF_RW)
466 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
467 {
468 AssertRC(rc);
469 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
470 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
471 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
472 return rc;
473 }
474 }
475# endif
476
477 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
478 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
479 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
480
481# if defined(IN_RC) || defined(IN_RING0)
482 if (pCur->CTX_SUFF(pfnHandler))
483 {
484 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
485# ifdef IN_RING0
486 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
487# else
488 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
489# endif
490 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
491 void *pvUser = pCur->CTX_SUFF(pvUser);
492
493 STAM_PROFILE_START(&pCur->Stat, h);
494 if (fLeaveLock)
495 pgmUnlock(pVM); /* @todo: Not entirely safe. */
496
497 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
498 if (fLeaveLock)
499 pgmLock(pVM);
500# ifdef VBOX_WITH_STATISTICS
501 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
502 if (pCur)
503 STAM_PROFILE_STOP(&pCur->Stat, h);
504# else
505 pCur = NULL; /* might be invalid by now. */
506# endif
507
508 }
509 else
510# endif
511 rc = VINF_EM_RAW_EMULATE_INSTR;
512
513 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
514 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
515 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
516 return rc;
517 }
518 }
519# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
520 else
521 {
522# ifdef PGM_SYNC_N_PAGES
523 /*
524 * If the region is write protected and we got a page not present fault, then sync
525 * the pages. If the fault was caused by a read, then restart the instruction.
526 * In case of write access continue to the GC write handler.
527 */
528 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
529 && !(uErr & X86_TRAP_PF_P))
530 {
531 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
532 if ( RT_FAILURE(rc)
533 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
534 || !(uErr & X86_TRAP_PF_RW))
535 {
536 AssertRC(rc);
537 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
538 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
539 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
540 return rc;
541 }
542 }
543# endif
544 /*
545 * Ok, it's a virtual page access handler.
546 *
547 * Since it's faster to search by address, we'll do that first
548 * and then retry by GCPhys if that fails.
549 */
550 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
551 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
552 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
553 */
554 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
555 if (pCur)
556 {
557 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
558 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
559 || !(uErr & X86_TRAP_PF_P)
560 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
561 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
562
563 if ( pvFault - pCur->Core.Key < pCur->cb
564 && ( uErr & X86_TRAP_PF_RW
565 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
566 {
567# ifdef IN_RC
568 STAM_PROFILE_START(&pCur->Stat, h);
569 pgmUnlock(pVM);
570 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
571 pgmLock(pVM);
572 STAM_PROFILE_STOP(&pCur->Stat, h);
573# else
574 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
575# endif
576 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
577 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
578 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
579 return rc;
580 }
581 /* Unhandled part of a monitored page */
582 }
583 else
584 {
585 /* Check by physical address. */
586 unsigned iPage;
587 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
588 &pCur, &iPage);
589 Assert(RT_SUCCESS(rc) || !pCur);
590 if ( pCur
591 && ( uErr & X86_TRAP_PF_RW
592 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
593 {
594 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
595# ifdef IN_RC
596 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
597 Assert(off < pCur->cb);
598 STAM_PROFILE_START(&pCur->Stat, h);
599 pgmUnlock(pVM);
600 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
601 pgmLock(pVM);
602 STAM_PROFILE_STOP(&pCur->Stat, h);
603# else
604 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
605# endif
606 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
607 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
608 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
609 return rc;
610 }
611 }
612 }
613# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
614
615 /*
616 * There is a handled area of the page, but this fault doesn't belong to it.
617 * We must emulate the instruction.
618 *
619 * To avoid crashing (non-fatal) in the interpreter and going back to the recompiler
620 * we first check if this was a page-not-present fault for a page with only
621 * write access handlers. Restart the instruction if it wasn't a write access.
622 */
623 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
624
625 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
626 && !(uErr & X86_TRAP_PF_P))
627 {
628 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
629 if ( RT_FAILURE(rc)
630 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
631 || !(uErr & X86_TRAP_PF_RW))
632 {
633 AssertRC(rc);
634 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
635 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
636 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
637 return rc;
638 }
639 }
640
641 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
642 * It's writing to an unhandled part of the LDT page several million times.
643 */
644 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
645 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
646 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
647 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
648 return rc;
649 } /* if any kind of handler */
650
651# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
652 if (uErr & X86_TRAP_PF_P)
653 {
654 /*
655 * The page isn't marked, but it might still be monitored by a virtual page access handler.
656 * (ASSUMES no temporary disabling of virtual handlers.)
657 */
658 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
659 * we should correct both the shadow page table and physical memory flags, and not only check for
660 * accesses within the handler region but for access to pages with virtual handlers. */
661 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
662 if (pCur)
663 {
664 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
665 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
666 || !(uErr & X86_TRAP_PF_P)
667 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
668 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
669
670 if ( pvFault - pCur->Core.Key < pCur->cb
671 && ( uErr & X86_TRAP_PF_RW
672 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
673 {
674# ifdef IN_RC
675 STAM_PROFILE_START(&pCur->Stat, h);
676 pgmUnlock(pVM);
677 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
678 pgmLock(pVM);
679 STAM_PROFILE_STOP(&pCur->Stat, h);
680# else
681 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
682# endif
683 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
684 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
685 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
686 return rc;
687 }
688 }
689 }
690# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
691 }
692 else
693 {
694 /*
695 * When the guest accesses invalid physical memory (e.g. probing
696 * of RAM or accessing a remapped MMIO range), then we'll fall
697 * back to the recompiler to emulate the instruction.
698 */
699 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
700 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
701 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
702 return VINF_EM_RAW_EMULATE_INSTR;
703 }
704
705 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
706
707# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
708 /*
709 * We are here only if page is present in Guest page tables and
710 * trap is not handled by our handlers.
711 *
712 * Check it for page out-of-sync situation.
713 */
714 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
715
716 if (!(uErr & X86_TRAP_PF_P))
717 {
718 /*
719 * Page is not present in our page tables.
720 * Try to sync it!
721 * BTW, fPageShw is invalid in this branch!
722 */
723 if (uErr & X86_TRAP_PF_US)
724 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
725 else /* supervisor */
726 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
727
728 if (PGM_PAGE_IS_BALLOONED(pPage))
729 {
730 /* Emulate reads from ballooned pages as they are not present in our shadow page tables. (required for e.g. Solaris guests; soft ecc, random nr generator) */
731 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
732 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
733 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
734 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
735 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
736 return rc;
737 }
738# if defined(LOG_ENABLED) && !defined(IN_RING0)
739 RTGCPHYS GCPhys2;
740 uint64_t fPageGst2;
741 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
742 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
743 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
744# endif /* LOG_ENABLED */
745
746# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
747 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
748 {
749 uint64_t fPageGst;
750 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
751 if ( RT_SUCCESS(rc)
752 && !(fPageGst & X86_PTE_US))
753 {
754 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
755 if ( pvFault == (RTGCPTR)pRegFrame->eip
756 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
757# ifdef CSAM_DETECT_NEW_CODE_PAGES
758 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
759 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
760# endif /* CSAM_DETECT_NEW_CODE_PAGES */
761 )
762 {
763 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
764 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
765 if (rc != VINF_SUCCESS)
766 {
767 /*
768 * CSAM needs to perform a job in ring 3.
769 *
770 * Sync the page before going to the host context; otherwise we'll end up in a loop if
771 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
772 */
773 LogFlow(("CSAM ring 3 job\n"));
774 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
775 AssertRC(rc2);
776
777 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
778 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
779 return rc;
780 }
781 }
782# ifdef CSAM_DETECT_NEW_CODE_PAGES
783 else if ( uErr == X86_TRAP_PF_RW
784 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
785 && pRegFrame->ecx < 0x10000)
786 {
787 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
788 * to detect loading of new code pages.
789 */
790
791 /*
792 * Decode the instruction.
793 */
794 RTGCPTR PC;
795 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs,
796 &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
797 if (rc == VINF_SUCCESS)
798 {
799 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
800 uint32_t cbOp;
801 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
802
803 /* For now we'll restrict this to rep movsw/d instructions */
804 if ( rc == VINF_SUCCESS
805 && pDis->pCurInstr->opcode == OP_MOVSWD
806 && (pDis->prefix & PREFIX_REP))
807 {
808 CSAMMarkPossibleCodePage(pVM, pvFault);
809 }
810 }
811 }
812# endif /* CSAM_DETECT_NEW_CODE_PAGES */
813
814 /*
815 * Mark this page as safe.
816 */
817 /** @todo not correct for pages that contain both code and data!! */
818 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
819 CSAMMarkPage(pVM, pvFault, true);
820 }
821 }
822# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
823 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
824 if (RT_SUCCESS(rc))
825 {
826 /* The page was successfully synced, return to the guest. */
827 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
828 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
829 return VINF_SUCCESS;
830 }
831 }
832 else /* uErr & X86_TRAP_PF_P: */
833 {
834 /*
835 * Write protected pages are made writable when the guest makes the first
836 * write to them. This happens for pages that are shared, write monitored
837 * and not yet allocated.
838 *
839 * Also, a side effect of not flushing global PDEs are out of sync pages due
840 * to physical monitored regions, that are no longer valid.
841 * Assume for now it only applies to the read/write flag.
842 */
843 if ( RT_SUCCESS(rc)
844 && (uErr & X86_TRAP_PF_RW))
845 {
846 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
847 {
848 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
849 Assert(!PGM_PAGE_IS_ZERO(pPage));
850 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
851
852 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
853 if (rc != VINF_SUCCESS)
854 {
855 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
856 return rc;
857 }
858 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
859 return VINF_EM_NO_MEMORY;
860 }
861
862# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
863 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
864 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
865 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
866 {
867 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
868 uint64_t fPageGst;
869 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
870 if ( RT_SUCCESS(rc)
871 && !(fPageGst & X86_PTE_RW))
872 {
873 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
874 if (RT_SUCCESS(rc))
875 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
876 else
877 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
878 return rc;
879 }
880 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
881 }
882# endif
883 /// @todo count the above case; else
884 if (uErr & X86_TRAP_PF_US)
885 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
886 else /* supervisor */
887 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
888
889 /*
890 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
891 * page is not present, which is not true in this case.
892 */
893 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
894 if (RT_SUCCESS(rc))
895 {
896 /*
897 * Page was successfully synced, return to guest.
898 * First invalidate the page as it might be in the TLB.
899 */
900# if PGM_SHW_TYPE == PGM_TYPE_EPT
901 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
902# else
903 PGM_INVL_PG(pVCpu, pvFault);
904# endif
905# ifdef VBOX_STRICT
906 RTGCPHYS GCPhys2;
907 uint64_t fPageGst;
908 if (!HWACCMIsNestedPagingActive(pVM))
909 {
910 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
911 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n", rc, fPageGst));
912 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
913 }
914 uint64_t fPageShw;
915 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
916 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
917# endif /* VBOX_STRICT */
918 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
919 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
920 return VINF_SUCCESS;
921 }
922 }
923
924# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
925# ifdef VBOX_STRICT
926 /*
927 * Check for VMM page flags vs. Guest page flags consistency.
928 * Currently only for debug purposes.
929 */
930 if (RT_SUCCESS(rc))
931 {
932 /* Get guest page flags. */
933 uint64_t fPageGst;
934 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
935 if (RT_SUCCESS(rc))
936 {
937 uint64_t fPageShw;
938 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
939
940 /*
941 * Compare page flags.
942 * Note: we have AVL, A, D bits desynched.
943 */
944 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
945 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
946 }
947 else
948 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
949 }
950 else
951 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
952# endif /* VBOX_STRICT */
953# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
954 }
955 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
956# endif /* PGM_OUT_OF_SYNC_IN_GC */
957 }
958 else /* GCPhys == NIL_RTGCPHYS */
959 {
960 /*
961 * Page not present in Guest OS or invalid page table address.
962 * This is potential virtual page access handler food.
963 *
964 * For the present we'll say that our access handlers don't
965 * work for this case - we've already discarded the page table
966 * not present case which is identical to this.
967 *
968 * When we perchance find we need this, we will probably have AVL
969 * trees (offset based) to operate on and we can measure their speed
970 * against mapping a page table and probably rearrange this handling
971 * a bit. (Like, searching virtual ranges before checking the
972 * physical address.)
973 */
974 }
975 }
976
977# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
978 /*
979 * Conclusion, this is a guest trap.
980 */
981 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
982 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
983 return VINF_EM_RAW_GUEST_TRAP;
984# else
985 /* present, but not a monitored page; perhaps the guest is probing physical memory */
986 return VINF_EM_RAW_EMULATE_INSTR;
987# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
988
989
990# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
991
992 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
993 return VERR_INTERNAL_ERROR;
994# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
995}
996#endif /* !IN_RING3 */
997
998
999/**
1000 * Emulation of the invlpg instruction.
1001 *
1002 *
1003 * @returns VBox status code.
1004 *
1005 * @param pVCpu The VMCPU handle.
1006 * @param GCPtrPage Page to invalidate.
1007 *
1008 * @remark ASSUMES that the guest is updating before invalidating. This order
1009 * isn't required by the CPU, so this is speculative and could cause
1010 * trouble.
1011 * @remark No TLB shootdown is done on any other VCPU as we assume that
1012 * invlpg emulation is the *only* reason for calling this function.
1013 * (The guest has to shoot down TLB entries on other CPUs itself)
1014 * Currently true, but keep in mind!
1015 *
1016 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1017 */
1018PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
1019{
1020#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1021 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1022 && PGM_SHW_TYPE != PGM_TYPE_EPT
1023 int rc;
1024 PVM pVM = pVCpu->CTX_SUFF(pVM);
1025 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1026
1027 Assert(PGMIsLockOwner(pVM));
1028
1029 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1030
1031# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1032 if (pPool->cDirtyPages)
1033 pgmPoolResetDirtyPages(pVM);
1034# endif
1035
1036 /*
1037 * Get the shadow PD entry and skip out if this PD isn't present.
1038 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1039 */
1040# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1041 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1042 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1043
1044 /* Fetch the pgm pool shadow descriptor. */
1045 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1046 Assert(pShwPde);
1047
1048# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1049 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1050 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
1051
1052 /* If the shadow PDPE isn't present, then skip the invalidate. */
1053 if (!pPdptDst->a[iPdpt].n.u1Present)
1054 {
1055 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1056 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1057 return VINF_SUCCESS;
1058 }
1059
1060 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1061 PPGMPOOLPAGE pShwPde = NULL;
1062 PX86PDPAE pPDDst;
1063
1064 /* Fetch the pgm pool shadow descriptor. */
1065 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1066 AssertRCSuccessReturn(rc, rc);
1067 Assert(pShwPde);
1068
1069 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1070 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1071
1072# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1073 /* PML4 */
1074 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1075 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1076 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1077 PX86PDPAE pPDDst;
1078 PX86PDPT pPdptDst;
1079 PX86PML4E pPml4eDst;
1080 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1081 if (rc != VINF_SUCCESS)
1082 {
1083 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1084 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1085 return VINF_SUCCESS;
1086 }
1087 Assert(pPDDst);
1088
1089 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1090 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1091
1092 if (!pPdpeDst->n.u1Present)
1093 {
1094 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1095 return VINF_SUCCESS;
1096 }
1097
1098 /* Fetch the pgm pool shadow descriptor. */
1099 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1100 Assert(pShwPde);
1101
1102# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1103
1104 const SHWPDE PdeDst = *pPdeDst;
1105 if (!PdeDst.n.u1Present)
1106 {
1107 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1108 return VINF_SUCCESS;
1109 }
1110
1111# if defined(IN_RC)
1112 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1113 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1114# endif
1115
1116 /*
1117 * Get the guest PD entry and calc big page.
1118 */
1119# if PGM_GST_TYPE == PGM_TYPE_32BIT
1120 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1121 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1122 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1123# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1124 unsigned iPDSrc = 0;
1125# if PGM_GST_TYPE == PGM_TYPE_PAE
1126 X86PDPE PdpeSrc;
1127 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
1128# else /* AMD64 */
1129 PX86PML4E pPml4eSrc;
1130 X86PDPE PdpeSrc;
1131 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
1132# endif
1133 GSTPDE PdeSrc;
1134
1135 if (pPDSrc)
1136 PdeSrc = pPDSrc->a[iPDSrc];
1137 else
1138 PdeSrc.u = 0;
1139# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1140
1141# if PGM_GST_TYPE == PGM_TYPE_32BIT
1142 const bool fIsBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1143# else
1144 const bool fIsBigPage = PdeSrc.b.u1Size;
1145# endif
1146
1147# ifdef IN_RING3
1148 /*
1149 * If a CR3 Sync is pending we may ignore the invalidate page operation
1150 * depending on the kind of sync and if it's a global page or not.
1151 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1152 */
1153# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1154 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1155 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1156 && fIsBigPage
1157 && PdeSrc.b.u1Global
1158 )
1159 )
1160# else
1161 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1162# endif
1163 {
1164 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1165 return VINF_SUCCESS;
1166 }
1167# endif /* IN_RING3 */
1168
1169 /*
1170 * Deal with the Guest PDE.
1171 */
1172 rc = VINF_SUCCESS;
1173 if (PdeSrc.n.u1Present)
1174 {
1175 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1176 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1177# ifndef PGM_WITHOUT_MAPPING
1178 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1179 {
1180 /*
1181 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1182 */
1183 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1184 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1185 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1186 }
1187 else
1188# endif /* !PGM_WITHOUT_MAPPING */
1189 if (!fIsBigPage)
1190 {
1191 /*
1192 * 4KB - page.
1193 */
1194 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1195 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1196
1197# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1198 /* Reset the modification counter (OpenSolaris trashes tlb entries very often) */
1199 if (pShwPage->cModifications)
1200 pShwPage->cModifications = 1;
1201# endif
1202
1203# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1204 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1205 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1206# endif
1207 if (pShwPage->GCPhys == GCPhys)
1208 {
1209# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1210 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1211 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1212 if (pPT->a[iPTEDst].n.u1Present)
1213 {
1214 /* This is very unlikely with caching/monitoring enabled. */
1215 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK, iPTEDst);
1216 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1217 }
1218# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1219 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1220 if (RT_SUCCESS(rc))
1221 rc = VINF_SUCCESS;
1222# endif
1223 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1224 PGM_INVL_PG(pVCpu, GCPtrPage);
1225 }
1226 else
1227 {
1228 /*
1229 * The page table address changed.
1230 */
1231 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1232 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1233 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1234 ASMAtomicWriteSize(pPdeDst, 0);
1235 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1236 PGM_INVL_VCPU_TLBS(pVCpu);
1237 }
1238 }
1239 else
1240 {
1241 /*
1242 * 2/4MB - page.
1243 */
1244 /* Before freeing the page, check if anything really changed. */
1245 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1246 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
1247# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1248 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1249 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1250# endif
1251 if ( pShwPage->GCPhys == GCPhys
1252 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1253 {
1254 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1255 /** @todo PAT */
1256 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1257 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1258 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1259 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1260 {
1261 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1262 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1263# if defined(IN_RC)
1264 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1265 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1266# endif
1267 return VINF_SUCCESS;
1268 }
1269 }
1270
1271 /*
1272 * Ok, the page table is present and it's been changed in the guest.
1273 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1274 * We could do this for some flushes in GC too, but we need an algorithm for
1275 * deciding which 4MB pages containing code likely to be executed very soon.
1276 */
1277 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1278 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1279 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1280 ASMAtomicWriteSize(pPdeDst, 0);
1281 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1282 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1283 }
1284 }
1285 else
1286 {
1287 /*
1288 * Page directory is not present, mark shadow PDE not present.
1289 */
1290 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1291 {
1292 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1293 ASMAtomicWriteSize(pPdeDst, 0);
1294 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1295 PGM_INVL_PG(pVCpu, GCPtrPage);
1296 }
1297 else
1298 {
1299 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1300 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1301 }
1302 }
1303# if defined(IN_RC)
1304 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1305 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1306# endif
1307 return rc;
1308
1309#else /* guest real and protected mode */
1310 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1311 return VINF_SUCCESS;
1312#endif
1313}
1314
1315
1316/**
1317 * Update the tracking of shadowed pages.
1318 *
1319 * @param pVCpu The VMCPU handle.
1320 * @param pShwPage The shadow page.
1321 * @param HCPhys The physical page that is being dereferenced.
1322 * @param iPte Shadow PTE index
1323 */
1324DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte)
1325{
1326 PVM pVM = pVCpu->CTX_SUFF(pVM);
1327
1328 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1329 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1330
1331 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1332 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1333 * 2. write protect all shadowed pages. I.e. implement caching.
1334 */
1335 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1336
1337 /*
1338 * Find the guest address.
1339 */
1340 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1341 pRam;
1342 pRam = pRam->CTX_SUFF(pNext))
1343 {
1344 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1345 while (iPage-- > 0)
1346 {
1347 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1348 {
1349 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1350
1351 Assert(pShwPage->cPresent);
1352 Assert(pPool->cPresent);
1353 pShwPage->cPresent--;
1354 pPool->cPresent--;
1355
1356 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1357 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1358 return;
1359 }
1360 }
1361 }
1362
1363 for (;;)
1364 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1365}
1366
1367
1368/**
1369 * Update the tracking of shadowed pages.
1370 *
1371 * @param pVCpu The VMCPU handle.
1372 * @param pShwPage The shadow page.
1373 * @param u16 The top 16 bits of the pPage->HCPhys.
1374 * @param pPage Pointer to the guest page. This will be modified.
1375 * @param iPTDst The index into the shadow table.
1376 */
1377DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1378{
1379 PVM pVM = pVCpu->CTX_SUFF(pVM);
1380 /*
1381 * Just deal with the simple first time here.
1382 */
1383 if (!u16)
1384 {
1385 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1386 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1387 /* Save the page table index. */
1388 PGM_PAGE_SET_PTE_INDEX(pPage, iPTDst);
1389 }
1390 else
1391 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1392
1393 /* write back */
1394 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1395 PGM_PAGE_SET_TRACKING(pPage, u16);
1396
1397 /* update statistics. */
1398 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1399 pShwPage->cPresent++;
1400 if (pShwPage->iFirstPresent > iPTDst)
1401 pShwPage->iFirstPresent = iPTDst;
1402}
1403
1404
1405/**
1406 * Modifies a shadow PTE to account for access handlers.
1407 *
1408 * @param pVM The VM handle.
1409 * @param pPage The page in question.
1410 * @param fPteSrc The flags of the source PTE.
1411 * @param pPteDst The shadow PTE (output).
1412 */
1413DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVM pVM, PCPGMPAGE pPage, uint32_t fPteSrc, PSHWPTE pPteDst)
1414{
1415 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1416 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
1417 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1418 {
1419#if PGM_SHW_TYPE == PGM_TYPE_EPT
1420 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage);
1421 pPteDst->n.u1Present = 1;
1422 pPteDst->n.u1Execute = 1;
1423 pPteDst->n.u1IgnorePAT = 1;
1424 pPteDst->n.u3EMT = VMX_EPT_MEMTYPE_WB;
1425 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1426#else
1427 pPteDst->u = (fPteSrc & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1428 | PGM_PAGE_GET_HCPHYS(pPage);
1429#endif
1430 }
1431#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1432# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1433 else if ( PGM_PAGE_IS_MMIO(pPage)
1434# if PGM_SHW_TYPE != PGM_TYPE_EPT
1435 && ( (fPteSrc & (X86_PTE_RW /*| X86_PTE_D | X86_PTE_A*/ | X86_PTE_US )) /* #PF handles D & A first. */
1436 == (X86_PTE_RW /*| X86_PTE_D | X86_PTE_A*/)
1437 || BTH_IS_NP_ACTIVE(pVM) )
1438# endif
1439# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1440 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1441# endif
1442 )
1443 {
1444 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1445# if PGM_SHW_TYPE == PGM_TYPE_EPT
1446 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1447 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg;
1448 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1449 pPteDst->n.u1Present = 0;
1450 pPteDst->n.u1Write = 1;
1451 pPteDst->n.u1Execute = 0;
1452 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1453 pPteDst->n.u3EMT = 7;
1454# else
1455 /* Set high page frame bits that MBZ (bankers on PAE, CPU dependent on AMD64). */
1456 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P;
1457# endif
1458 }
1459# endif
1460#endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1461 else
1462 {
1463 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1464 pPteDst->u = 0;
1465 }
1466 /** @todo count these kinds of entries. */
1467}
1468
1469
1470/**
1471 * Creates a 4K shadow page for a guest page.
1472 *
1473 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1474 * physical address. Of the PdeSrc argument only the flags are used. No page
1475 * structures will be mapped in this function.
1476 *
1477 * @param pVCpu The VMCPU handle.
1478 * @param pPteDst Destination page table entry.
1479 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1480 * Can safely assume that only the flags are being used.
1481 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1482 * @param pShwPage Pointer to the shadow page.
1483 * @param iPTDst The index into the shadow table.
1484 *
1485 * @remark Not used for 2/4MB pages!
1486 */
1487DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1488 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1489{
1490 if ( PteSrc.n.u1Present
1491 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1492 {
1493 PVM pVM = pVCpu->CTX_SUFF(pVM);
1494
1495# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1496 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1497 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64)
1498 if (pShwPage->fDirty)
1499 {
1500 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1501 PX86PTPAE pGstPT;
1502
1503 pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
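            /* Keep the pool's cached copy of the guest page table (maintained for dirty shadow
               pages) in sync with the guest PTE we are about to shadow. */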
1504 pGstPT->a[iPTDst].u = PteSrc.u;
1505 }
1506# endif
1507 /*
1508 * Find the ram range.
1509 */
1510 PPGMPAGE pPage;
1511 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1512 if (RT_SUCCESS(rc))
1513 {
1514 /* Ignore ballooned pages.
1515               Don't return errors or use a fatal assert here, as part of a
1516               shadow sync range might include ballooned pages. */
1517 if (PGM_PAGE_IS_BALLOONED(pPage))
1518 {
1519 Assert(!pPteDst->n.u1Present); /** @todo user tracking needs updating if this triggers. */
1520 return;
1521 }
1522
1523#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1524 /* Make the page writable if necessary. */
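            /* Zero, shared and write-monitored pages must first be converted into private,
               allocated pages before the guest can be granted write access through the
               shadow PTE. */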
1525 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1526 && ( PGM_PAGE_IS_ZERO(pPage)
1527 || ( PteSrc.n.u1Write
1528 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1529# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1530 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1531# endif
1532# ifdef VBOX_WITH_PAGE_SHARING
1533 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1534# endif
1535 )
1536 )
1537 )
1538 {
1539 rc = pgmPhysPageMakeWritable(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1540 AssertRC(rc);
1541 }
1542#endif
1543
1544 /*
1545 * Make page table entry.
1546 */
1547 SHWPTE PteDst;
1548 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1549 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage,
1550 PteSrc.u & ~( X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT
1551 | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW),
1552 &PteDst);
1553 else
1554 {
1555#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1556 /*
1557 * If the page or page directory entry is not marked accessed,
1558 * we mark the page not present.
1559 */
1560 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1561 {
1562                LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1563 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1564 PteDst.u = 0;
1565 }
1566 /*
1567 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1568 * when the page is modified.
1569 */
1570 else if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1571 {
1572 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1573 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1574 | PGM_PAGE_GET_HCPHYS(pPage)
1575 | PGM_PTFLAGS_TRACK_DIRTY;
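                /* PGM_PTFLAGS_TRACK_DIRTY tags this shadow PTE as write-protected solely for dirty
                   bit emulation; the resulting write fault is recognized by the dirty-bit tracking
                   code (CheckPageFault / CheckDirtyPageFault), which sets the guest D bit and
                   restores shadow write access. */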
1576 }
1577 else
1578#endif
1579 {
1580 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1581#if PGM_SHW_TYPE == PGM_TYPE_EPT
1582 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1583 PteDst.n.u1Present = 1;
1584 PteDst.n.u1Write = 1;
1585 PteDst.n.u1Execute = 1;
1586 PteDst.n.u1IgnorePAT = 1;
1587 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1588 /* PteDst.n.u1Size = 0 */
1589#else
1590 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1591 | PGM_PAGE_GET_HCPHYS(pPage);
1592#endif
1593 }
1594
1595 /*
1596 * Make sure only allocated pages are mapped writable.
1597 */
1598 if ( PteDst.n.u1Write
1599 && PteDst.n.u1Present
1600 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1601 {
1602 /* Still applies to shared pages. */
1603 Assert(!PGM_PAGE_IS_ZERO(pPage));
1604                PteDst.n.u1Write = 0;    /** @todo this isn't quite working yet. Why isn't it? */
1605 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1606 }
1607 }
1608
1609 /*
1610 * Keep user track up to date.
1611 */
1612 if (PteDst.n.u1Present)
1613 {
1614 if (!pPteDst->n.u1Present)
1615 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1616 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1617 {
1618 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1619 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1620 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1621 }
1622 }
1623 else if (pPteDst->n.u1Present)
1624 {
1625 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1626 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1627 }
1628
1629 /*
1630 * Update statistics and commit the entry.
1631 */
1632#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1633 if (!PteSrc.n.u1Global)
1634 pShwPage->fSeenNonGlobal = true;
1635#endif
1636 ASMAtomicWriteSize(pPteDst, PteDst.u);
1637 return;
1638 }
1639
1640/** @todo count these three different kinds. */
1641 Log2(("SyncPageWorker: invalid address in Pte\n"));
1642 }
1643 else if (!PteSrc.n.u1Present)
1644 Log2(("SyncPageWorker: page not present in Pte\n"));
1645 else
1646 Log2(("SyncPageWorker: invalid Pte\n"));
1647
1648 /*
1649 * The page is not present or the PTE is bad. Replace the shadow PTE by
1650 * an empty entry, making sure to keep the user tracking up to date.
1651 */
1652 if (pPteDst->n.u1Present)
1653 {
1654 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1655 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1656 }
1657 ASMAtomicWriteSize(pPteDst, 0);
1658}
1659
1660
1661/**
1662 * Syncs a guest OS page.
1663 *
1664 * There are no conflicts at this point, neither is there any need for
1665 * page table allocations.
1666 *
1667 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1668 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1669 *
1670 * @returns VBox status code.
1671 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1672 * @param pVCpu The VMCPU handle.
1673 * @param PdeSrc Page directory entry of the guest.
1674 * @param GCPtrPage Guest context page address.
1675 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1676 * @param uErr Fault error (X86_TRAP_PF_*).
1677 */
1678PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1679{
1680 PVM pVM = pVCpu->CTX_SUFF(pVM);
1681 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1682 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1683
1684 Assert(PGMIsLockOwner(pVM));
1685
1686#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1687 || PGM_GST_TYPE == PGM_TYPE_PAE \
1688 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1689 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1690 && PGM_SHW_TYPE != PGM_TYPE_EPT
1691
1692 /*
1693 * Assert preconditions.
1694 */
1695 Assert(PdeSrc.n.u1Present);
1696 Assert(cPages);
1697# if 0 /* rarely useful; leave for debugging. */
1698 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1699# endif
1700
1701 /*
1702 * Get the shadow PDE, find the shadow page table in the pool.
1703 */
1704# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1705 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1706 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1707
1708 /* Fetch the pgm pool shadow descriptor. */
1709 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1710 Assert(pShwPde);
1711
1712# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1713 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1714 PPGMPOOLPAGE pShwPde = NULL;
1715 PX86PDPAE pPDDst;
1716
1717 /* Fetch the pgm pool shadow descriptor. */
1718 int rc2 = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1719 AssertRCSuccessReturn(rc2, rc2);
1720 Assert(pShwPde);
1721
1722 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1723 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1724
1725# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1726 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1727 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1728 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1729 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1730
1731 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1732 AssertRCSuccessReturn(rc2, rc2);
1733 Assert(pPDDst && pPdptDst);
1734 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1735# endif
1736 SHWPDE PdeDst = *pPdeDst;
1737
1738 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1739 if (!PdeDst.n.u1Present)
1740 {
1741 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE p=%p/%RX64\n", pPdeDst, (uint64_t)PdeDst.u));
1742 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1743 return VINF_SUCCESS; /* force the instruction to be executed again. */
1744 }
1745
1746 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1747 Assert(pShwPage);
1748
1749# if PGM_GST_TYPE == PGM_TYPE_AMD64
1750 /* Fetch the pgm pool shadow descriptor. */
1751 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1752 Assert(pShwPde);
1753# endif
1754
1755# if defined(IN_RC)
1756 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1757 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1758# endif
1759
1760 /*
1761 * Check that the page is present and that the shadow PDE isn't out of sync.
1762 */
1763# if PGM_GST_TYPE == PGM_TYPE_32BIT
1764 const bool fBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1765# else
1766 const bool fBigPage = PdeSrc.b.u1Size;
1767# endif
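    /* Validate the guest PDE against its reserved-bit mask; 4K and big-page PDEs have different
       layouts and therefore different validity masks. */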
1768 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
1769 RTGCPHYS GCPhys;
1770 if (!fBigPage)
1771 {
1772 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1773# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1774 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1775 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
1776# endif
1777 }
1778 else
1779 {
1780 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
1781# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1782 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1783 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1784# endif
1785 }
1786 if ( fPdeValid
1787 && pShwPage->GCPhys == GCPhys
1788 && PdeSrc.n.u1Present
1789 && PdeSrc.n.u1User == PdeDst.n.u1User
1790 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1791# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1792 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !CPUMIsGuestNXEnabled(pVCpu))
1793# endif
1794 )
1795 {
1796 /*
1797 * Check that the PDE is marked accessed already.
1798 * Since we set the accessed bit *before* getting here on a #PF, this
1799 * check is only meant for dealing with non-#PF'ing paths.
1800 */
1801 if (PdeSrc.n.u1Accessed)
1802 {
1803 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1804 if (!fBigPage)
1805 {
1806 /*
1807 * 4KB Page - Map the guest page table.
1808 */
1809 PGSTPT pPTSrc;
1810 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1811 if (RT_SUCCESS(rc))
1812 {
1813# ifdef PGM_SYNC_N_PAGES
1814 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1815 if ( cPages > 1
1816 && !(uErr & X86_TRAP_PF_P)
1817 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1818 {
1819 /*
1820 * This code path is currently only taken when the caller is PGMTrap0eHandler
1821 * for non-present pages!
1822 *
1823 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1824 * deal with locality.
1825 */
1826 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1827# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1828 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1829 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1830# else
1831 const unsigned offPTSrc = 0;
1832# endif
1833 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1834 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1835 iPTDst = 0;
1836 else
1837 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1838 for (; iPTDst < iPTDstEnd; iPTDst++)
1839 {
1840 if (!pPTDst->a[iPTDst].n.u1Present)
1841 {
1842 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1843 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1844 NOREF(GCPtrCurPage);
1845#ifndef IN_RING0
1846 /*
1847 * Assuming kernel code will be marked as supervisor - and not as user level
1848                             * and executed using a conforming code selector - and marked as read-only.
1849 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1850 */
1851 PPGMPAGE pPage;
1852 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1853 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1854 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1855 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1856 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1857 )
1858#endif /* else: CSAM not active */
1859 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1860 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1861 GCPtrCurPage, PteSrc.n.u1Present,
1862 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1863 PteSrc.n.u1User & PdeSrc.n.u1User,
1864 (uint64_t)PteSrc.u,
1865 (uint64_t)pPTDst->a[iPTDst].u,
1866 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1867 }
1868 }
1869 }
1870 else
1871# endif /* PGM_SYNC_N_PAGES */
1872 {
1873 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1874 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1875 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1876 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1877 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1878 GCPtrPage, PteSrc.n.u1Present,
1879 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1880 PteSrc.n.u1User & PdeSrc.n.u1User,
1881 (uint64_t)PteSrc.u,
1882 (uint64_t)pPTDst->a[iPTDst].u,
1883 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1884 }
1885 }
1886 else /* MMIO or invalid page: emulated in #PF handler. */
1887 {
1888 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1889 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1890 }
1891 }
1892 else
1893 {
1894 /*
1895 * 4/2MB page - lazy syncing shadow 4K pages.
1896 * (There are many causes of getting here, it's no longer only CSAM.)
1897 */
1898 /* Calculate the GC physical address of this 4KB shadow page. */
1899 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1900 /* Find ram range. */
1901 PPGMPAGE pPage;
1902 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1903 if (RT_SUCCESS(rc))
1904 {
1905 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1906
1907# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1908 /* Try to make the page writable if necessary. */
1909 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1910 && ( PGM_PAGE_IS_ZERO(pPage)
1911 || ( PdeSrc.n.u1Write
1912 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1913# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1914 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1915# endif
1916# ifdef VBOX_WITH_PAGE_SHARING
1917 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1918# endif
1919 )
1920 )
1921 )
1922 {
1923 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1924 AssertRC(rc);
1925 }
1926# endif
1927
1928 /*
1929 * Make shadow PTE entry.
1930 */
1931 SHWPTE PteDst;
1932 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1933 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage,
1934 PdeSrc.u & ~( X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK
1935 | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT),
1936 &PteDst);
1937 else
1938 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1939 | PGM_PAGE_GET_HCPHYS(pPage);
1940
1941 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1942 if ( PteDst.n.u1Present
1943 && !pPTDst->a[iPTDst].n.u1Present)
1944 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1945
1946 /* Make sure only allocated pages are mapped writable. */
1947 if ( PteDst.n.u1Write
1948 && PteDst.n.u1Present
1949 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1950 {
1951 /* Still applies to shared pages. */
1952 Assert(!PGM_PAGE_IS_ZERO(pPage));
1953 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1954 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1955 }
1956
1957 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1958
1959 /*
1960 * If the page is not flagged as dirty and is writable, then make it read-only
1961 * at PD level, so we can set the dirty bit when the page is modified.
1962 *
1963 * ASSUMES that page access handlers are implemented on page table entry level.
1964 * Thus we will first catch the dirty access and set PDE.D and restart. If
1965 * there is an access handler, we'll trap again and let it work on the problem.
1966 */
1967 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1968 * As for invlpg, it simply frees the whole shadow PT.
1969 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1970 if ( !PdeSrc.b.u1Dirty
1971 && PdeSrc.b.u1Write)
1972 {
1973 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1974 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1975 PdeDst.n.u1Write = 0;
1976 }
1977 else
1978 {
1979 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1980 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1981 }
1982 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1983 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1984 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1985 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1986 }
1987 else
1988 {
1989 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1990 /** @todo must wipe the shadow page table in this case. */
1991 }
1992 }
1993# if defined(IN_RC)
1994 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1995 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1996# endif
1997 return VINF_SUCCESS;
1998 }
1999
2000 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
2001 }
2002 else if (fPdeValid)
2003 {
2004 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2005 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2006 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2007 }
2008 else
2009 {
2010/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2011 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2012 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2013 }
2014
2015 /*
2016 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
2017 * Yea, I'm lazy.
2018 */
2019 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2020 ASMAtomicWriteSize(pPdeDst, 0);
2021
2022# if defined(IN_RC)
2023 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2024 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2025# endif
2026 PGM_INVL_VCPU_TLBS(pVCpu);
2027 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2028
2029
2030#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2031 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2032 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2033 && !defined(IN_RC)
2034
2035# ifdef PGM_SYNC_N_PAGES
2036 /*
2037 * Get the shadow PDE, find the shadow page table in the pool.
2038 */
2039# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2040 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
2041
2042# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2043 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
2044
2045# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2046 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2047 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2048 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2049 X86PDEPAE PdeDst;
2050 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2051
2052 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2053 AssertRCSuccessReturn(rc, rc);
2054 Assert(pPDDst && pPdptDst);
2055 PdeDst = pPDDst->a[iPDDst];
2056# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2057 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2058 PEPTPD pPDDst;
2059 EPTPDE PdeDst;
2060
2061 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2062 if (rc != VINF_SUCCESS)
2063 {
2064 AssertRC(rc);
2065 return rc;
2066 }
2067 Assert(pPDDst);
2068 PdeDst = pPDDst->a[iPDDst];
2069# endif
2070 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2071 if (!PdeDst.n.u1Present)
2072 {
2073 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2074 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2075 return VINF_SUCCESS; /* force the instruction to be executed again. */
2076 }
2077
2078 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2079 if (PdeDst.n.u1Size)
2080 {
2081 Assert(HWACCMIsNestedPagingActive(pVM));
2082 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2083 return VINF_SUCCESS;
2084 }
2085
2086 /* Mask away the page offset. */
2087 GCPtrPage &= ~((RTGCPTR)0xfff);
2088
2089 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2090 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2091
2092 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2093 if ( cPages > 1
2094 && !(uErr & X86_TRAP_PF_P)
2095 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2096 {
2097 /*
2098 * This code path is currently only taken when the caller is PGMTrap0eHandler
2099 * for non-present pages!
2100 *
2101 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2102 * deal with locality.
2103 */
2104 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2105 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2106 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2107 iPTDst = 0;
2108 else
2109 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2110 for (; iPTDst < iPTDstEnd; iPTDst++)
2111 {
2112 if (!pPTDst->a[iPTDst].n.u1Present)
2113 {
2114 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2115 GSTPTE PteSrc;
2116
2117 /* Fake the page table entry */
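                /* Without guest paging the guest physical address equals the virtual address, so
                   the pseudo PTE below is an identity mapping with full access; SyncPageWorker
                   will substitute the host physical address. */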
2118 PteSrc.u = GCPtrCurPage;
2119 PteSrc.n.u1Present = 1;
2120 PteSrc.n.u1Dirty = 1;
2121 PteSrc.n.u1Accessed = 1;
2122 PteSrc.n.u1Write = 1;
2123 PteSrc.n.u1User = 1;
2124
2125 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2126
2127 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2128 GCPtrCurPage, PteSrc.n.u1Present,
2129 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2130 PteSrc.n.u1User & PdeSrc.n.u1User,
2131 (uint64_t)PteSrc.u,
2132 (uint64_t)pPTDst->a[iPTDst].u,
2133 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2134
2135 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2136 break;
2137 }
2138 else
2139 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2140 }
2141 }
2142 else
2143# endif /* PGM_SYNC_N_PAGES */
2144 {
2145 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2146 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2147 GSTPTE PteSrc;
2148
2149 /* Fake the page table entry */
2150 PteSrc.u = GCPtrCurPage;
2151 PteSrc.n.u1Present = 1;
2152 PteSrc.n.u1Dirty = 1;
2153 PteSrc.n.u1Accessed = 1;
2154 PteSrc.n.u1Write = 1;
2155 PteSrc.n.u1User = 1;
2156 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2157
2158 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
2159 GCPtrPage, PteSrc.n.u1Present,
2160 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2161 PteSrc.n.u1User & PdeSrc.n.u1User,
2162 (uint64_t)PteSrc.u,
2163 (uint64_t)pPTDst->a[iPTDst].u,
2164 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2165 }
2166 return VINF_SUCCESS;
2167
2168#else
2169 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2170 return VERR_INTERNAL_ERROR;
2171#endif
2172}
2173
2174
2175#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2176
2177/**
2178 * CheckPageFault helper for returning a page fault indicating a non-present
2179 * (NP) entry in the page translation structures.
2180 *
2181 * @returns VINF_EM_RAW_GUEST_TRAP.
2182 * @param pVCpu The virtual CPU to operate on.
2183 * @param uErr The error code of the shadow fault. Corrections to
2184 * TRPM's copy will be made if necessary.
2185 * @param GCPtrPage For logging.
2186 * @param uPageFaultLevel For logging.
2187 */
2188DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnNP)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2189{
2190 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2191 AssertMsg(!(uErr & X86_TRAP_PF_P), ("%#x\n", uErr));
2192 AssertMsg(!(uErr & X86_TRAP_PF_RSVD), ("%#x\n", uErr));
2193 if (uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2194 TRPMSetErrorCode(pVCpu, uErr & ~(X86_TRAP_PF_RSVD | X86_TRAP_PF_P));
2195
2196 Log(("CheckPageFault: real page fault (notp) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2197 return VINF_EM_RAW_GUEST_TRAP;
2198}
2199
2200
2201/**
2202 * CheckPageFault helper for returning a page fault indicating a reserved bit
2203 * (RSVD) error in the page translation structures.
2204 *
2205 * @returns VINF_EM_RAW_GUEST_TRAP.
2206 * @param pVCpu The virtual CPU to operate on.
2207 * @param uErr The error code of the shadow fault. Corrections to
2208 * TRPM's copy will be made if necessary.
2209 * @param GCPtrPage For logging.
2210 * @param uPageFaultLevel For logging.
2211 */
2212DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnRSVD)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2213{
2214 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2215 if ((uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2216 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_RSVD | X86_TRAP_PF_P);
2217
2218 Log(("CheckPageFault: real page fault (rsvd) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2219 return VINF_EM_RAW_GUEST_TRAP;
2220}
2221
2222
2223/**
2224 * CheckPageFault helper for returning a page protection fault (P).
2225 *
2226 * @returns VINF_EM_RAW_GUEST_TRAP.
2227 * @param pVCpu The virtual CPU to operate on.
2228 * @param uErr The error code of the shadow fault. Corrections to
2229 * TRPM's copy will be made if necessary.
2230 * @param GCPtrPage For logging.
2231 * @param uPageFaultLevel For logging.
2232 */
2233DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnProt)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2234{
2235 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2236 AssertMsg(uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID), ("%#x\n", uErr));
2237 if ((uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) != X86_TRAP_PF_P)
2238 TRPMSetErrorCode(pVCpu, (uErr & ~X86_TRAP_PF_RSVD) | X86_TRAP_PF_P);
2239
2240 Log(("CheckPageFault: real page fault (prot) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2241 return VINF_EM_RAW_GUEST_TRAP;
2242}
2243
2244
2245/**
2246 * Investigate a page fault to identify ones targeted at the guest and to
2247 * handle write protection page faults caused by dirty bit tracking.
2248 *
2249 * This will also detect invalid entries and raise X86_TRAP_PF_RSVD.
2250 *
2251 * @returns VBox status code.
2252 * @param pVCpu The VMCPU handle.
2253 * @param uErr Page fault error code. The X86_TRAP_PF_RSVD flag
2254 * cannot be trusted as it is used for MMIO optimizations.
2255 * @param pPdeSrc Guest page directory entry.
2256 * @param GCPtrPage Guest context page address.
2257 */
2258PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2259{
2260 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2261 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2262# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2263 bool fMaybeNXEFault = (uErr & X86_TRAP_PF_ID) && CPUMIsGuestNXEnabled(pVCpu);
2264# endif
2265 bool fMaybeWriteProtFault = fWriteFault && (fUserLevelFault || CPUMIsGuestR0WriteProtEnabled(pVCpu));
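    /* A supervisor write to a read-only page only faults when CR0.WP is set; user-mode writes
       always do. */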
2266 PVM pVM = pVCpu->CTX_SUFF(pVM);
2267 int rc;
2268
2269 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2270
2271 /*
2272 * Note! For PAE it is safe to assume that bad guest physical addresses
2273     *       (which return all FFs) in the translation tables will cause
2274 * #PF(RSVD). The same will be the case for long mode provided the
2275 * physical address width is less than 52 bits - this we ASSUME.
2276 *
2277 * Note! No convenient shortcuts here, we have to validate everything!
2278 */
2279
2280# if PGM_GST_TYPE == PGM_TYPE_AMD64
2281 /*
2282 * Real page fault? (PML4E level)
2283 */
2284 PX86PML4 pPml4Src = pgmGstGetLongModePML4Ptr(pVCpu);
2285 if (RT_UNLIKELY(!pPml4Src))
2286 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 0);
2287
2288 PX86PML4E pPml4eSrc = &pPml4Src->a[(GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK];
2289 if (!pPml4eSrc->n.u1Present)
2290 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 0);
2291 if (RT_UNLIKELY(!GST_IS_PML4E_VALID(pVCpu, *pPml4eSrc)))
2292 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 0);
2293 if ( (fMaybeWriteProtFault && !pPml4eSrc->n.u1Write)
2294 || (fMaybeNXEFault && pPml4eSrc->n.u1NoExecute)
2295 || (fUserLevelFault && !pPml4eSrc->n.u1User) )
2296 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 0);
2297
2298 /*
2299 * Real page fault? (PDPE level)
2300 */
2301 PX86PDPT pPdptSrc;
2302 rc = PGM_GCPHYS_2_PTR_BY_VMCPU(pVCpu, pPml4eSrc->u & X86_PML4E_PG_MASK, &pPdptSrc);
2303 if (RT_FAILURE(rc))
2304 {
2305 AssertMsgReturn(rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc), rc);
2306 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2307 }
2308
2309 PX86PDPE pPdpeSrc = &pPdptSrc->a[(GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64];
2310 if (!pPdpeSrc->n.u1Present)
2311 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 1);
2312 if (!GST_IS_PDPE_VALID(pVCpu, *pPdpeSrc))
2313 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2314 if ( (fMaybeWriteProtFault && !pPdpeSrc->lm.u1Write)
2315 || (fMaybeNXEFault && pPdpeSrc->lm.u1NoExecute)
2316 || (fUserLevelFault && !pPdpeSrc->lm.u1User) )
2317 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 1);
2318
2319# elif PGM_GST_TYPE == PGM_TYPE_PAE
2320 /*
2321 * Real page fault? (PDPE level)
2322 */
2323 PX86PDPT pPdptSrc = pgmGstGetPaePDPTPtr(pVCpu);
2324 if (RT_UNLIKELY(!pPdptSrc))
2325 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2326/** @todo Handle bad CR3 address. */
2327 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(pVCpu, GCPtrPage);
2328 if (!pPdpeSrc->n.u1Present)
2329 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 1);
2330 if (!GST_IS_PDPE_VALID(pVCpu, *pPdpeSrc))
2331 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2332# endif /* PGM_GST_TYPE == PGM_TYPE_PAE */
2333
2334 /*
2335 * Real page fault? (PDE level)
2336 */
2337 if (!pPdeSrc->n.u1Present)
2338 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 2);
2339# if PGM_GST_TYPE == PGM_TYPE_32BIT
2340 bool const fBigPage = pPdeSrc->b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
2341# else
2342 bool const fBigPage = pPdeSrc->b.u1Size;
2343# endif
2344 if (!fBigPage ? !GST_IS_PDE_VALID(pVCpu, *pPdeSrc) : !GST_IS_BIG_PDE_VALID(pVCpu, *pPdeSrc))
2345 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 2);
2346 if ( (fMaybeWriteProtFault && !pPdeSrc->n.u1Write)
2347# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2348 || (fMaybeNXEFault && pPdeSrc->n.u1NoExecute)
2349# endif
2350 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2351 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 2);
2352
2353 /*
2354 * First check the easy case where the page directory has been marked
2355 * read-only to track the dirty bit of an emulated BIG page.
2356 */
2357 if (fBigPage)
2358 {
2359 /* Mark guest page directory as accessed */
2360# if PGM_GST_TYPE == PGM_TYPE_AMD64
2361 pPml4eSrc->n.u1Accessed = 1;
2362 pPdpeSrc->lm.u1Accessed = 1;
2363# endif
2364 pPdeSrc->b.u1Accessed = 1;
2365
2366        /* Mark the guest PDE entry dirty if it's a write access. */
2367 if (fWriteFault)
2368 pPdeSrc->b.u1Dirty = 1;
2369 }
2370 else
2371 {
2372 /*
2373 * Map the guest page table.
2374 */
2375 PGSTPT pPTSrc;
2376 PGSTPTE pPteSrc;
2377 GSTPTE PteSrc;
2378 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2379 if (RT_SUCCESS(rc))
2380 {
2381 pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2382 PteSrc.u = pPteSrc->u;
2383 }
2384 else if (rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS)
2385 {
2386 /* All bits in the PTE are set. */
2387# if PGM_GST_TYPE == PGM_TYPE_32BIT
2388 PteSrc.u = UINT32_MAX;
2389# else
2390 PteSrc.u = UINT64_MAX;
2391# endif
2392 pPteSrc = &PteSrc;
2393 }
2394 else
2395 {
2396 AssertRC(rc);
2397 return rc;
2398 }
2399
2400 /*
2401 * Real page fault?
2402 */
2403 if (!PteSrc.n.u1Present)
2404 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 3);
2405 if (!GST_IS_PTE_VALID(pVCpu, PteSrc))
2406 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 3);
2407 if ( (fMaybeWriteProtFault && !PteSrc.n.u1Write)
2408# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2409 || (fMaybeNXEFault && PteSrc.n.u1NoExecute)
2410# endif
2411 || (fUserLevelFault && !PteSrc.n.u1User) )
2412 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 0);
2413
2414 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2415
2416 /*
2417 * Set the accessed bits in the page directory and the page table.
2418 */
2419# if PGM_GST_TYPE == PGM_TYPE_AMD64
2420 pPml4eSrc->n.u1Accessed = 1;
2421 pPdpeSrc->lm.u1Accessed = 1;
2422# endif
2423 pPdeSrc->n.u1Accessed = 1;
2424 pPteSrc->n.u1Accessed = 1;
2425
2426 /*
2427 * Set the dirty flag in the PTE if it's a write access.
2428 */
2429 if (fWriteFault)
2430 {
2431# ifdef VBOX_WITH_STATISTICS
2432 if (!pPteSrc->n.u1Dirty)
2433 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2434 else
2435 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2436# endif
2437
2438 pPteSrc->n.u1Dirty = 1;
2439 }
2440 }
2441 return VINF_SUCCESS;
2442}
2443
2444
2445/**
2446 * Handle dirty bit tracking faults.
2447 *
2448 * @returns VBox status code.
2449 * @param pVCpu The VMCPU handle.
2450 * @param uErr Page fault error code.
2451 * @param pPdeSrc Guest page directory entry.
2452 * @param pPdeDst Shadow page directory entry.
2453 * @param GCPtrPage Guest context page address.
2454 */
2455PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2456{
2457# if PGM_GST_TYPE == PGM_TYPE_32BIT
2458 const bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
2459# else
2460 const bool fBigPagesSupported = true;
2461# endif
2462 PVM pVM = pVCpu->CTX_SUFF(pVM);
2463 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2464
2465 Assert(PGMIsLockOwner(pVM));
2466
2467 /*
2468 * Handle big page.
2469 */
2470 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2471 {
2472 if ( pPdeDst->n.u1Present
2473 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2474 {
2475 SHWPDE PdeDst = *pPdeDst;
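            /* The shadow PDE was write-protected solely for dirty-bit tracking of this big page;
               restore write access and drop the tracking flag. */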
2476
2477 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2478 Assert(pPdeSrc->b.u1Write);
2479
2480 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2481 * fault again and take this path to only invalidate the entry (see below).
2482 */
2483 PdeDst.n.u1Write = 1;
2484 PdeDst.n.u1Accessed = 1;
2485 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2486 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2487 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2488 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2489 }
2490
2491# ifdef IN_RING0
2492 /* Check for stale TLB entry; only applies to the SMP guest case. */
2493 if ( pVM->cCpus > 1
2494 && pPdeDst->n.u1Write
2495 && pPdeDst->n.u1Accessed)
2496 {
2497 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2498 if (pShwPage)
2499 {
2500 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2501 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2502 if ( pPteDst->n.u1Present
2503 && pPteDst->n.u1Write)
2504 {
2505 /* Stale TLB entry. */
2506 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2507 PGM_INVL_PG(pVCpu, GCPtrPage);
2508 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2509 }
2510 }
2511 }
2512# endif /* IN_RING0 */
2513 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2514 }
2515
2516 /*
2517 * Map the guest page table.
2518 */
2519 PGSTPT pPTSrc;
2520 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2521 if (RT_FAILURE(rc))
2522 {
2523 AssertRC(rc);
2524 return rc;
2525 }
2526
2527 if (pPdeDst->n.u1Present)
2528 {
2529 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2530 const GSTPTE PteSrc = *pPteSrc;
2531
2532#ifndef IN_RING0
2533 /* Bail out here as pgmPoolGetPage will return NULL and we'll crash below.
2534 * Our individual shadow handlers will provide more information and force a fatal exit.
2535 */
2536 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2537 {
2538 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2539 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2540 }
2541#endif
2542 /*
2543 * Map shadow page table.
2544 */
2545 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2546 if (pShwPage)
2547 {
2548 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2549 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2550 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2551 {
2552 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2553 {
2554 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2555 SHWPTE PteDst = *pPteDst;
2556
2557 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2558 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2559
2560 Assert(pPteSrc->n.u1Write);
2561
2562 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2563 * entry will not harm; write access will simply fault again and
2564 * take this path to only invalidate the entry.
2565 */
2566 if (RT_LIKELY(pPage))
2567 {
2568 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2569 {
2570 AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2571 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2572 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2573 PteDst.n.u1Write = 0;
2574 }
2575 else
2576 {
2577 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2578 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2579 {
2580 rc = pgmPhysPageMakeWritable(pVM, pPage, pPteSrc->u & GST_PTE_PG_MASK);
2581 AssertRC(rc);
2582 }
2583 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2584 PteDst.n.u1Write = 1;
2585 else
2586 {
2587 /* Still applies to shared pages. */
2588 Assert(!PGM_PAGE_IS_ZERO(pPage));
2589 PteDst.n.u1Write = 0;
2590 }
2591 }
2592 }
2593 else
2594 PteDst.n.u1Write = 1; /** @todo r=bird: This doesn't make sense to me. */
2595
2596 PteDst.n.u1Dirty = 1;
2597 PteDst.n.u1Accessed = 1;
2598 PteDst.au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2599 ASMAtomicWriteSize(pPteDst, PteDst.u);
2600 PGM_INVL_PG(pVCpu, GCPtrPage);
2601 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2602 }
2603
2604# ifdef IN_RING0
2605 /* Check for stale TLB entry; only applies to the SMP guest case. */
2606 if ( pVM->cCpus > 1
2607 && pPteDst->n.u1Write == 1
2608 && pPteDst->n.u1Accessed == 1)
2609 {
2610 /* Stale TLB entry. */
2611 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2612 PGM_INVL_PG(pVCpu, GCPtrPage);
2613 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2614 }
2615# endif
2616 }
2617 }
2618 else
2619 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2620 }
2621
2622 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2623}
2624
2625#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2626
2627
2628/**
2629 * Sync a shadow page table.
2630 *
2631 * The shadow page table is not present. This includes the case where
2632 * there is a conflict with a mapping.
2633 *
2634 * @returns VBox status code.
2635 * @param pVCpu The VMCPU handle.
2636 * @param   iPDSrc      Page directory index.
2637 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2638 * Assume this is a temporary mapping.
2639 * @param GCPtrPage GC Pointer of the page that caused the fault
2640 */
2641PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2642{
2643 PVM pVM = pVCpu->CTX_SUFF(pVM);
2644 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2645
2646 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2647#if 0 /* rarely useful; leave for debugging. */
2648 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2649#endif
2650 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2651
2652 Assert(PGMIsLocked(pVM));
2653
2654#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2655 || PGM_GST_TYPE == PGM_TYPE_PAE \
2656 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2657 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2658 && PGM_SHW_TYPE != PGM_TYPE_EPT
2659
2660 int rc = VINF_SUCCESS;
2661
2662 /*
2663 * Validate input a little bit.
2664 */
2665 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2666# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2667 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2668 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2669
2670 /* Fetch the pgm pool shadow descriptor. */
2671 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2672 Assert(pShwPde);
2673
2674# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2675 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2676 PPGMPOOLPAGE pShwPde = NULL;
2677 PX86PDPAE pPDDst;
2678 PSHWPDE pPdeDst;
2679
2680 /* Fetch the pgm pool shadow descriptor. */
2681 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2682 AssertRCSuccessReturn(rc, rc);
2683 Assert(pShwPde);
2684
2685 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2686 pPdeDst = &pPDDst->a[iPDDst];
2687
2688# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2689 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2690 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2691 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2692 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2693 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2694 AssertRCSuccessReturn(rc, rc);
2695 Assert(pPDDst);
2696 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2697# endif
2698 SHWPDE PdeDst = *pPdeDst;
2699
2700# if PGM_GST_TYPE == PGM_TYPE_AMD64
2701 /* Fetch the pgm pool shadow descriptor. */
2702 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2703 Assert(pShwPde);
2704# endif
2705
2706# ifndef PGM_WITHOUT_MAPPINGS
2707 /*
2708 * Check for conflicts.
2709 * RC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2710 * R3: Simply resolve the conflict.
2711 */
2712 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2713 {
2714 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2715# ifndef IN_RING3
2716 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2717 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2718 return VERR_ADDRESS_CONFLICT;
2719
2720# else /* IN_RING3 */
2721 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2722 Assert(pMapping);
2723# if PGM_GST_TYPE == PGM_TYPE_32BIT
2724 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2725# elif PGM_GST_TYPE == PGM_TYPE_PAE
2726 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2727# else
2728 AssertFailed(); /* can't happen for amd64 */
2729# endif
2730 if (RT_FAILURE(rc))
2731 {
2732 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2733 return rc;
2734 }
2735 PdeDst = *pPdeDst;
2736# endif /* IN_RING3 */
2737 }
2738# endif /* !PGM_WITHOUT_MAPPINGS */
2739 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2740
2741# if defined(IN_RC)
2742 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2743 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2744# endif
2745
2746 /*
2747 * Sync page directory entry.
2748 */
2749 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2750 if (PdeSrc.n.u1Present)
2751 {
2752 /*
2753 * Allocate & map the page table.
2754 */
2755 PSHWPT pPTDst;
2756# if PGM_GST_TYPE == PGM_TYPE_32BIT
2757 const bool fPageTable = !PdeSrc.b.u1Size || !CPUMIsGuestPageSizeExtEnabled(pVCpu);
2758# else
2759 const bool fPageTable = !PdeSrc.b.u1Size;
2760# endif
2761 PPGMPOOLPAGE pShwPage;
2762 RTGCPHYS GCPhys;
2763 if (fPageTable)
2764 {
2765 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2766# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2767 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2768 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2769# endif
2770 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2771 }
2772 else
2773 {
2774 PGMPOOLACCESS enmAccess;
2775# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2776 const bool fNoExecute = PdeSrc.n.u1NoExecute && CPUMIsGuestNXEnabled(pVCpu);
2777# else
2778 const bool fNoExecute = false;
2779# endif
2780
2781 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
2782# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2783 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2784 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2785# endif
2786 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2787 if (PdeSrc.n.u1User)
2788 {
2789 if (PdeSrc.n.u1Write)
2790 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2791 else
2792 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2793 }
2794 else
2795 {
2796 if (PdeSrc.n.u1Write)
2797 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2798 else
2799 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2800 }
2801 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2802 }
2803 if (rc == VINF_SUCCESS)
2804 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2805 else if (rc == VINF_PGM_CACHED_PAGE)
2806 {
2807 /*
2808 * The PT was cached, just hook it up.
2809 */
2810 if (fPageTable)
2811 PdeDst.u = pShwPage->Core.Key
2812 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2813 else
2814 {
2815 PdeDst.u = pShwPage->Core.Key
2816 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2817 /* (see explanation and assumptions further down.) */
2818 if ( !PdeSrc.b.u1Dirty
2819 && PdeSrc.b.u1Write)
2820 {
2821 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2822 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2823 PdeDst.b.u1Write = 0;
2824 }
2825 }
2826 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2827# if defined(IN_RC)
2828 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2829# endif
2830 return VINF_SUCCESS;
2831 }
2832 else if (rc == VERR_PGM_POOL_FLUSHED)
2833 {
2834 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2835# if defined(IN_RC)
2836 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2837# endif
2838 return VINF_PGM_SYNC_CR3;
2839 }
2840 else
2841 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
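    /* Hook the shadow page table up to the shadow PDE; Core.Key is the host physical address of
       the pool page. */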
2842 PdeDst.u &= X86_PDE_AVL_MASK;
2843 PdeDst.u |= pShwPage->Core.Key;
2844
2845 /*
2846 * Page directory has been accessed (this is a fault situation, remember).
2847 */
2848 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2849 if (fPageTable)
2850 {
2851 /*
2852 * Page table - 4KB.
2853 *
2854 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2855 */
2856 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2857 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2858 PGSTPT pPTSrc;
2859 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2860 if (RT_SUCCESS(rc))
2861 {
2862 /*
2863 * Start by syncing the page directory entry so CSAM's TLB trick works.
2864 */
2865 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2866 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2867 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2868# if defined(IN_RC)
2869 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2870# endif
2871
2872 /*
2873 * Directory/page user or supervisor privilege: (same goes for read/write)
2874 *
2875                 * Directory     Page      Combined
2876                 *   U/S          U/S        U/S
2877                 *    0            0          0
2878                 *    0            1          0
2879                 *    1            0          0
2880                 *    1            1          1
2881 *
2882 * Simple AND operation. Table listed for completeness.
2883 *
2884 */
2885 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2886# ifdef PGM_SYNC_N_PAGES
2887 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2888 unsigned iPTDst = iPTBase;
2889 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2890 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2891 iPTDst = 0;
2892 else
2893 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2894# else /* !PGM_SYNC_N_PAGES */
2895 unsigned iPTDst = 0;
2896 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2897# endif /* !PGM_SYNC_N_PAGES */
2898# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2899 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2900 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2901# else
2902 const unsigned offPTSrc = 0;
2903# endif
2904 for (; iPTDst < iPTDstEnd; iPTDst++)
2905 {
2906 const unsigned iPTSrc = iPTDst + offPTSrc;
2907 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2908
2909 if (PteSrc.n.u1Present)
2910 {
2911# ifndef IN_RING0
2912 /*
2913 * Assuming kernel code will be marked as supervisor - and not as user level
2914                         * and executed using a conforming code selector - and marked as read-only.
2915 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2916 */
2917 PPGMPAGE pPage;
2918 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2919 || !CSAMDoesPageNeedScanning(pVM, (iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT))
2920 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2921 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2922 )
2923# endif
2924 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2925 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2926 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2927 PteSrc.n.u1Present,
2928 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2929 PteSrc.n.u1User & PdeSrc.n.u1User,
2930 (uint64_t)PteSrc.u,
2931 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2932 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2933 }
2934 /* else: the page table was cleared by the pool */
2935 } /* for PTEs */
2936 }
2937 }
2938 else
2939 {
2940 /*
2941 * Big page - 2/4MB.
2942 *
2943 * We'll walk the ram range list in parallel and optimize lookups.
2944             * We will only sync one shadow page table at a time.
2945 */
2946 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2947
2948 /**
2949 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2950 */
2951
2952 /*
2953 * Start by syncing the page directory entry.
2954 */
2955 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2956 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2957
2958 /*
2959 * If the page is not flagged as dirty and is writable, then make it read-only
2960 * at PD level, so we can set the dirty bit when the page is modified.
2961 *
2962 * ASSUMES that page access handlers are implemented on page table entry level.
2963 * Thus we will first catch the dirty access and set PDE.D and restart. If
2964 * there is an access handler, we'll trap again and let it work on the problem.
2965 */
2966 /** @todo move the above stuff to a section in the PGM documentation. */
2967 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2968 if ( !PdeSrc.b.u1Dirty
2969 && PdeSrc.b.u1Write)
2970 {
2971 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2972 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2973 PdeDst.b.u1Write = 0;
2974 }
2975 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2976# if defined(IN_RC)
2977 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2978# endif
2979
2980 /*
2981 * Fill the shadow page table.
2982 */
2983 /* Get address and flags from the source PDE. */
2984 SHWPTE PteDstBase;
2985 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
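            /* Each shadow PTE below is formed by OR'ing these flags with the host physical address
               of the corresponding guest page. */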
2986
2987 /* Loop thru the entries in the shadow PT. */
2988 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2989 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2990 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2991 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2992 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2993 unsigned iPTDst = 0;
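     /* Note: we walk the RAM range list in step with GCPhys. Pages backed by a RAM
      * range are given shadow PTEs below (subject to the handler/balloon checks),
      * while gaps (MMIO or unassigned physical memory) are left not-present so that
      * accesses to them fault and can be handled individually. */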
2994 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2995 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2996 {
2997 /* Advance ram range list. */
2998 while (pRam && GCPhys > pRam->GCPhysLast)
2999 pRam = pRam->CTX_SUFF(pNext);
3000 if (pRam && GCPhys >= pRam->GCPhys)
3001 {
3002 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
3003 do
3004 {
3005 /* Make shadow PTE. */
3006 PPGMPAGE pPage = &pRam->aPages[iHCPage];
3007 SHWPTE PteDst;
3008
3009# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3010 /* Try to make the page writable if necessary. */
3011 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
3012 && ( PGM_PAGE_IS_ZERO(pPage)
3013 || ( PteDstBase.n.u1Write
3014 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
3015# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
3016 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
3017# endif
3018# ifdef VBOX_WITH_PAGE_SHARING
3019 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
3020# endif
3021 && !PGM_PAGE_IS_BALLOONED(pPage))
3022 )
3023 )
3024 {
3025 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
3026 AssertRCReturn(rc, rc);
3027 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
3028 break;
3029 }
3030# endif
3031
3032 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
3033 {
3034 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
3035 {
3036 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
3037 PteDst.n.u1Write = 0;
3038 }
3039 else
3040 PteDst.u = 0;
3041 }
3042 else if (PGM_PAGE_IS_BALLOONED(pPage))
3043 {
3044 /* Skip ballooned pages. */
3045 PteDst.u = 0;
3046 }
3047# ifndef IN_RING0
3048 /*
3049 * Assuming kernel code will be marked as supervisor, not as user level, and executed
3050 * using a conforming code selector. Don't check for read-only, as that would imply the
3051 * whole 4MB is either code or read-only data; Linux enables write access for its large pages.
3052 */
3053 else if ( !PdeSrc.n.u1User
3054 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
3055 PteDst.u = 0;
3056# endif
3057 else
3058 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
3059
3060 /* Only map writable pages writable. */
3061 if ( PteDst.n.u1Write
3062 && PteDst.n.u1Present
3063 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
3064 {
3065 /* Still applies to shared pages. */
3066 Assert(!PGM_PAGE_IS_ZERO(pPage));
3067 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
3068 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
3069 }
3070
3071 if (PteDst.n.u1Present)
3072 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
3073
3074 /* commit it */
3075 pPTDst->a[iPTDst] = PteDst;
3076 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
3077 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
3078 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
3079
3080 /* advance */
3081 GCPhys += PAGE_SIZE;
3082 iHCPage++;
3083 iPTDst++;
3084 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3085 && GCPhys <= pRam->GCPhysLast);
3086 }
3087 else if (pRam)
3088 {
3089 Log(("Invalid pages at %RGp\n", GCPhys));
3090 do
3091 {
3092 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
3093 GCPhys += PAGE_SIZE;
3094 iPTDst++;
3095 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3096 && GCPhys < pRam->GCPhys);
3097 }
3098 else
3099 {
3100 Log(("Invalid pages at %RGp (2)\n", GCPhys));
3101 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
3102 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
3103 }
3104 } /* while more PTEs */
3105 } /* 4KB / 4MB */
3106 }
3107 else
3108 AssertRelease(!PdeDst.n.u1Present);
3109
3110 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3111 if (RT_FAILURE(rc))
3112 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
3113 return rc;
3114
3115#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
3116 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3117 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
3118 && !defined(IN_RC)
3119
3120 /*
3121 * Validate input a little bit.
3122 */
3123 int rc = VINF_SUCCESS;
3124# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3125 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3126 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3127
3128 /* Fetch the pgm pool shadow descriptor. */
3129 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3130 Assert(pShwPde);
3131
3132# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3133 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3134 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
3135 PX86PDPAE pPDDst;
3136 PSHWPDE pPdeDst;
3137
3138 /* Fetch the pgm pool shadow descriptor. */
3139 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
3140 AssertRCSuccessReturn(rc, rc);
3141 Assert(pShwPde);
3142
3143 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
3144 pPdeDst = &pPDDst->a[iPDDst];
3145
3146# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3147 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3148 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3149 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3150 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
3151 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3152 AssertRCSuccessReturn(rc, rc);
3153 Assert(pPDDst);
3154 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3155
3156 /* Fetch the pgm pool shadow descriptor. */
3157 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3158 Assert(pShwPde);
3159
3160# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3161 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3162 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3163 PEPTPD pPDDst;
3164 PEPTPDPT pPdptDst;
3165
3166 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3167 if (rc != VINF_SUCCESS)
3168 {
3169 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3170 AssertRC(rc);
3171 return rc;
3172 }
3173 Assert(pPDDst);
3174 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3175
3176 /* Fetch the pgm pool shadow descriptor. */
3177 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3178 Assert(pShwPde);
3179# endif
3180 SHWPDE PdeDst = *pPdeDst;
3181
3182 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
3183 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3184
3185# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
3186 if (BTH_IS_NP_ACTIVE(pVM))
3187 {
3188 PPGMPAGE pPage;
3189
3190 /* Check if we allocated a big page before for this 2 MB range. */
3191 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
3192 if (RT_SUCCESS(rc))
3193 {
3194 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3195
3196 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3197 {
3198 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3199 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3200 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3201 }
3202 else if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
3203 {
3204 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3205 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
3206 if (RT_SUCCESS(rc))
3207 {
3208 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3209 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3210 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3211 }
3212 }
3213 else if (PGMIsUsingLargePages(pVM))
3214 {
3215 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3216 if (RT_SUCCESS(rc))
3217 {
3218 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3219 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3220 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3221 }
3222 else
3223 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3224 }
3225
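     /* If any of the cases above yielded a usable 2 MB backing page, map it directly
      * with a large shadow PDE; no shadow page table is needed for this range. */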
3226 if (HCPhys != NIL_RTHCPHYS)
3227 {
3228 PdeDst.u &= X86_PDE_AVL_MASK;
3229 PdeDst.u |= HCPhys;
3230 PdeDst.n.u1Present = 1;
3231 PdeDst.n.u1Write = 1;
3232 PdeDst.b.u1Size = 1;
3233# if PGM_SHW_TYPE == PGM_TYPE_EPT
3234 PdeDst.n.u1Execute = 1;
3235 PdeDst.b.u1IgnorePAT = 1;
3236 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3237# else
3238 PdeDst.n.u1User = 1;
3239# endif
3240 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3241
3242 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3243 /* Add a reference to the first page only. */
3244 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3245
3246 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3247 return VINF_SUCCESS;
3248 }
3249 }
3250 }
3251# endif /* PGM_WITH_LARGE_PAGES && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3252
3253 GSTPDE PdeSrc;
3254 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3255 PdeSrc.n.u1Present = 1;
3256 PdeSrc.n.u1Write = 1;
3257 PdeSrc.n.u1Accessed = 1;
3258 PdeSrc.n.u1User = 1;
3259
3260 /*
3261 * Allocate & map the page table.
3262 */
3263 PSHWPT pPTDst;
3264 PPGMPOOLPAGE pShwPage;
3265 RTGCPHYS GCPhys;
3266
3267 /* Virtual address = physical address */
3268 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3269 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
3270
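     /* The pool keys the shadow PT by the PD-entry aligned guest physical address,
      * so VINF_PGM_CACHED_PAGE means an existing table was handed back instead of a
      * freshly allocated one. */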
3271 if ( rc == VINF_SUCCESS
3272 || rc == VINF_PGM_CACHED_PAGE)
3273 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3274 else
3275 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3276
3277 PdeDst.u &= X86_PDE_AVL_MASK;
3278 PdeDst.u |= pShwPage->Core.Key;
3279 PdeDst.n.u1Present = 1;
3280 PdeDst.n.u1Write = 1;
3281# if PGM_SHW_TYPE == PGM_TYPE_EPT
3282 PdeDst.n.u1Execute = 1;
3283# else
3284 PdeDst.n.u1User = 1;
3285 PdeDst.n.u1Accessed = 1;
3286# endif
3287 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3288
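     /* With no guest paging the mapping is 1:1, so let SyncPage fill in the PTEs
      * around GCPtrPage using the faked, fully permissive PdeSrc set up above. */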
3289 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3290 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3291 return rc;
3292
3293#else
3294 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3295 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3296 return VERR_INTERNAL_ERROR;
3297#endif
3298}
3299
3300
3301
3302/**
3303 * Prefetch a page/set of pages.
3304 *
3305 * Typically used to sync commonly used pages before entering raw mode
3306 * after a CR3 reload.
3307 *
3308 * @returns VBox status code.
3309 * @param pVCpu The VMCPU handle.
3310 * @param GCPtrPage Page to invalidate.
3311 */
3312PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3313{
3314#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3315 || PGM_GST_TYPE == PGM_TYPE_REAL \
3316 || PGM_GST_TYPE == PGM_TYPE_PROT \
3317 || PGM_GST_TYPE == PGM_TYPE_PAE \
3318 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3319 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3320 && PGM_SHW_TYPE != PGM_TYPE_EPT
3321
3322 /*
3323 * Check that all Guest levels thru the PDE are present, getting the
3324 * PD and PDE in the process.
3325 */
3326 int rc = VINF_SUCCESS;
3327# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3328# if PGM_GST_TYPE == PGM_TYPE_32BIT
3329 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3330 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3331# elif PGM_GST_TYPE == PGM_TYPE_PAE
3332 unsigned iPDSrc;
3333 X86PDPE PdpeSrc;
3334 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3335 if (!pPDSrc)
3336 return VINF_SUCCESS; /* not present */
3337# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3338 unsigned iPDSrc;
3339 PX86PML4E pPml4eSrc;
3340 X86PDPE PdpeSrc;
3341 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3342 if (!pPDSrc)
3343 return VINF_SUCCESS; /* not present */
3344# endif
3345 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3346# else
3347 PGSTPD pPDSrc = NULL;
3348 const unsigned iPDSrc = 0;
3349 GSTPDE PdeSrc;
3350
3351 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3352 PdeSrc.n.u1Present = 1;
3353 PdeSrc.n.u1Write = 1;
3354 PdeSrc.n.u1Accessed = 1;
3355 PdeSrc.n.u1User = 1;
3356# endif
3357
3358 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3359 {
3360 PVM pVM = pVCpu->CTX_SUFF(pVM);
3361 pgmLock(pVM);
3362
3363# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3364 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3365# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3366 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3367 PX86PDPAE pPDDst;
3368 X86PDEPAE PdeDst;
3369# if PGM_GST_TYPE != PGM_TYPE_PAE
3370 X86PDPE PdpeSrc;
3371
3372 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3373 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3374# endif
3375 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3376 if (rc != VINF_SUCCESS)
3377 {
3378 pgmUnlock(pVM);
3379 AssertRC(rc);
3380 return rc;
3381 }
3382 Assert(pPDDst);
3383 PdeDst = pPDDst->a[iPDDst];
3384
3385# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3386 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3387 PX86PDPAE pPDDst;
3388 X86PDEPAE PdeDst;
3389
3390# if PGM_GST_TYPE == PGM_TYPE_PROT
3391 /* AMD-V nested paging */
3392 X86PML4E Pml4eSrc;
3393 X86PDPE PdpeSrc;
3394 PX86PML4E pPml4eSrc = &Pml4eSrc;
3395
3396 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3397 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3398 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3399# endif
3400
3401 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3402 if (rc != VINF_SUCCESS)
3403 {
3404 pgmUnlock(pVM);
3405 AssertRC(rc);
3406 return rc;
3407 }
3408 Assert(pPDDst);
3409 PdeDst = pPDDst->a[iPDDst];
3410# endif
3411 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3412 {
3413 if (!PdeDst.n.u1Present)
3414 {
3415 /** @todo r=bird: This guy will set the A bit on the PDE,
3416 * probably harmless. */
3417 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3418 }
3419 else
3420 {
3421 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3422 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3423 * makes no sense to prefetch more than one page.
3424 */
3425 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3426 if (RT_SUCCESS(rc))
3427 rc = VINF_SUCCESS;
3428 }
3429 }
3430 pgmUnlock(pVM);
3431 }
3432 return rc;
3433
3434#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3435 return VINF_SUCCESS; /* ignore */
3436#else
3437 AssertCompile(0);
3438#endif
3439}
3440
3441
3442
3443
3444/**
3445 * Syncs a page during a PGMVerifyAccess() call.
3446 *
3447 * @returns VBox status code (informational included).
3448 * @param pVCpu The VMCPU handle.
3449 * @param GCPtrPage The address of the page to sync.
3450 * @param fPage The effective guest page flags.
3451 * @param uErr The trap error code.
3452 * @remarks This will normally never be called on invalid guest page
3453 * translation entries.
3454 */
3455PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3456{
3457 PVM pVM = pVCpu->CTX_SUFF(pVM);
3458
3459 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3460
3461 Assert(!HWACCMIsNestedPagingActive(pVM));
3462#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3463 || PGM_GST_TYPE == PGM_TYPE_REAL \
3464 || PGM_GST_TYPE == PGM_TYPE_PROT \
3465 || PGM_GST_TYPE == PGM_TYPE_PAE \
3466 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3467 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3468 && PGM_SHW_TYPE != PGM_TYPE_EPT
3469
3470# ifndef IN_RING0
3471 if (!(fPage & X86_PTE_US))
3472 {
3473 /*
3474 * Mark this page as safe.
3475 */
3476 /** @todo not correct for pages that contain both code and data!! */
3477 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3478 CSAMMarkPage(pVM, GCPtrPage, true);
3479 }
3480# endif
3481
3482 /*
3483 * Get guest PD and index.
3484 */
3485 /** @todo Performance: We've done all this a jiffy ago in the
3486 * PGMGstGetPage call. */
3487# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3488# if PGM_GST_TYPE == PGM_TYPE_32BIT
3489 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3490 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3491
3492# elif PGM_GST_TYPE == PGM_TYPE_PAE
3493 unsigned iPDSrc = 0;
3494 X86PDPE PdpeSrc;
3495 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3496 if (RT_UNLIKELY(!pPDSrc))
3497 {
3498 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3499 return VINF_EM_RAW_GUEST_TRAP;
3500 }
3501
3502# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3503 unsigned iPDSrc;
3504 PX86PML4E pPml4eSrc;
3505 X86PDPE PdpeSrc;
3506 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3507 if (RT_UNLIKELY(!pPDSrc))
3508 {
3509 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3510 return VINF_EM_RAW_GUEST_TRAP;
3511 }
3512# endif
3513
3514# else /* !PGM_WITH_PAGING */
3515 PGSTPD pPDSrc = NULL;
3516 const unsigned iPDSrc = 0;
3517# endif /* !PGM_WITH_PAGING */
3518 int rc = VINF_SUCCESS;
3519
3520 pgmLock(pVM);
3521
3522 /*
3523 * First check if the shadow pd is present.
3524 */
3525# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3526 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3527
3528# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3529 PX86PDEPAE pPdeDst;
3530 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3531 PX86PDPAE pPDDst;
3532# if PGM_GST_TYPE != PGM_TYPE_PAE
3533 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3534 X86PDPE PdpeSrc;
3535 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3536# endif
3537 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3538 if (rc != VINF_SUCCESS)
3539 {
3540 pgmUnlock(pVM);
3541 AssertRC(rc);
3542 return rc;
3543 }
3544 Assert(pPDDst);
3545 pPdeDst = &pPDDst->a[iPDDst];
3546
3547# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3548 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3549 PX86PDPAE pPDDst;
3550 PX86PDEPAE pPdeDst;
3551
3552# if PGM_GST_TYPE == PGM_TYPE_PROT
3553 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3554 X86PML4E Pml4eSrc;
3555 X86PDPE PdpeSrc;
3556 PX86PML4E pPml4eSrc = &Pml4eSrc;
3557 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3558 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3559# endif
3560
3561 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3562 if (rc != VINF_SUCCESS)
3563 {
3564 pgmUnlock(pVM);
3565 AssertRC(rc);
3566 return rc;
3567 }
3568 Assert(pPDDst);
3569 pPdeDst = &pPDDst->a[iPDDst];
3570# endif
3571
3572# if defined(IN_RC)
3573 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3574 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3575# endif
3576
3577 if (!pPdeDst->n.u1Present)
3578 {
3579 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3580 if (rc != VINF_SUCCESS)
3581 {
3582# if defined(IN_RC)
3583 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3584 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3585# endif
3586 pgmUnlock(pVM);
3587 AssertRC(rc);
3588 return rc;
3589 }
3590 }
3591
3592# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3593 /* Check for dirty bit fault */
3594 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3595 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3596 Log(("PGMVerifyAccess: success (dirty)\n"));
3597 else
3598# endif
3599 {
3600# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3601 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3602# else
3603 GSTPDE PdeSrc;
3604 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3605 PdeSrc.n.u1Present = 1;
3606 PdeSrc.n.u1Write = 1;
3607 PdeSrc.n.u1Accessed = 1;
3608 PdeSrc.n.u1User = 1;
3609# endif
3610
3611 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3612 if (uErr & X86_TRAP_PF_US)
3613 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3614 else /* supervisor */
3615 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3616
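     /* Sync just this one page; if even that fails, the access is genuinely
      * invalid and we report it to the caller as a guest trap. */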
3617 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3618 if (RT_SUCCESS(rc))
3619 {
3620 /* Page was successfully synced */
3621 Log2(("PGMVerifyAccess: success (sync)\n"));
3622 rc = VINF_SUCCESS;
3623 }
3624 else
3625 {
3626 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3627 rc = VINF_EM_RAW_GUEST_TRAP;
3628 }
3629 }
3630# if defined(IN_RC)
3631 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3632 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3633# endif
3634 pgmUnlock(pVM);
3635 return rc;
3636
3637#else /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3638
3639 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3640 return VERR_INTERNAL_ERROR;
3641#endif /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3642}
3643
3644
3645/**
3646 * Syncs the paging hierarchy starting at CR3.
3647 *
3648 * @returns VBox status code, no specials.
3649 * @param pVCpu The VMCPU handle.
3650 * @param cr0 Guest context CR0 register
3651 * @param cr3 Guest context CR3 register
3652 * @param cr4 Guest context CR4 register
3653 * @param fGlobal Including global page directories or not
3654 */
3655PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3656{
3657 PVM pVM = pVCpu->CTX_SUFF(pVM);
3658
3659 LogFlow(("SyncCR3 fGlobal=%d\n", !!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
3660
3661#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3662
3663 pgmLock(pVM);
3664
3665# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3666 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3667 if (pPool->cDirtyPages)
3668 pgmPoolResetDirtyPages(pVM);
3669# endif
3670
3671 /*
3672 * Update page access handlers.
3673 * The virtual are always flushed, while the physical are only on demand.
3674 * The virtual handlers are always flushed, while the physical ones are only flushed on demand.
3675 * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3676 * have to look into that later because it will have a bad influence on performance.
3677 * bird: Yes, but that won't work for aliases.
3678 */
3679 /** @todo this MUST go away. See #1557. */
3680 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3681 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3682 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3683 pgmUnlock(pVM);
3684#endif /* !NESTED && !EPT */
3685
3686#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3687 /*
3688 * Nested / EPT - almost no work.
3689 */
3690 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3691 return VINF_SUCCESS;
3692
3693#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3694 /*
3695 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3696 * out the shadow parts when the guest modifies its tables.
3697 */
3698 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3699 return VINF_SUCCESS;
3700
3701#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3702
3703# ifndef PGM_WITHOUT_MAPPINGS
3704 /*
3705 * Check for and resolve conflicts with our guest mappings if they
3706 * are enabled and not fixed.
3707 */
3708 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3709 {
3710 int rc = pgmMapResolveConflicts(pVM);
3711 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3712 if (rc == VINF_PGM_SYNC_CR3)
3713 {
3714 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3715 return VINF_PGM_SYNC_CR3;
3716 }
3717 }
3718# else
3719 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3720# endif
3721 return VINF_SUCCESS;
3722#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3723}
3724
3725
3726
3727
3728#ifdef VBOX_STRICT
3729#ifdef IN_RC
3730# undef AssertMsgFailed
3731# define AssertMsgFailed Log
3732#endif
3733#ifdef IN_RING3
3734# include <VBox/dbgf.h>
3735
3736/**
3737 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3738 *
3739 * @returns VBox status code (VINF_SUCCESS).
3740 * @param cr3 The root of the hierarchy.
3741 * @param cr4 The cr4; only PAE and PSE are currently used.
3742 * @param fLongMode Set if long mode, false if not long mode.
3743 * @param cMaxDepth Number of levels to dump.
3744 * @param pHlp Pointer to the output functions.
3745 */
3746RT_C_DECLS_BEGIN
3747VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3748RT_C_DECLS_END
3749
3750#endif
3751
3752/**
3753 * Checks that the shadow page table is in sync with the guest one.
3754 *
3755 * @returns The number of errors.
3756 * @param pVM The virtual machine.
3757 * @param pVCpu The VMCPU handle.
3758 * @param cr3 Guest context CR3 register
3759 * @param cr4 Guest context CR4 register
3760 * @param GCPtr Where to start. Defaults to 0.
3761 * @param cb How much to check. Defaults to everything.
3762 */
3763PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3764{
3765#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3766 return 0;
3767#else
3768 unsigned cErrors = 0;
3769 PVM pVM = pVCpu->CTX_SUFF(pVM);
3770 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3771
3772#if PGM_GST_TYPE == PGM_TYPE_PAE
3773 /** @todo currently broken; crashes below somewhere */
3774 AssertFailed();
3775#endif
3776
3777#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3778 || PGM_GST_TYPE == PGM_TYPE_PAE \
3779 || PGM_GST_TYPE == PGM_TYPE_AMD64
3780
3781# if PGM_GST_TYPE == PGM_TYPE_32BIT
3782 bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
3783# else
3784 bool fBigPagesSupported = true;
3785# endif
3786 PPGMCPU pPGM = &pVCpu->pgm.s;
3787 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3788 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3789# ifndef IN_RING0
3790 RTHCPHYS HCPhys; /* general usage. */
3791# endif
3792 int rc;
3793
3794 /*
3795 * Check that the Guest CR3 and all its mappings are correct.
3796 */
3797 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3798 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3799 false);
3800# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3801# if PGM_GST_TYPE == PGM_TYPE_32BIT
3802 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3803# else
3804 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3805# endif
3806 AssertRCReturn(rc, 1);
3807 HCPhys = NIL_RTHCPHYS;
3808 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3809 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3810# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3811 pgmGstGet32bitPDPtr(pVCpu);
3812 RTGCPHYS GCPhys;
3813 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3814 AssertRCReturn(rc, 1);
3815 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3816# endif
3817# endif /* !IN_RING0 */
3818
3819 /*
3820 * Get and check the Shadow CR3.
3821 */
3822# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3823 unsigned cPDEs = X86_PG_ENTRIES;
3824 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3825# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3826# if PGM_GST_TYPE == PGM_TYPE_32BIT
3827 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3828# else
3829 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3830# endif
3831 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3832# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3833 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3834 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3835# endif
3836 if (cb != ~(RTGCPTR)0)
3837 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3838
3839/** @todo call the other two PGMAssert*() functions. */
3840
3841# if PGM_GST_TYPE == PGM_TYPE_AMD64
3842 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3843
3844 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3845 {
3846 PPGMPOOLPAGE pShwPdpt = NULL;
3847 PX86PML4E pPml4eSrc;
3848 PX86PML4E pPml4eDst;
3849 RTGCPHYS GCPhysPdptSrc;
3850
3851 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
3852 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3853
3854 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3855 if (!pPml4eDst->n.u1Present)
3856 {
3857 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3858 continue;
3859 }
3860
3861 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3862 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3863
3864 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3865 {
3866 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3867 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3868 cErrors++;
3869 continue;
3870 }
3871
3872 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3873 {
3874 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3875 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3876 cErrors++;
3877 continue;
3878 }
3879
3880 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3881 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3882 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3883 {
3884 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3885 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3886 cErrors++;
3887 continue;
3888 }
3889# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3890 {
3891# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3892
3893# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3894 /*
3895 * Check the PDPTEs too.
3896 */
3897 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3898
3899 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3900 {
3901 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3902 PPGMPOOLPAGE pShwPde = NULL;
3903 PX86PDPE pPdpeDst;
3904 RTGCPHYS GCPhysPdeSrc;
3905# if PGM_GST_TYPE == PGM_TYPE_PAE
3906 X86PDPE PdpeSrc;
3907 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
3908 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3909# else
3910 PX86PML4E pPml4eSrcIgn;
3911 X86PDPE PdpeSrc;
3912 PX86PDPT pPdptDst;
3913 PX86PDPAE pPDDst;
3914 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3915
3916 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3917 if (rc != VINF_SUCCESS)
3918 {
3919 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3920 GCPtr += 512 * _2M;
3921 continue; /* next PDPTE */
3922 }
3923 Assert(pPDDst);
3924# endif
3925 Assert(iPDSrc == 0);
3926
3927 pPdpeDst = &pPdptDst->a[iPdpt];
3928
3929 if (!pPdpeDst->n.u1Present)
3930 {
3931 GCPtr += 512 * _2M;
3932 continue; /* next PDPTE */
3933 }
3934
3935 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3936 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3937
3938 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3939 {
3940 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3941 GCPtr += 512 * _2M;
3942 cErrors++;
3943 continue;
3944 }
3945
3946 if (GCPhysPdeSrc != pShwPde->GCPhys)
3947 {
3948# if PGM_GST_TYPE == PGM_TYPE_AMD64
3949 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3950# else
3951 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3952# endif
3953 GCPtr += 512 * _2M;
3954 cErrors++;
3955 continue;
3956 }
3957
3958# if PGM_GST_TYPE == PGM_TYPE_AMD64
3959 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3960 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3961 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3962 {
3963 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3964 GCPtr += 512 * _2M;
3965 cErrors++;
3966 continue;
3967 }
3968# endif
3969
3970# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3971 {
3972# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3973# if PGM_GST_TYPE == PGM_TYPE_32BIT
3974 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3975# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3976 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3977# endif
3978# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3979 /*
3980 * Iterate the shadow page directory.
3981 */
3982 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3983 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3984
3985 for (;
3986 iPDDst < cPDEs;
3987 iPDDst++, GCPtr += cIncrement)
3988 {
3989# if PGM_SHW_TYPE == PGM_TYPE_PAE
3990 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3991# else
3992 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3993# endif
3994 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3995 {
3996 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
3997 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
3998 {
3999 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
4000 cErrors++;
4001 continue;
4002 }
4003 }
4004 else if ( (PdeDst.u & X86_PDE_P)
4005 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
4006 )
4007 {
4008 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
4009 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
4010 if (!pPoolPage)
4011 {
4012 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
4013 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
4014 cErrors++;
4015 continue;
4016 }
4017 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
4018
4019 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
4020 {
4021 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
4022 GCPtr, (uint64_t)PdeDst.u));
4023 cErrors++;
4024 }
4025
4026 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
4027 {
4028 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
4029 GCPtr, (uint64_t)PdeDst.u));
4030 cErrors++;
4031 }
4032
4033 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
4034 if (!PdeSrc.n.u1Present)
4035 {
4036 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
4037 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
4038 cErrors++;
4039 continue;
4040 }
4041
4042 if ( !PdeSrc.b.u1Size
4043 || !fBigPagesSupported)
4044 {
4045 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
4046# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4047 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
4048# endif
4049 }
4050 else
4051 {
4052# if PGM_GST_TYPE == PGM_TYPE_32BIT
4053 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
4054 {
4055 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
4056 GCPtr, (uint64_t)PdeSrc.u));
4057 cErrors++;
4058 continue;
4059 }
4060# endif
4061 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
4062# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4063 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
4064# endif
4065 }
4066
4067 if ( pPoolPage->enmKind
4068 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
4069 {
4070 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
4071 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
4072 cErrors++;
4073 }
4074
4075 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4076 if (!pPhysPage)
4077 {
4078 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4079 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4080 cErrors++;
4081 continue;
4082 }
4083
4084 if (GCPhysGst != pPoolPage->GCPhys)
4085 {
4086 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
4087 GCPhysGst, pPoolPage->GCPhys, GCPtr));
4088 cErrors++;
4089 continue;
4090 }
4091
4092 if ( !PdeSrc.b.u1Size
4093 || !fBigPagesSupported)
4094 {
4095 /*
4096 * Page Table.
4097 */
4098 const GSTPT *pPTSrc;
4099 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
4100 if (RT_FAILURE(rc))
4101 {
4102 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4103 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4104 cErrors++;
4105 continue;
4106 }
4107 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
4108 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
4109 {
4110 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
4111 // (This problem will go away when/if we shadow multiple CR3s.)
4112 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4113 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4114 cErrors++;
4115 continue;
4116 }
4117 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4118 {
4119 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
4120 GCPtr, (uint64_t)PdeDst.u));
4121 cErrors++;
4122 continue;
4123 }
4124
4125 /* iterate the page table. */
4126# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4127 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
4128 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
4129# else
4130 const unsigned offPTSrc = 0;
4131# endif
4132 for (unsigned iPT = 0, off = 0;
4133 iPT < RT_ELEMENTS(pPTDst->a);
4134 iPT++, off += PAGE_SIZE)
4135 {
4136 const SHWPTE PteDst = pPTDst->a[iPT];
4137
4138 /* skip not-present entries. */
4139 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
4140 continue;
4141 Assert(PteDst.n.u1Present);
4142
4143 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
4144 if (!PteSrc.n.u1Present)
4145 {
4146# ifdef IN_RING3
4147 PGMAssertHandlerAndFlagsInSync(pVM);
4148 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
4149# endif
4150 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
4151 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
4152 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
4153 cErrors++;
4154 continue;
4155 }
4156
4157 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
4158# if 1 /** @todo sync accessed bit properly... */
4159 fIgnoreFlags |= X86_PTE_A;
4160# endif
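     /* fIgnoreFlags collects the bits that may legitimately differ between the
      * guest and shadow PTEs and are therefore masked out of the comparison below. */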
4161
4162 /* match the physical addresses */
4163 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
4164 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
4165
4166# ifdef IN_RING3
4167 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4168 if (RT_FAILURE(rc))
4169 {
4170 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4171 {
4172 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4173 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4174 cErrors++;
4175 continue;
4176 }
4177 }
4178 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4179 {
4180 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4181 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4182 cErrors++;
4183 continue;
4184 }
4185# endif
4186
4187 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4188 if (!pPhysPage)
4189 {
4190# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4191 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4192 {
4193 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4194 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4195 cErrors++;
4196 continue;
4197 }
4198# endif
4199 if (PteDst.n.u1Write)
4200 {
4201 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4202 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4203 cErrors++;
4204 }
4205 fIgnoreFlags |= X86_PTE_RW;
4206 }
4207 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4208 {
4209 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4210 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4211 cErrors++;
4212 continue;
4213 }
4214
4215 /* flags */
4216 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4217 {
4218 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4219 {
4220 if (PteDst.n.u1Write)
4221 {
4222 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4223 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4224 cErrors++;
4225 continue;
4226 }
4227 fIgnoreFlags |= X86_PTE_RW;
4228 }
4229 else
4230 {
4231 if ( PteDst.n.u1Present
4232# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4233 && !PGM_PAGE_IS_MMIO(pPhysPage)
4234# endif
4235 )
4236 {
4237 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4238 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4239 cErrors++;
4240 continue;
4241 }
4242 fIgnoreFlags |= X86_PTE_P;
4243 }
4244 }
4245 else
4246 {
4247 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4248 {
4249 if (PteDst.n.u1Write)
4250 {
4251 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4252 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4253 cErrors++;
4254 continue;
4255 }
4256 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4257 {
4258 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4259 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4260 cErrors++;
4261 continue;
4262 }
4263 if (PteDst.n.u1Dirty)
4264 {
4265 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4266 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4267 cErrors++;
4268 }
4269# if 0 /** @todo sync access bit properly... */
4270 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4271 {
4272 AssertMsgFailed(("!DIRTY page at %RGv has mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4273 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4274 cErrors++;
4275 }
4276 fIgnoreFlags |= X86_PTE_RW;
4277# else
4278 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4279# endif
4280 }
4281 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4282 {
4283 /* access bit emulation (not implemented). */
4284 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4285 {
4286 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4287 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4288 cErrors++;
4289 continue;
4290 }
4291 if (!PteDst.n.u1Accessed)
4292 {
4293 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4294 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4295 cErrors++;
4296 }
4297 fIgnoreFlags |= X86_PTE_P;
4298 }
4299# ifdef DEBUG_sandervl
4300 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4301# endif
4302 }
4303
4304 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4305 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4306 )
4307 {
4308 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4309 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4310 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4311 cErrors++;
4312 continue;
4313 }
4314 } /* foreach PTE */
4315 }
4316 else
4317 {
4318 /*
4319 * Big Page.
4320 */
4321 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4322 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4323 {
4324 if (PdeDst.n.u1Write)
4325 {
4326 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4327 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4328 cErrors++;
4329 continue;
4330 }
4331 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4332 {
4333 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4334 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4335 cErrors++;
4336 continue;
4337 }
4338# if 0 /** @todo sync access bit properly... */
4339 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4340 {
4341 AssertMsgFailed(("!DIRTY page at %RGv has mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4342 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4343 cErrors++;
4344 }
4345 fIgnoreFlags |= X86_PTE_RW;
4346# else
4347 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4348# endif
4349 }
4350 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4351 {
4352 /* access bit emulation (not implemented). */
4353 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4354 {
4355 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4356 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4357 cErrors++;
4358 continue;
4359 }
4360 if (!PdeDst.n.u1Accessed)
4361 {
4362 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4363 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4364 cErrors++;
4365 }
4366 fIgnoreFlags |= X86_PTE_P;
4367 }
4368
4369 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4370 {
4371 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4372 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4373 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4374 cErrors++;
4375 }
4376
4377 /* iterate the page table. */
4378 for (unsigned iPT = 0, off = 0;
4379 iPT < RT_ELEMENTS(pPTDst->a);
4380 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4381 {
4382 const SHWPTE PteDst = pPTDst->a[iPT];
4383
4384 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4385 {
4386 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4387 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4388 cErrors++;
4389 }
4390
4391 /* skip not-present entries. */
4392 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4393 continue;
4394
4395 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4396
4397 /* match the physical addresses */
4398 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4399
4400# ifdef IN_RING3
4401 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4402 if (RT_FAILURE(rc))
4403 {
4404 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4405 {
4406 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4407 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4408 cErrors++;
4409 }
4410 }
4411 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4412 {
4413 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4414 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4415 cErrors++;
4416 continue;
4417 }
4418# endif
4419 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4420 if (!pPhysPage)
4421 {
4422# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4423 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4424 {
4425 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4426 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4427 cErrors++;
4428 continue;
4429 }
4430# endif
4431 if (PteDst.n.u1Write)
4432 {
4433 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4434 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4435 cErrors++;
4436 }
4437 fIgnoreFlags |= X86_PTE_RW;
4438 }
4439 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4440 {
4441 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4442 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4443 cErrors++;
4444 continue;
4445 }
4446
4447 /* flags */
4448 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4449 {
4450 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4451 {
4452 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4453 {
4454 if (PteDst.n.u1Write)
4455 {
4456 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4457 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4458 cErrors++;
4459 continue;
4460 }
4461 fIgnoreFlags |= X86_PTE_RW;
4462 }
4463 }
4464 else
4465 {
4466 if ( PteDst.n.u1Present
4467# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4468 && !PGM_PAGE_IS_MMIO(pPhysPage)
4469# endif
4470 )
4471 {
4472 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4473 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4474 cErrors++;
4475 continue;
4476 }
4477 fIgnoreFlags |= X86_PTE_P;
4478 }
4479 }
4480
4481 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4482 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4483 )
4484 {
4485 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4486 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4487 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4488 cErrors++;
4489 continue;
4490 }
4491 } /* for each PTE */
4492 }
4493 }
4494 /* not present */
4495
4496 } /* for each PDE */
4497
4498 } /* for each PDPTE */
4499
4500 } /* for each PML4E */
4501
4502# ifdef DEBUG
4503 if (cErrors)
4504 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4505# endif
4506
4507#endif /* GST == 32BIT, PAE or AMD64 */
4508 return cErrors;
4509
4510#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4511}
4512#endif /* VBOX_STRICT */
4513
4514
4515/**
4516 * Sets up the CR3 for shadow paging
4517 *
4518 * @returns Strict VBox status code.
4519 * @retval VINF_SUCCESS.
4520 *
4521 * @param pVCpu The VMCPU handle.
4522 * @param GCPhysCR3 The physical address in the CR3 register.
4523 */
4524PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4525{
4526 PVM pVM = pVCpu->CTX_SUFF(pVM);
4527
4528 /* Update guest paging info. */
4529#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4530 || PGM_GST_TYPE == PGM_TYPE_PAE \
4531 || PGM_GST_TYPE == PGM_TYPE_AMD64
4532
4533 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4534
4535 /*
4536 * Map the page CR3 points at.
4537 */
4538 RTHCPTR HCPtrGuestCR3;
4539 RTHCPHYS HCPhysGuestCR3;
4540 pgmLock(pVM);
4541 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4542 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4543 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4544 /** @todo this needs some reworking wrt. locking. */
4545# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4546 HCPtrGuestCR3 = NIL_RTHCPTR;
4547 int rc = VINF_SUCCESS;
4548# else
4549 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4550# endif
4551 pgmUnlock(pVM);
4552 if (RT_SUCCESS(rc))
4553 {
4554 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4555 if (RT_SUCCESS(rc))
4556 {
4557# ifdef IN_RC
4558 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4559# endif
4560# if PGM_GST_TYPE == PGM_TYPE_32BIT
4561 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4562# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4563 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4564# endif
4565 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4566
4567# elif PGM_GST_TYPE == PGM_TYPE_PAE
4568 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4569 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4570# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4571 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4572# endif
4573 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4574 LogFlow(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4575
4576 /*
4577 * Map the 4 PDs too.
4578 */
4579 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(pVCpu);
4580 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4581 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4582 {
4583 if (pGuestPDPT->a[i].n.u1Present)
4584 {
4585 RTHCPTR HCPtr;
4586 RTHCPHYS HCPhys;
4587 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4588 pgmLock(pVM);
4589 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4590 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4591 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4592# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4593 HCPtr = NIL_RTHCPTR;
4594 int rc2 = VINF_SUCCESS;
4595# else
4596 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4597# endif
4598 pgmUnlock(pVM);
4599 if (RT_SUCCESS(rc2))
4600 {
4601 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4602 AssertRCReturn(rc, rc);
4603
4604 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4605# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4606 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4607# endif
4608 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4609 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4610# ifdef IN_RC
4611 PGM_INVL_PG(pVCpu, GCPtr);
4612# endif
4613 continue;
4614 }
4615 AssertMsgFailed(("pgmR3Gst32BitMapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4616 }
4617
4618 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4619# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4620 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4621# endif
4622 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4623 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4624# ifdef IN_RC
4625 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4626# endif
4627 }
4628
4629# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4630 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4631# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4632 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4633# endif
4634# endif
4635 }
4636 else
4637 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4638 }
4639 else
4640 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4641
4642#else /* prot/real stub */
4643 int rc = VINF_SUCCESS;
4644#endif
4645
4646 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4647# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4648 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4649 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4650 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4651 && PGM_GST_TYPE != PGM_TYPE_PROT))
4652
4653 Assert(!HWACCMIsNestedPagingActive(pVM));
4654
4655 /*
4656 * Update the shadow root page as well since that's not fixed.
4657 */
4658 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4659 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4660 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4661 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4662 PPGMPOOLPAGE pNewShwPageCR3;
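 /* The old root (saved above) is deactivated and returned to the pool only
    after the new one is installed; see the cleanup at the end of this function. */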
4663
4664 pgmLock(pVM);
4665
4666# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4667 if (pPool->cDirtyPages)
4668 pgmPoolResetDirtyPages(pVM);
4669# endif
4670
4671 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
4672 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4673 AssertFatalRC(rc);
4674 rc = VINF_SUCCESS;
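 /* pgmPoolAlloc can return an informational status here (e.g. when the root
    page is already in the pool cache), so normalize to VINF_SUCCESS. */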
4675
4676# ifdef IN_RC
4677 /*
4678 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4679 * state will be inconsistent! Flush important things now while
4680 * we still can and then make sure there are no ring-3 calls.
4681 */
4682 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4683 VMMRZCallRing3Disable(pVCpu);
4684# endif
4685
4686 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4687 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4688 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4689# ifdef IN_RING0
4690 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4691 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4692# elif defined(IN_RC)
4693 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4694 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4695# else
4696 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4697 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4698# endif
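 /* The shadow CR3 pool page pointer is kept for all three contexts; the two
    non-current ones are derived with the MMHyperCCTo* converters above. */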
4699
4700# ifndef PGM_WITHOUT_MAPPINGS
4701 /*
4702 * Apply all hypervisor mappings to the new CR3.
4703 * Note that SyncCR3 will be executed in case CR3 is changed in a guest paging mode; this will
4704 * make sure we check for conflicts in the new CR3 root.
4705 */
4706# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4707 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4708# endif
4709 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4710 AssertRCReturn(rc, rc);
4711# endif
4712
4713 /* Set the current hypervisor CR3. */
4714 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4715 SELMShadowCR3Changed(pVM, pVCpu);
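 /* Publish the new root: CPUM records the hypervisor CR3 value and SELM gets a
    chance to update anything that caches the shadow CR3. */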
4716
4717# ifdef IN_RC
4718 /* NOTE: The state is consistent again. */
4719 VMMRZCallRing3Enable(pVCpu);
4720# endif
4721
4722 /* Clean up the old CR3 root. */
4723 if ( pOldShwPageCR3
4724 && pOldShwPageCR3 != pNewShwPageCR3 /** @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4725 {
4726 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4727# ifndef PGM_WITHOUT_MAPPINGS
4728 /* Remove the hypervisor mappings from the shadow page table. */
4729 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4730# endif
4731 /* Mark the page as unlocked; allow flushing again. */
4732 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4733
4734 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4735 }
4736 pgmUnlock(pVM);
4737# endif
4738
4739 return rc;
4740}
4741
4742/**
4743 * Unmaps the shadow CR3.
4744 *
4745 * @returns VBox status, no specials.
4746 * @param pVCpu The VMCPU handle.
4747 */
4748PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4749{
4750 LogFlow(("UnmapCR3\n"));
4751
4752 int rc = VINF_SUCCESS;
4753 PVM pVM = pVCpu->CTX_SUFF(pVM);
4754
4755 /*
4756 * Update guest paging info.
4757 */
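 /* Only the cached guest root pointers for the active guest mode are dropped
    here; the fixed hypervisor mapping itself is left in place and gets
    re-targeted by the next MapCR3 call. */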
4758#if PGM_GST_TYPE == PGM_TYPE_32BIT
4759 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4760# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4761 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4762# endif
4763 pVCpu->pgm.s.pGst32BitPdRC = 0;
4764
4765#elif PGM_GST_TYPE == PGM_TYPE_PAE
4766 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4767# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4768 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4769# endif
4770 pVCpu->pgm.s.pGstPaePdptRC = 0;
4771 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4772 {
4773 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4774# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4775 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4776# endif
4777 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4778 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4779 }
4780
4781#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4782 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4783# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4784 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4785# endif
4786
4787#else /* prot/real mode stub */
4788 /* nothing to do */
4789#endif
4790
4791#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4792 /*
4793 * Update shadow paging info.
4794 */
4795# if ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4796 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4797 || PGM_SHW_TYPE == PGM_TYPE_AMD64)
4798
4799# if PGM_GST_TYPE != PGM_TYPE_REAL
4800 Assert(!HWACCMIsNestedPagingActive(pVM));
4801# endif
4802
4803 pgmLock(pVM);
4804
4805# ifndef PGM_WITHOUT_MAPPINGS
4806 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4807 /* Remove the hypervisor mappings from the shadow page table. */
4808 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4809# endif
4810
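 /* Unlock and free the shadow root page below; MapCR3 will allocate a fresh
    one the next time it runs. */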
4811 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4812 {
4813 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4814
4815 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4816
4817# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4818 if (pPool->cDirtyPages)
4819 pgmPoolResetDirtyPages(pVM);
4820# endif
4821
4822 /* Mark the page as unlocked; allow flushing again. */
4823 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4824
4825 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4826 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4827 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4828 pVCpu->pgm.s.pShwPageCR3RC = 0;
4829 pVCpu->pgm.s.iShwUser = 0;
4830 pVCpu->pgm.s.iShwUserTable = 0;
4831 }
4832 pgmUnlock(pVM);
4833# endif
4834#endif /* !IN_RC */
4835
4836 return rc;
4837}