VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h @ 31066

Last change on this file since 31066 was 31066, checked in by vboxsync, 14 years ago

PGM: A couple of simplifications and optimizations.

1/* $Id: PGMAllBth.h 31066 2010-07-23 14:48:24Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks The nested page tables on AMD make use of PGM_SHW_TYPE in
6 * {PGM_TYPE_AMD64, PGM_TYPE_PAE and PGM_TYPE_32BIT} and PGM_GST_TYPE
7 * set to PGM_TYPE_PROT. Half of the code in this file is not
8 * exercised with PGM_SHW_TYPE set to PGM_TYPE_NESTED.
9 *
10 * @remarks Extended page tables (Intel) are built with PGM_GST_TYPE set to
11 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
12 *
13 * @remarks This file is one big \#ifdef-orgy!
14 *
15 */
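/*
 * Illustrative sketch only: this header is a template that gets compiled several
 * times, each time with a different guest/shadow mode pair selected through
 * preprocessor defines. The exact define/include sequence lives in the code that
 * includes this file (assumed to be PGMAll.cpp and friends) and may differ in
 * detail; a minimal instantiation is assumed to look roughly like this:
 *
 *     #define PGM_GST_TYPE        PGM_TYPE_32BIT
 *     #define PGM_GST_NAME(name)  PGM_GST_NAME_32BIT(name)
 *     #define PGM_SHW_TYPE        PGM_TYPE_PAE
 *     #define PGM_SHW_NAME(name)  PGM_SHW_NAME_PAE(name)
 *     #define PGM_BTH_NAME(name)  PGM_BTH_NAME_PAE_32BIT(name)
 *     #include "PGMAllBth.h"
 *     #undef PGM_BTH_NAME
 *     #undef PGM_SHW_NAME
 *     #undef PGM_SHW_TYPE
 *     #undef PGM_GST_NAME
 *     #undef PGM_GST_TYPE
 */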
16
17/*
18 * Copyright (C) 2006-2010 Oracle Corporation
19 *
20 * This file is part of VirtualBox Open Source Edition (OSE), as
21 * available from http://www.virtualbox.org. This file is free software;
22 * you can redistribute it and/or modify it under the terms of the GNU
23 * General Public License (GPL) as published by the Free Software
24 * Foundation, in version 2 as it comes in the "COPYING" file of the
25 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
26 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
27 */
28
29
30/*******************************************************************************
31* Internal Functions *
32*******************************************************************************/
33RT_C_DECLS_BEGIN
34PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
35PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
37PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
38PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
39PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
40PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
41PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
42PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
43#ifdef VBOX_STRICT
44PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
45#endif
46DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte);
47PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
48PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
49RT_C_DECLS_END
50
51
52/*
53 * Filter out some illegal combinations of guest and shadow paging, so we can
54 * remove redundant checks inside functions.
55 */
56#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
57# error "Invalid combination; PAE guest implies PAE shadow"
58#endif
59
60#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
61 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
62# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
63#endif
64
65#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
66 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
67# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
68#endif
69
70#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
71 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
72# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
73#endif
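/*
 * For reference, a summary of the guest/shadow combinations the filters above
 * still permit (derived purely from the #error conditions; what actually gets
 * instantiated and exercised is narrower, see the @remarks at the top of the file):
 *
 *     guest \ shadow |  32BIT   PAE   AMD64   NESTED   EPT
 *     ---------------+--------------------------------------
 *     REAL           |    x      x      -       x       x
 *     PROT           |    x      x      x       x       x
 *     32BIT          |    x      x      -       x       x
 *     PAE            |    -      x      -       x       x
 *     AMD64          |    -      -      x       x       x
 */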
74
75
76#ifndef IN_RING3
77/**
78 * #PF Handler for raw-mode guest execution.
79 *
80 * @returns VBox status code (appropriate for trap handling and GC return).
81 *
82 * @param pVCpu VMCPU Handle.
83 * @param uErr The trap error code.
84 * @param pRegFrame Trap register frame.
85 * @param pvFault The fault address.
86 * @param pfLockTaken PGM lock taken here or not (out)
87 */
88PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
89{
90 PVM pVM = pVCpu->CTX_SUFF(pVM);
91
92 *pfLockTaken = false;
93
94# if defined(IN_RC) && defined(VBOX_STRICT)
95 PGMDynCheckLocks(pVM);
96# endif
97
98# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
99 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
100 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
101 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
102
103# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
104 /*
105 * Hide the instruction fetch trap indicator for now.
106 */
107 /** @todo NXE will change this and we must fix NXE in the switcher too! */
108 if (uErr & X86_TRAP_PF_ID)
109 {
110 uErr &= ~X86_TRAP_PF_ID;
111 TRPMSetErrorCode(pVCpu, uErr);
112 }
113# endif
114
115 /*
116 * Get PDs.
117 */
118 int rc;
119# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
120# if PGM_GST_TYPE == PGM_TYPE_32BIT
121 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
122 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
123
124# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
125
126# if PGM_GST_TYPE == PGM_TYPE_PAE
127 unsigned iPDSrc = 0; /* initialized to shut up gcc */
128 X86PDPE PdpeSrc;
129 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, pvFault, &iPDSrc, &PdpeSrc);
130
131# elif PGM_GST_TYPE == PGM_TYPE_AMD64
132 unsigned iPDSrc = 0; /* initialized to shut up gcc */
133 PX86PML4E pPml4eSrc = NULL; /* ditto */
134 X86PDPE PdpeSrc;
135 PGSTPD pPDSrc;
136
137 pPDSrc = pgmGstGetLongModePDPtr(pVCpu, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
138 Assert(pPml4eSrc);
139# endif
140
141 /* Quick check for a valid guest trap. (PAE & AMD64) */
142 if (!pPDSrc)
143 {
144# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
145 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
146# else
147 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
148# endif
149 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
150 TRPMSetErrorCode(pVCpu, uErr);
151 return VINF_EM_RAW_GUEST_TRAP;
152 }
153# endif
154
155# else /* !PGM_WITH_PAGING */
156 PGSTPD pPDSrc = NULL;
157 const unsigned iPDSrc = 0;
158# endif /* !PGM_WITH_PAGING */
159
160# if !defined(PGM_WITHOUT_MAPPINGS) && ((PGM_GST_TYPE == PGM_TYPE_32BIT) || (PGM_GST_TYPE == PGM_TYPE_PAE))
161 /*
162 * Check for write conflicts with our hypervisor mapping early on. If the guest happens to access a non-present page
163 * where our hypervisor is currently mapped, we'll create a #PF storm in the guest.
164 */
165 if ( (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW)
166 && MMHyperIsInsideArea(pVM, pvFault))
167 {
168 /* Force a CR3 sync to check for conflicts and emulate the instruction. */
169 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
170 return VINF_EM_RAW_EMULATE_INSTR;
171 }
172# endif
173
174 /*
175 * First check for a genuine guest page fault.
176 */
177 /** @todo This duplicates the page table walk we're doing below. Need to
178 * find some way to avoid this double work, probably by caching
179 * the data. */
180# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
181 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
182 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDSrc->a[iPDSrc], pvFault);
183 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
184 if (rc == VINF_EM_RAW_GUEST_TRAP)
185 {
186 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
187 return rc;
188 }
189# endif /* PGM_WITH_PAGING */
190
191 /* Take the big lock now. */
192 *pfLockTaken = true;
193 pgmLock(pVM);
194
195 /*
196 * Fetch the guest PDE, PDPE and PML4E.
197 */
198# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
199 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
200# else
201 GSTPDE PdeSrc;
202 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
203 PdeSrc.n.u1Present = 1;
204 PdeSrc.n.u1Write = 1;
205 PdeSrc.n.u1Accessed = 1;
206 PdeSrc.n.u1User = 1;
207# endif
208
209# if PGM_SHW_TYPE == PGM_TYPE_32BIT
210 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
211 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
212
213# elif PGM_SHW_TYPE == PGM_TYPE_PAE
214 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
215
216 PX86PDPAE pPDDst;
217# if PGM_GST_TYPE != PGM_TYPE_PAE
218 X86PDPE PdpeSrc;
219
220 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
221 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
222# endif
223 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, &PdpeSrc, &pPDDst);
224 if (rc != VINF_SUCCESS)
225 {
226 AssertRC(rc);
227 return rc;
228 }
229 Assert(pPDDst);
230
231# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
232 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
233 PX86PDPAE pPDDst;
234# if PGM_GST_TYPE == PGM_TYPE_PROT
235 /* AMD-V nested paging */
236 X86PML4E Pml4eSrc;
237 X86PDPE PdpeSrc;
238 PX86PML4E pPml4eSrc = &Pml4eSrc;
239
240 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
241 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
242 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
243# endif
244
245 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc, &PdpeSrc, &pPDDst);
246 if (rc != VINF_SUCCESS)
247 {
248 AssertRC(rc);
249 return rc;
250 }
251 Assert(pPDDst);
252
253# elif PGM_SHW_TYPE == PGM_TYPE_EPT
254 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
255 PEPTPD pPDDst;
256
257 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
258 if (rc != VINF_SUCCESS)
259 {
260 AssertRC(rc);
261 return rc;
262 }
263 Assert(pPDDst);
264# endif
265
266# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
267 /* Dirty page handling. */
268 if (uErr & X86_TRAP_PF_RW) /* write fault? */
269 {
270 /*
271 * If we successfully correct the write protection fault due to dirty bit
272 * tracking, then return immediately.
273 */
274 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
275 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
276 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
277 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
278 {
279 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
280 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
281 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
282 return VINF_SUCCESS;
283 }
284 }
285
286# if 0 /* rarely useful; leave for debugging. */
287 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
288# endif
289# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
290
291 /*
292 * A common case is the not-present error caused by lazy page table syncing.
293 *
294 * It is IMPORTANT that we weed out any access to non-present shadow PDEs here
295 * so we can safely assume that the shadow PT is present when calling SyncPage later.
296 *
297 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
298 * of mapping conflict and defer to SyncCR3 in R3.
299 * (Again, we do NOT support access handlers for non-present guest pages.)
300 *
301 */
302 Assert(PdeSrc.n.u1Present);
303 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
304 && !pPDDst->a[iPDDst].n.u1Present
305 )
306 {
307 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
308 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
309 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
310 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
311 if (RT_SUCCESS(rc))
312 {
313 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
314 return rc;
315 }
316 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
317 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
318 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
319 return VINF_PGM_SYNC_CR3;
320 }
321
322# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
323 /*
324 * Check if this address is within any of our mappings.
325 *
326 * This is *very* fast and it's gonna save us a bit of effort below and prevent
327 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
328 * (BTW, it's impossible to have physical access handlers in a mapping.)
329 */
330 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
331 {
332 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
333 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
334 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
335 {
336 if (pvFault < pMapping->GCPtr)
337 break;
338 if (pvFault - pMapping->GCPtr < pMapping->cb)
339 {
340 /*
341 * The first thing we check is if we've got an undetected conflict.
342 */
343 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
344 {
345 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
346 while (iPT-- > 0)
347 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
348 {
349 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
350 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
351 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
352 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
353 return VINF_PGM_SYNC_CR3;
354 }
355 }
356
357 /*
358 * Check if the fault address is in a virtual page access handler range.
359 */
360 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
361 if ( pCur
362 && pvFault - pCur->Core.Key < pCur->cb
363 && uErr & X86_TRAP_PF_RW)
364 {
365# ifdef IN_RC
366 STAM_PROFILE_START(&pCur->Stat, h);
367 pgmUnlock(pVM);
368 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
369 pgmLock(pVM);
370 STAM_PROFILE_STOP(&pCur->Stat, h);
371# else
372 AssertFailed();
373 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
374# endif
375 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
376 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
377 return rc;
378 }
379
380 /*
381 * Pretend we're not here and let the guest handle the trap.
382 */
383 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
384 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
385 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
386 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
387 return VINF_EM_RAW_GUEST_TRAP;
388 }
389 }
390 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
391 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
392# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
393
394 /*
395 * Check if this fault address is flagged for special treatment,
396 * which means we'll have to figure out the physical address and
397 * check flags associated with it.
398 *
399 * ASSUME that we can limit any special access handling to pages
400 * in page tables which the guest believes to be present.
401 */
402 Assert(PdeSrc.n.u1Present);
403 {
404 RTGCPHYS GCPhys = NIL_RTGCPHYS;
405
406# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
407 if ( PdeSrc.b.u1Size
408# if PGM_GST_TYPE == PGM_TYPE_32BIT
409 && CPUMIsGuestPageSizeExtEnabled(pVCpu)
410# endif
411 )
412 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc)
413 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
414 else
415 {
416 PGSTPT pPTSrc;
417 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
418 if (RT_SUCCESS(rc))
419 {
420 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
421 if (pPTSrc->a[iPTESrc].n.u1Present)
422 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
423 }
424 }
425# else
426 /* No paging so the fault address is the physical address */
427 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
428# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
429
430 /*
431 * If we have a GC address we'll check if it has any flags set.
432 */
433 if (GCPhys != NIL_RTGCPHYS)
434 {
435 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
436
437 PPGMPAGE pPage;
438 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
439 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
440 {
441 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
442 {
443 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
444 {
445 /*
446 * Physical page access handler.
447 */
448 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
449 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
450 if (pCur)
451 {
452# ifdef PGM_SYNC_N_PAGES
453 /*
454 * If the region is write protected and we got a page not present fault, then sync
455 * the pages. If the fault was caused by a read, then restart the instruction.
456 * In case of write access continue to the GC write handler.
457 *
458 * ASSUMES that there is only one handler per page or that they have similar write properties.
459 */
460 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
461 && !(uErr & X86_TRAP_PF_P))
462 {
463 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
464 if ( RT_FAILURE(rc)
465 || !(uErr & X86_TRAP_PF_RW)
466 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
467 {
468 AssertRC(rc);
469 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
470 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
471 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
472 return rc;
473 }
474 }
475# endif
476
477 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
478 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
479 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
480
481# if defined(IN_RC) || defined(IN_RING0)
482 if (pCur->CTX_SUFF(pfnHandler))
483 {
484 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
485# ifdef IN_RING0
486 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
487# else
488 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
489# endif
490 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
491 void *pvUser = pCur->CTX_SUFF(pvUser);
492
493 STAM_PROFILE_START(&pCur->Stat, h);
494 if (fLeaveLock)
495 pgmUnlock(pVM); /* @todo: Not entirely safe. */
496
497 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
498 if (fLeaveLock)
499 pgmLock(pVM);
500# ifdef VBOX_WITH_STATISTICS
501 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
502 if (pCur)
503 STAM_PROFILE_STOP(&pCur->Stat, h);
504# else
505 pCur = NULL; /* might be invalid by now. */
506# endif
507
508 }
509 else
510# endif
511 rc = VINF_EM_RAW_EMULATE_INSTR;
512
513 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
514 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
515 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
516 return rc;
517 }
518 }
519# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
520 else
521 {
522# ifdef PGM_SYNC_N_PAGES
523 /*
524 * If the region is write protected and we got a page not present fault, then sync
525 * the pages. If the fault was caused by a read, then restart the instruction.
526 * In case of write access continue to the GC write handler.
527 */
528 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
529 && !(uErr & X86_TRAP_PF_P))
530 {
531 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
532 if ( RT_FAILURE(rc)
533 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
534 || !(uErr & X86_TRAP_PF_RW))
535 {
536 AssertRC(rc);
537 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
538 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
539 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
540 return rc;
541 }
542 }
543# endif
544 /*
545 * Ok, it's a virtual page access handler.
546 *
547 * Since it's faster to search by address, we'll do that first
548 * and then retry by GCPhys if that fails.
549 */
550 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
551 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
552 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
553 */
554 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
555 if (pCur)
556 {
557 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
558 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
559 || !(uErr & X86_TRAP_PF_P)
560 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
561 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
562
563 if ( pvFault - pCur->Core.Key < pCur->cb
564 && ( uErr & X86_TRAP_PF_RW
565 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
566 {
567# ifdef IN_RC
568 STAM_PROFILE_START(&pCur->Stat, h);
569 pgmUnlock(pVM);
570 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
571 pgmLock(pVM);
572 STAM_PROFILE_STOP(&pCur->Stat, h);
573# else
574 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
575# endif
576 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
577 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
578 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
579 return rc;
580 }
581 /* Unhandled part of a monitored page */
582 }
583 else
584 {
585 /* Check by physical address. */
586 unsigned iPage;
587 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
588 &pCur, &iPage);
589 Assert(RT_SUCCESS(rc) || !pCur);
590 if ( pCur
591 && ( uErr & X86_TRAP_PF_RW
592 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
593 {
594 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
595# ifdef IN_RC
596 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
597 Assert(off < pCur->cb);
598 STAM_PROFILE_START(&pCur->Stat, h);
599 pgmUnlock(pVM);
600 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
601 pgmLock(pVM);
602 STAM_PROFILE_STOP(&pCur->Stat, h);
603# else
604 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
605# endif
606 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
607 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
608 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
609 return rc;
610 }
611 }
612 }
613# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
614
615 /*
616 * There is a handled area of the page, but this fault doesn't belong to it.
617 * We must emulate the instruction.
618 *
619 * To avoid a (non-fatal) crash in the interpreter and a trip back to the recompiler,
620 * we first check if this was a page-not-present fault for a page with only
621 * write access handlers. Restart the instruction if it wasn't a write access.
622 */
623 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
624
625 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
626 && !(uErr & X86_TRAP_PF_P))
627 {
628 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
629 if ( RT_FAILURE(rc)
630 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
631 || !(uErr & X86_TRAP_PF_RW))
632 {
633 AssertRC(rc);
634 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
635 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
636 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
637 return rc;
638 }
639 }
640
641 /** @todo This particular case can cause quite a lot of overhead. E.g. the early stage of kernel booting in Ubuntu 6.06,
642 * which writes to an unhandled part of the LDT page several million times.
643 */
644 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
645 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
646 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
647 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
648 return rc;
649 } /* if any kind of handler */
650
651# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
652 if (uErr & X86_TRAP_PF_P)
653 {
654 /*
655 * The page isn't marked, but it might still be monitored by a virtual page access handler.
656 * (ASSUMES no temporary disabling of virtual handlers.)
657 */
658 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
659 * we should correct both the shadow page table and physical memory flags, and not only check for
660 * accesses within the handler region but for access to pages with virtual handlers. */
661 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
662 if (pCur)
663 {
664 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
665 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
666 || !(uErr & X86_TRAP_PF_P)
667 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
668 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
669
670 if ( pvFault - pCur->Core.Key < pCur->cb
671 && ( uErr & X86_TRAP_PF_RW
672 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
673 {
674# ifdef IN_RC
675 STAM_PROFILE_START(&pCur->Stat, h);
676 pgmUnlock(pVM);
677 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
678 pgmLock(pVM);
679 STAM_PROFILE_STOP(&pCur->Stat, h);
680# else
681 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
682# endif
683 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
684 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
685 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
686 return rc;
687 }
688 }
689 }
690# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
691 }
692 else
693 {
694 /*
695 * When the guest accesses invalid physical memory (e.g. probing
696 * of RAM or accessing a remapped MMIO range), then we'll fall
697 * back to the recompiler to emulate the instruction.
698 */
699 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
700 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
701 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
702 return VINF_EM_RAW_EMULATE_INSTR;
703 }
704
705 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
706
707# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
708 /*
709 * We are here only if the page is present in the guest page tables and
710 * the trap is not handled by our handlers.
711 *
712 * Check it for page out-of-sync situation.
713 */
714 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
715
716 if (!(uErr & X86_TRAP_PF_P))
717 {
718 /*
719 * Page is not present in our page tables.
720 * Try to sync it!
721 * BTW, fPageShw is invalid in this branch!
722 */
723 if (uErr & X86_TRAP_PF_US)
724 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
725 else /* supervisor */
726 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
727
728 if (PGM_PAGE_IS_BALLOONED(pPage))
729 {
730 /* Emulate reads from ballooned pages as they are not present in our shadow page tables. (required for e.g. Solaris guests; soft ecc, random nr generator) */
731 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
732 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
733 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
734 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
735 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
736 return rc;
737 }
738# if defined(LOG_ENABLED) && !defined(IN_RING0)
739 RTGCPHYS GCPhys2;
740 uint64_t fPageGst2;
741 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
742 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
743 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
744# endif /* LOG_ENABLED */
745
746# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
747 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
748 {
749 uint64_t fPageGst;
750 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
751 if ( RT_SUCCESS(rc)
752 && !(fPageGst & X86_PTE_US))
753 {
754 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
755 if ( pvFault == (RTGCPTR)pRegFrame->eip
756 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
757# ifdef CSAM_DETECT_NEW_CODE_PAGES
758 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
759 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
760# endif /* CSAM_DETECT_NEW_CODE_PAGES */
761 )
762 {
763 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
764 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
765 if (rc != VINF_SUCCESS)
766 {
767 /*
768 * CSAM needs to perform a job in ring 3.
769 *
770 * Sync the page before going to the host context; otherwise we'll end up in a loop if
771 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
772 */
773 LogFlow(("CSAM ring 3 job\n"));
774 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
775 AssertRC(rc2);
776
777 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
778 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
779 return rc;
780 }
781 }
782# ifdef CSAM_DETECT_NEW_CODE_PAGES
783 else if ( uErr == X86_TRAP_PF_RW
784 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
785 && pRegFrame->ecx < 0x10000)
786 {
787 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
788 * to detect loading of new code pages.
789 */
790
791 /*
792 * Decode the instruction.
793 */
794 RTGCPTR PC;
795 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs,
796 &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
797 if (rc == VINF_SUCCESS)
798 {
799 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
800 uint32_t cbOp;
801 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
802
803 /* For now we'll restrict this to rep movsw/d instructions */
804 if ( rc == VINF_SUCCESS
805 && pDis->pCurInstr->opcode == OP_MOVSWD
806 && (pDis->prefix & PREFIX_REP))
807 {
808 CSAMMarkPossibleCodePage(pVM, pvFault);
809 }
810 }
811 }
812# endif /* CSAM_DETECT_NEW_CODE_PAGES */
813
814 /*
815 * Mark this page as safe.
816 */
817 /** @todo not correct for pages that contain both code and data!! */
818 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
819 CSAMMarkPage(pVM, pvFault, true);
820 }
821 }
822# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
823 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
824 if (RT_SUCCESS(rc))
825 {
826 /* The page was successfully synced, return to the guest. */
827 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
828 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
829 return VINF_SUCCESS;
830 }
831 }
832 else /* uErr & X86_TRAP_PF_P: */
833 {
834 /*
835 * Write protected pages are made writable when the guest makes the first
836 * write to them. This happens for pages that are shared, write monitored
837 * and not yet allocated.
838 *
839 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
840 * to physically monitored regions that are no longer valid.
841 * Assume for now it only applies to the read/write flag.
842 */
843 if ( RT_SUCCESS(rc)
844 && (uErr & X86_TRAP_PF_RW))
845 {
846 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
847 {
848 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
849 Assert(!PGM_PAGE_IS_ZERO(pPage));
850 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
851
852 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
853 if (rc != VINF_SUCCESS)
854 {
855 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
856 return rc;
857 }
858 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
859 return VINF_EM_NO_MEMORY;
860 }
861
862# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
863 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
864 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
865 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
866 {
867 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
868 uint64_t fPageGst;
869 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
870 if ( RT_SUCCESS(rc)
871 && !(fPageGst & X86_PTE_RW))
872 {
873 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
874 if (RT_SUCCESS(rc))
875 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
876 else
877 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
878 return rc;
879 }
880 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
881 }
882# endif
883 /// @todo count the above case; else
884 if (uErr & X86_TRAP_PF_US)
885 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
886 else /* supervisor */
887 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
888
889 /*
890 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
891 * page is not present, which is not true in this case.
892 */
893 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
894 if (RT_SUCCESS(rc))
895 {
896 /*
897 * Page was successfully synced, return to guest.
898 * First invalidate the page as it might be in the TLB.
899 */
900# if PGM_SHW_TYPE == PGM_TYPE_EPT
901 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
902# else
903 PGM_INVL_PG(pVCpu, pvFault);
904# endif
905# ifdef VBOX_STRICT
906 RTGCPHYS GCPhys2;
907 uint64_t fPageGst;
908 Assert(HWACCMIsNestedPagingActive(pVM) == pVM->pgm.s.fNestedPaging);
909 if (!pVM->pgm.s.fNestedPaging)
910 {
911 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
912 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%d fPageGst=%RX64\n", rc, fPageGst));
913 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
914 }
915 uint64_t fPageShw;
916 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
917 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
918# endif /* VBOX_STRICT */
919 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
920 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
921 return VINF_SUCCESS;
922 }
923 }
924
925# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
926# ifdef VBOX_STRICT
927 /*
928 * Check for VMM page flags vs. Guest page flags consistency.
929 * Currently only for debug purposes.
930 */
931 if (RT_SUCCESS(rc))
932 {
933 /* Get guest page flags. */
934 uint64_t fPageGst;
935 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
936 if (RT_SUCCESS(rc))
937 {
938 uint64_t fPageShw;
939 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
940
941 /*
942 * Compare page flags.
943 * Note: we have AVL, A, D bits desynched.
944 */
945 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
946 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
947 }
948 else
949 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
950 }
951 else
952 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
953# endif /* VBOX_STRICT */
954# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
955 }
956 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
957# endif /* PGM_OUT_OF_SYNC_IN_GC */
958 }
959 else /* GCPhys == NIL_RTGCPHYS */
960 {
961 /*
962 * Page not present in Guest OS or invalid page table address.
963 * This is potential virtual page access handler food.
964 *
965 * For the present we'll say that our access handlers don't
966 * work for this case - we've already discarded the page table
967 * not present case which is identical to this.
968 *
969 * When we perchance find we need this, we will probably have AVL
970 * trees (offset based) to operate on and we can measure their speed
971 * agains mapping a page table and probably rearrange this handling
972 * a bit. (Like, searching virtual ranges before checking the
973 * physical address.)
974 */
975 }
976 }
977
978# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
979 /*
980 * Conclusion, this is a guest trap.
981 */
982 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
983 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
984 return VINF_EM_RAW_GUEST_TRAP;
985# else
986 /* present, but not a monitored page; perhaps the guest is probing physical memory */
987 return VINF_EM_RAW_EMULATE_INSTR;
988# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
989
990
991# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
992
993 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
994 return VERR_INTERNAL_ERROR;
995# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
996}
997#endif /* !IN_RING3 */
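/*
 * Rough summary of the status codes returned by Trap0eHandler above (gathered
 * from the function body; not an exhaustive contract, and handler callbacks may
 * propagate further VBox status codes of their own):
 *     VINF_SUCCESS              - fault resolved (page synced / dirty bit handled), resume the guest.
 *     VINF_EM_RAW_GUEST_TRAP    - genuine guest #PF, reflect the trap to the guest.
 *     VINF_EM_RAW_EMULATE_INSTR - let the instruction interpreter/recompiler handle the access.
 *     VINF_PGM_SYNC_CR3         - SyncPT failed or a mapping conflict was found, force a CR3 resync.
 *     VINF_EM_NO_MEMORY         - the no-memory force flag was pending after making a page writable.
 */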
998
999
1000/**
1001 * Emulation of the invlpg instruction.
1002 *
1003 *
1004 * @returns VBox status code.
1005 *
1006 * @param pVCpu The VMCPU handle.
1007 * @param GCPtrPage Page to invalidate.
1008 *
1009 * @remark ASSUMES that the guest is updating before invalidating. This order
1010 * isn't required by the CPU, so this is speculative and could cause
1011 * trouble.
1012 * @remark No TLB shootdown is done on any other VCPU as we assume that
1013 * invlpg emulation is the *only* reason for calling this function.
1014 * (The guest has to shoot down TLB entries on other CPUs itself)
1015 * Currently true, but keep in mind!
1016 *
1017 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1018 */
1019PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
1020{
1021#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1022 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1023 && PGM_SHW_TYPE != PGM_TYPE_EPT
1024 int rc;
1025 PVM pVM = pVCpu->CTX_SUFF(pVM);
1026 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1027
1028 Assert(PGMIsLockOwner(pVM));
1029
1030 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1031
1032# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1033 if (pPool->cDirtyPages)
1034 pgmPoolResetDirtyPages(pVM);
1035# endif
1036
1037 /*
1038 * Get the shadow PD entry and skip out if this PD isn't present.
1039 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1040 */
1041# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1042 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1043 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1044
1045 /* Fetch the pgm pool shadow descriptor. */
1046 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1047 Assert(pShwPde);
1048
1049# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1050 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1051 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
1052
1053 /* If the shadow PDPE isn't present, then skip the invalidate. */
1054 if (!pPdptDst->a[iPdpt].n.u1Present)
1055 {
1056 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1057 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1058 return VINF_SUCCESS;
1059 }
1060
1061 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1062 PPGMPOOLPAGE pShwPde = NULL;
1063 PX86PDPAE pPDDst;
1064
1065 /* Fetch the pgm pool shadow descriptor. */
1066 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1067 AssertRCSuccessReturn(rc, rc);
1068 Assert(pShwPde);
1069
1070 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1071 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1072
1073# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1074 /* PML4 */
1075 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1076 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1077 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1078 PX86PDPAE pPDDst;
1079 PX86PDPT pPdptDst;
1080 PX86PML4E pPml4eDst;
1081 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1082 if (rc != VINF_SUCCESS)
1083 {
1084 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1085 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1086 return VINF_SUCCESS;
1087 }
1088 Assert(pPDDst);
1089
1090 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1091 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1092
1093 if (!pPdpeDst->n.u1Present)
1094 {
1095 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1096 return VINF_SUCCESS;
1097 }
1098
1099 /* Fetch the pgm pool shadow descriptor. */
1100 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1101 Assert(pShwPde);
1102
1103# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1104
1105 const SHWPDE PdeDst = *pPdeDst;
1106 if (!PdeDst.n.u1Present)
1107 {
1108 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1109 return VINF_SUCCESS;
1110 }
1111
1112# if defined(IN_RC)
1113 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1114 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1115# endif
1116
1117 /*
1118 * Get the guest PD entry and calc big page.
1119 */
1120# if PGM_GST_TYPE == PGM_TYPE_32BIT
1121 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1122 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1123 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1124# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1125 unsigned iPDSrc = 0;
1126# if PGM_GST_TYPE == PGM_TYPE_PAE
1127 X86PDPE PdpeSrcIgn;
1128 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1129# else /* AMD64 */
1130 PX86PML4E pPml4eSrcIgn;
1131 X86PDPE PdpeSrcIgn;
1132 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1133# endif
1134 GSTPDE PdeSrc;
1135
1136 if (pPDSrc)
1137 PdeSrc = pPDSrc->a[iPDSrc];
1138 else
1139 PdeSrc.u = 0;
1140# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1141
1142# if PGM_GST_TYPE == PGM_TYPE_32BIT
1143 const bool fIsBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1144# else
1145 const bool fIsBigPage = PdeSrc.b.u1Size;
1146# endif
1147
1148# ifdef IN_RING3
1149 /*
1150 * If a CR3 Sync is pending we may ignore the invalidate page operation
1151 * depending on the kind of sync and if it's a global page or not.
1152 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1153 */
1154# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1155 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1156 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1157 && fIsBigPage
1158 && PdeSrc.b.u1Global
1159 )
1160 )
1161# else
1162 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1163# endif
1164 {
1165 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1166 return VINF_SUCCESS;
1167 }
1168# endif /* IN_RING3 */
1169
1170 /*
1171 * Deal with the Guest PDE.
1172 */
1173 rc = VINF_SUCCESS;
1174 if (PdeSrc.n.u1Present)
1175 {
1176 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1177 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1178# ifndef PGM_WITHOUT_MAPPING
1179 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1180 {
1181 /*
1182 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1183 */
1184 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1185 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1186 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1187 }
1188 else
1189# endif /* !PGM_WITHOUT_MAPPING */
1190 if (!fIsBigPage)
1191 {
1192 /*
1193 * 4KB - page.
1194 */
1195 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1196 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1197
1198# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1199 /* Reset the modification counter (OpenSolaris trashes tlb entries very often) */
1200 if (pShwPage->cModifications)
1201 pShwPage->cModifications = 1;
1202# endif
1203
1204# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1205 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1206 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1207# endif
1208 if (pShwPage->GCPhys == GCPhys)
1209 {
1210# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1211 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1212 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1213 if (pPT->a[iPTEDst].n.u1Present)
1214 {
1215 /* This is very unlikely with caching/monitoring enabled. */
1216 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK, iPTEDst);
1217 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1218 }
1219# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1220 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1221 if (RT_SUCCESS(rc))
1222 rc = VINF_SUCCESS;
1223# endif
1224 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1225 PGM_INVL_PG(pVCpu, GCPtrPage);
1226 }
1227 else
1228 {
1229 /*
1230 * The page table address changed.
1231 */
1232 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1233 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1234 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1235 ASMAtomicWriteSize(pPdeDst, 0);
1236 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1237 PGM_INVL_VCPU_TLBS(pVCpu);
1238 }
1239 }
1240 else
1241 {
1242 /*
1243 * 2/4MB - page.
1244 */
1245 /* Before freeing the page, check if anything really changed. */
1246 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1247 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
1248# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1249 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1250 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1251# endif
1252 if ( pShwPage->GCPhys == GCPhys
1253 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1254 {
1255 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1256 /** @todo PAT */
1257 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1258 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1259 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1260 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1261 {
1262 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1263 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1264# if defined(IN_RC)
1265 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1266 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1267# endif
1268 return VINF_SUCCESS;
1269 }
1270 }
1271
1272 /*
1273 * Ok, the page table is present and it's been changed in the guest.
1274 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1275 * We could do this for some flushes in GC too, but we need an algorithm for
1276 * deciding which 4MB pages contain code that is likely to be executed very soon.
1277 */
1278 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1279 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1280 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1281 ASMAtomicWriteSize(pPdeDst, 0);
1282 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1283 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1284 }
1285 }
1286 else
1287 {
1288 /*
1289 * Page directory is not present, mark shadow PDE not present.
1290 */
1291 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1292 {
1293 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1294 ASMAtomicWriteSize(pPdeDst, 0);
1295 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1296 PGM_INVL_PG(pVCpu, GCPtrPage);
1297 }
1298 else
1299 {
1300 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1301 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1302 }
1303 }
1304# if defined(IN_RC)
1305 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1306 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1307# endif
1308 return rc;
1309
1310#else /* guest real and protected mode */
1311 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1312 return VINF_SUCCESS;
1313#endif
1314}
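/*
 * Sketch of the InvalidatePage decision flow above (summary only, no new behaviour):
 *     1. Resolve the shadow PDE; if it (or any parent level) isn't present, there is nothing to do.
 *     2. In ring-3, skip the work when a matching CR3 sync is already pending anyway.
 *     3. Guest PDE present:
 *          - shadow PDE marks a hypervisor mapping -> hand the conflict to SyncPT
 *          - 4KB page table, address unchanged     -> resync the single page (SyncPage)
 *          - 4KB page table, address changed       -> free the shadow PT and clear the PDE
 *          - 2/4MB page, nothing relevant changed  -> keep it and skip the flush
 *          - 2/4MB page, otherwise                 -> free the shadow PT and clear the PDE
 *     4. Guest PDE not present                     -> clear the shadow PDE (unless it's a mapping).
 */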
1315
1316
1317/**
1318 * Update the tracking of shadowed pages.
1319 *
1320 * @param pVCpu The VMCPU handle.
1321 * @param pShwPage The shadow page.
1322 * @param HCPhys The physical page that is being dereferenced.
1323 * @param iPte Shadow PTE index
1324 */
1325DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte)
1326{
1327 PVM pVM = pVCpu->CTX_SUFF(pVM);
1328
1329 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1330 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1331
1332 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1333 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1334 * 2. write protect all shadowed pages. I.e. implement caching.
1335 */
1336 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1337
1338 /*
1339 * Find the guest address.
1340 */
1341 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1342 pRam;
1343 pRam = pRam->CTX_SUFF(pNext))
1344 {
1345 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1346 while (iPage-- > 0)
1347 {
1348 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1349 {
1350 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1351
1352 Assert(pShwPage->cPresent);
1353 Assert(pPool->cPresent);
1354 pShwPage->cPresent--;
1355 pPool->cPresent--;
1356
1357 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1358 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1359 return;
1360 }
1361 }
1362 }
1363
1364 for (;;)
1365 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1366}
1367
1368
1369/**
1370 * Update the tracking of shadowed pages.
1371 *
1372 * @param pVCpu The VMCPU handle.
1373 * @param pShwPage The shadow page.
1374 * @param u16 The top 16 bits of the pPage->HCPhys.
1375 * @param pPage Pointer to the guest page. This will be modified.
1376 * @param iPTDst The index into the shadow table.
1377 */
1378DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1379{
1380 PVM pVM = pVCpu->CTX_SUFF(pVM);
1381 /*
1382 * Just deal with the simple first time here.
1383 */
1384 if (!u16)
1385 {
1386 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1387 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1388 /* Save the page table index. */
1389 PGM_PAGE_SET_PTE_INDEX(pPage, iPTDst);
1390 }
1391 else
1392 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1393
1394 /* write back */
1395 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1396 PGM_PAGE_SET_TRACKING(pPage, u16);
1397
1398 /* update statistics. */
1399 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1400 pShwPage->cPresent++;
1401 if (pShwPage->iFirstPresent > iPTDst)
1402 pShwPage->iFirstPresent = iPTDst;
1403}
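/*
 * Informal note on the tracking word handled above, summarized from the code: the
 * very first shadow reference to a guest page is recorded inline in the page's
 * tracking field via PGMPOOL_TD_MAKE(1, pShwPage->idx) together with the PTE index,
 * while any further references go through pgmPoolTrackPhysExtAddref(). The
 * SyncPageWorkerTrackDeref() routine earlier in this file is the matching release
 * path used when a shadow PTE is torn down.
 */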
1404
1405
1406/**
1407 * Modifies a shadow PTE to account for access handlers.
1408 *
1409 * @param pVM The VM handle.
1410 * @param pPage The page in question.
1411 * @param fPteSrc The flags of the source PTE.
1412 * @param pPteDst The shadow PTE (output).
1413 */
1414DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVM pVM, PCPGMPAGE pPage, uint32_t fPteSrc, PSHWPTE pPteDst)
1415{
1416 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1417 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
1418 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1419 {
1420#if PGM_SHW_TYPE == PGM_TYPE_EPT
1421 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage);
1422 pPteDst->n.u1Present = 1;
1423 pPteDst->n.u1Execute = 1;
1424 pPteDst->n.u1IgnorePAT = 1;
1425 pPteDst->n.u3EMT = VMX_EPT_MEMTYPE_WB;
1426 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1427#else
1428 pPteDst->u = (fPteSrc & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1429 | PGM_PAGE_GET_HCPHYS(pPage);
1430#endif
1431 }
1432#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1433# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1434 else if ( PGM_PAGE_IS_MMIO(pPage)
1435# if PGM_SHW_TYPE != PGM_TYPE_EPT
1436 && ( (fPteSrc & (X86_PTE_RW /*| X86_PTE_D | X86_PTE_A*/ | X86_PTE_US )) /* #PF handles D & A first. */
1437 == (X86_PTE_RW /*| X86_PTE_D | X86_PTE_A*/)
1438 || BTH_IS_NP_ACTIVE(pVM) )
1439# endif
1440# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1441 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1442# endif
1443 )
1444 {
1445 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1446# if PGM_SHW_TYPE == PGM_TYPE_EPT
1447 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1448 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg;
1449 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1450 pPteDst->n.u1Present = 0;
1451 pPteDst->n.u1Write = 1;
1452 pPteDst->n.u1Execute = 0;
1453 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1454 pPteDst->n.u3EMT = 7;
1455# else
1456 /* Set high page frame bits that MBZ (bankers on PAE, CPU dependent on AMD64). */
1457 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P;
1458# endif
1459 }
1460# endif
1461#endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1462 else
1463 {
1464 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1465 pPteDst->u = 0;
1466 }
1467 /** @todo count these kinds of entries. */
1468}
1469
1470
1471/**
1472 * Creates a 4K shadow page for a guest page.
1473 *
1474 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1475 * physical address. Of the PdeSrc argument only the flags are used. No page
1476 * structures will be mapped in this function.
1477 *
1478 * @param pVCpu The VMCPU handle.
1479 * @param pPteDst Destination page table entry.
1480 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1481 * Can safely assume that only the flags are being used.
1482 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1483 * @param pShwPage Pointer to the shadow page.
1484 * @param iPTDst The index into the shadow table.
1485 *
1486 * @remark Not used for 2/4MB pages!
1487 */
1488DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1489 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1490{
1491 if ( PteSrc.n.u1Present
1492 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1493 {
1494 PVM pVM = pVCpu->CTX_SUFF(pVM);
1495
1496# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1497 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1498 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64)
1499 if (pShwPage->fDirty)
1500 {
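 /* The shadow PT is in the pool's 'dirty' state; update its cached copy of the guest PT as well. */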
1501 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1502 PX86PTPAE pGstPT;
1503
1504 pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
1505 pGstPT->a[iPTDst].u = PteSrc.u;
1506 }
1507# endif
1508 /*
1509 * Find the ram range.
1510 */
1511 PPGMPAGE pPage;
1512 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1513 if (RT_SUCCESS(rc))
1514 {
1515 /* Ignore ballooned pages.
1516 Don't return errors or use a fatal assert here as part of a
1517 shadow sync range might include ballooned pages. */
1518 if (PGM_PAGE_IS_BALLOONED(pPage))
1519 {
1520 Assert(!pPteDst->n.u1Present); /** @todo user tracking needs updating if this triggers. */
1521 return;
1522 }
1523
1524#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1525 /* Make the page writable if necessary. */
1526 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1527 && ( PGM_PAGE_IS_ZERO(pPage)
1528 || ( PteSrc.n.u1Write
1529 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1530# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1531 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1532# endif
1533# ifdef VBOX_WITH_PAGE_SHARING
1534 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1535# endif
1536 )
1537 )
1538 )
1539 {
1540 rc = pgmPhysPageMakeWritable(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1541 AssertRC(rc);
1542 }
1543#endif
1544
1545 /*
1546 * Make page table entry.
1547 */
1548 SHWPTE PteDst;
1549 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1550 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage,
1551 PteSrc.u & ~( X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT
1552 | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW),
1553 &PteDst);
1554 else
1555 {
1556#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1557 /*
1558 * If the page or page directory entry is not marked accessed,
1559 * we mark the page not present.
1560 */
1561 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1562 {
1563 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1564 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1565 PteDst.u = 0;
1566 }
1567 /*
1568 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1569 * when the page is modified.
1570 */
1571 else if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1572 {
1573 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1574 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1575 | PGM_PAGE_GET_HCPHYS(pPage)
1576 | PGM_PTFLAGS_TRACK_DIRTY;
1577 }
1578 else
1579#endif
1580 {
1581 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1582#if PGM_SHW_TYPE == PGM_TYPE_EPT
1583 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1584 PteDst.n.u1Present = 1;
1585 PteDst.n.u1Write = 1;
1586 PteDst.n.u1Execute = 1;
1587 PteDst.n.u1IgnorePAT = 1;
1588 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1589 /* PteDst.n.u1Size = 0 */
1590#else
1591 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1592 | PGM_PAGE_GET_HCPHYS(pPage);
1593#endif
1594 }
1595
1596 /*
1597 * Make sure only allocated pages are mapped writable.
1598 */
1599 if ( PteDst.n.u1Write
1600 && PteDst.n.u1Present
1601 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1602 {
1603 /* Still applies to shared pages. */
1604 Assert(!PGM_PAGE_IS_ZERO(pPage));
1605 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. Why isn't it? */
1606 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1607 }
1608 }
1609
1610 /*
1611 * Keep user track up to date.
1612 */
1613 if (PteDst.n.u1Present)
1614 {
1615 if (!pPteDst->n.u1Present)
1616 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1617 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1618 {
1619 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1620 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1621 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1622 }
1623 }
1624 else if (pPteDst->n.u1Present)
1625 {
1626 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1627 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1628 }
1629
1630 /*
1631 * Update statistics and commit the entry.
1632 */
1633#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1634 if (!PteSrc.n.u1Global)
1635 pShwPage->fSeenNonGlobal = true;
1636#endif
1637 ASMAtomicWriteSize(pPteDst, PteDst.u);
1638 return;
1639 }
1640
1641/** @todo count these three different kinds. */
1642 Log2(("SyncPageWorker: invalid address in Pte\n"));
1643 }
1644 else if (!PteSrc.n.u1Present)
1645 Log2(("SyncPageWorker: page not present in Pte\n"));
1646 else
1647 Log2(("SyncPageWorker: invalid Pte\n"));
1648
1649 /*
1650 * The page is not present or the PTE is bad. Replace the shadow PTE by
1651 * an empty entry, making sure to keep the user tracking up to date.
1652 */
1653 if (pPteDst->n.u1Present)
1654 {
1655 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1656 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1657 }
1658 ASMAtomicWriteSize(pPteDst, 0);
1659}
1660
1661
1662/**
1663 * Syncs a guest OS page.
1664 *
1665 * There are no conflicts at this point, neither is there any need for
1666 * page table allocations.
1667 *
1668 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1669 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1670 *
1671 * @returns VBox status code.
1672 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1673 * @param pVCpu The VMCPU handle.
1674 * @param PdeSrc Page directory entry of the guest.
1675 * @param GCPtrPage Guest context page address.
1676 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1677 * @param uErr Fault error (X86_TRAP_PF_*).
1678 */
1679PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1680{
1681 PVM pVM = pVCpu->CTX_SUFF(pVM);
1682 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1683 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1684
1685 Assert(PGMIsLockOwner(pVM));
1686
1687#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1688 || PGM_GST_TYPE == PGM_TYPE_PAE \
1689 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1690 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1691 && PGM_SHW_TYPE != PGM_TYPE_EPT
1692
1693 /*
1694 * Assert preconditions.
1695 */
1696 Assert(PdeSrc.n.u1Present);
1697 Assert(cPages);
1698# if 0 /* rarely useful; leave for debugging. */
1699 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1700# endif
1701
1702 /*
1703 * Get the shadow PDE, find the shadow page table in the pool.
1704 */
1705# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1706 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1707 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1708
1709 /* Fetch the pgm pool shadow descriptor. */
1710 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1711 Assert(pShwPde);
1712
1713# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1714 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1715 PPGMPOOLPAGE pShwPde = NULL;
1716 PX86PDPAE pPDDst;
1717
1718 /* Fetch the pgm pool shadow descriptor. */
1719 int rc2 = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1720 AssertRCSuccessReturn(rc2, rc2);
1721 Assert(pShwPde);
1722
1723 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1724 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1725
1726# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1727 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1728 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1729 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1730 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1731
1732 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1733 AssertRCSuccessReturn(rc2, rc2);
1734 Assert(pPDDst && pPdptDst);
1735 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1736# endif
1737 SHWPDE PdeDst = *pPdeDst;
1738
1739 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
1740 if (!PdeDst.n.u1Present)
1741 {
1742 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE p=%p/%RX64\n", pPdeDst, (uint64_t)PdeDst.u));
1743 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
1744 return VINF_SUCCESS; /* force the instruction to be executed again. */
1745 }
1746
1747 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1748 Assert(pShwPage);
1749
1750# if PGM_GST_TYPE == PGM_TYPE_AMD64
1751 /* Fetch the pgm pool shadow descriptor. */
1752 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1753 Assert(pShwPde);
1754# endif
1755
1756# if defined(IN_RC)
1757 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1758 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1759# endif
1760
1761 /*
1762 * Check that the page is present and that the shadow PDE isn't out of sync.
1763 */
1764# if PGM_GST_TYPE == PGM_TYPE_32BIT
1765 const bool fBigPage = PdeSrc.b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
1766# else
1767 const bool fBigPage = PdeSrc.b.u1Size;
1768# endif
1769 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
1770 RTGCPHYS GCPhys;
1771 if (!fBigPage)
1772 {
1773 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1774# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1775 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1776 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
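 /* (An odd iPDDst means this shadow PT covers the upper 2 KB half of the 4 KB guest page table.) */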
1777# endif
1778 }
1779 else
1780 {
1781 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
1782# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1783 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1784 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
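 /* (Bit X86_PD_PAE_SHIFT of the address selects the first or second 2 MB half of the 4 MB guest page.) */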
1785# endif
1786 }
1787 if ( fPdeValid
1788 && pShwPage->GCPhys == GCPhys
1789 && PdeSrc.n.u1Present
1790 && PdeSrc.n.u1User == PdeDst.n.u1User
1791 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
1792# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
1793 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !CPUMIsGuestNXEnabled(pVCpu))
1794# endif
1795 )
1796 {
1797 /*
1798 * Check that the PDE is marked accessed already.
1799 * Since we set the accessed bit *before* getting here on a #PF, this
1800 * check is only meant for dealing with non-#PF'ing paths.
1801 */
1802 if (PdeSrc.n.u1Accessed)
1803 {
1804 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1805 if (!fBigPage)
1806 {
1807 /*
1808 * 4KB Page - Map the guest page table.
1809 */
1810 PGSTPT pPTSrc;
1811 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
1812 if (RT_SUCCESS(rc))
1813 {
1814# ifdef PGM_SYNC_N_PAGES
1815 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
1816 if ( cPages > 1
1817 && !(uErr & X86_TRAP_PF_P)
1818 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
1819 {
1820 /*
1821 * This code path is currently only taken when the caller is PGMTrap0eHandler
1822 * for non-present pages!
1823 *
1824 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
1825 * deal with locality.
1826 */
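 /* E.g. with PGM_SYNC_NR_PAGES == 8 this covers the 4 entries below and 3 above the faulting one, clamped to the ends of the shadow page table. */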
1827 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1828# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1829 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1830 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
1831# else
1832 const unsigned offPTSrc = 0;
1833# endif
1834 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
1835 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
1836 iPTDst = 0;
1837 else
1838 iPTDst -= PGM_SYNC_NR_PAGES / 2;
1839 for (; iPTDst < iPTDstEnd; iPTDst++)
1840 {
1841 if (!pPTDst->a[iPTDst].n.u1Present)
1842 {
1843 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
1844 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
1845 NOREF(GCPtrCurPage);
1846#ifndef IN_RING0
1847 /*
1848 * Assuming kernel code will be marked as supervisor - and not as user level
1849 * and executed using a conforming code selector - And marked as readonly.
1850 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
1851 */
1852 PPGMPAGE pPage;
1853 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
1854 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
1855 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
1856 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
1857 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1858 )
1859#endif /* else: CSAM not active */
1860 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1861 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
1862 GCPtrCurPage, PteSrc.n.u1Present,
1863 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1864 PteSrc.n.u1User & PdeSrc.n.u1User,
1865 (uint64_t)PteSrc.u,
1866 (uint64_t)pPTDst->a[iPTDst].u,
1867 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1868 }
1869 }
1870 }
1871 else
1872# endif /* PGM_SYNC_N_PAGES */
1873 {
1874 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1875 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1876 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1877 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1878 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1879 GCPtrPage, PteSrc.n.u1Present,
1880 PteSrc.n.u1Write & PdeSrc.n.u1Write,
1881 PteSrc.n.u1User & PdeSrc.n.u1User,
1882 (uint64_t)PteSrc.u,
1883 (uint64_t)pPTDst->a[iPTDst].u,
1884 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1885 }
1886 }
1887 else /* MMIO or invalid page: emulated in #PF handler. */
1888 {
1889 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
1890 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
1891 }
1892 }
1893 else
1894 {
1895 /*
1896 * 4/2MB page - lazy syncing shadow 4K pages.
1897 * (There are many causes of getting here, it's no longer only CSAM.)
1898 */
1899 /* Calculate the GC physical address of this 4KB shadow page. */
1900 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
1901 /* Find ram range. */
1902 PPGMPAGE pPage;
1903 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
1904 if (RT_SUCCESS(rc))
1905 {
1906 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1907
1908# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1909 /* Try to make the page writable if necessary. */
1910 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1911 && ( PGM_PAGE_IS_ZERO(pPage)
1912 || ( PdeSrc.n.u1Write
1913 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1914# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1915 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1916# endif
1917# ifdef VBOX_WITH_PAGE_SHARING
1918 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1919# endif
1920 )
1921 )
1922 )
1923 {
1924 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1925 AssertRC(rc);
1926 }
1927# endif
1928
1929 /*
1930 * Make shadow PTE entry.
1931 */
1932 SHWPTE PteDst;
1933 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1934 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage,
1935 PdeSrc.u & ~( X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK
1936 | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT),
1937 &PteDst);
1938 else
1939 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1940 | PGM_PAGE_GET_HCPHYS(pPage);
1941
1942 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1943 if ( PteDst.n.u1Present
1944 && !pPTDst->a[iPTDst].n.u1Present)
1945 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1946
1947 /* Make sure only allocated pages are mapped writable. */
1948 if ( PteDst.n.u1Write
1949 && PteDst.n.u1Present
1950 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1951 {
1952 /* Still applies to shared pages. */
1953 Assert(!PGM_PAGE_IS_ZERO(pPage));
1954 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
1955 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
1956 }
1957
1958 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
1959
1960 /*
1961 * If the page is not flagged as dirty and is writable, then make it read-only
1962 * at PD level, so we can set the dirty bit when the page is modified.
1963 *
1964 * ASSUMES that page access handlers are implemented on page table entry level.
1965 * Thus we will first catch the dirty access and set PDE.D and restart. If
1966 * there is an access handler, we'll trap again and let it work on the problem.
1967 */
1968 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
1969 * As for invlpg, it simply frees the whole shadow PT.
1970 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
1971 if ( !PdeSrc.b.u1Dirty
1972 && PdeSrc.b.u1Write)
1973 {
1974 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
1975 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
1976 PdeDst.n.u1Write = 0;
1977 }
1978 else
1979 {
1980 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
1981 PdeDst.n.u1Write = PdeSrc.n.u1Write;
1982 }
1983 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
1984 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
1985 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
1986 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
1987 }
1988 else
1989 {
1990 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
1991 /** @todo must wipe the shadow page table in this case. */
1992 }
1993 }
1994# if defined(IN_RC)
1995 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1996 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1997# endif
1998 return VINF_SUCCESS;
1999 }
2000
2001 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
2002 }
2003 else if (fPdeValid)
2004 {
2005 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2006 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2007 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2008 }
2009 else
2010 {
2011/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2012 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2013 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2014 }
2015
2016 /*
2017 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
2018 * Yea, I'm lazy.
2019 */
2020 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2021 ASMAtomicWriteSize(pPdeDst, 0);
2022
2023# if defined(IN_RC)
2024 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2025 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2026# endif
2027 PGM_INVL_VCPU_TLBS(pVCpu);
2028 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2029
2030
2031#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2032 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2033 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2034 && !defined(IN_RC)
2035
2036# ifdef PGM_SYNC_N_PAGES
2037 /*
2038 * Get the shadow PDE, find the shadow page table in the pool.
2039 */
2040# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2041 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
2042
2043# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2044 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
2045
2046# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2047 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2048 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2049 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2050 X86PDEPAE PdeDst;
2051 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2052
2053 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2054 AssertRCSuccessReturn(rc, rc);
2055 Assert(pPDDst && pPdptDst);
2056 PdeDst = pPDDst->a[iPDDst];
2057# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2058 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2059 PEPTPD pPDDst;
2060 EPTPDE PdeDst;
2061
2062 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2063 if (rc != VINF_SUCCESS)
2064 {
2065 AssertRC(rc);
2066 return rc;
2067 }
2068 Assert(pPDDst);
2069 PdeDst = pPDDst->a[iPDDst];
2070# endif
2071 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2072 if (!PdeDst.n.u1Present)
2073 {
2074 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2075 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2076 return VINF_SUCCESS; /* force the instruction to be executed again. */
2077 }
2078
2079 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2080 if (PdeDst.n.u1Size)
2081 {
2082 Assert(HWACCMIsNestedPagingActive(pVM) == pVM->pgm.s.fNestedPaging);
2083 Assert(pVM->pgm.s.fNestedPaging);
2084 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2085 return VINF_SUCCESS;
2086 }
2087
2088 /* Mask away the page offset. */
2089 GCPtrPage &= ~((RTGCPTR)0xfff);
2090
2091 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2092 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2093
2094 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2095 if ( cPages > 1
2096 && !(uErr & X86_TRAP_PF_P)
2097 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2098 {
2099 /*
2100 * This code path is currently only taken when the caller is PGMTrap0eHandler
2101 * for non-present pages!
2102 *
2103 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2104 * deal with locality.
2105 */
2106 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2107 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2108 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2109 iPTDst = 0;
2110 else
2111 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2112 for (; iPTDst < iPTDstEnd; iPTDst++)
2113 {
2114 if (!pPTDst->a[iPTDst].n.u1Present)
2115 {
2116 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2117 GSTPTE PteSrc;
2118
2119 /* Fake the page table entry */
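 /* There is no guest paging in this mode, so fake an identity mapping: the page frame equals the (guest physical) address being synced. */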
2120 PteSrc.u = GCPtrCurPage;
2121 PteSrc.n.u1Present = 1;
2122 PteSrc.n.u1Dirty = 1;
2123 PteSrc.n.u1Accessed = 1;
2124 PteSrc.n.u1Write = 1;
2125 PteSrc.n.u1User = 1;
2126
2127 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2128
2129 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2130 GCPtrCurPage, PteSrc.n.u1Present,
2131 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2132 PteSrc.n.u1User & PdeSrc.n.u1User,
2133 (uint64_t)PteSrc.u,
2134 (uint64_t)pPTDst->a[iPTDst].u,
2135 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2136
2137 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2138 break;
2139 }
2140 else
2141 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2142 }
2143 }
2144 else
2145# endif /* PGM_SYNC_N_PAGES */
2146 {
2147 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2148 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2149 GSTPTE PteSrc;
2150
2151 /* Fake the page table entry */
2152 PteSrc.u = GCPtrCurPage;
2153 PteSrc.n.u1Present = 1;
2154 PteSrc.n.u1Dirty = 1;
2155 PteSrc.n.u1Accessed = 1;
2156 PteSrc.n.u1Write = 1;
2157 PteSrc.n.u1User = 1;
2158 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2159
2160 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2161 GCPtrPage, PteSrc.n.u1Present,
2162 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2163 PteSrc.n.u1User & PdeSrc.n.u1User,
2164 (uint64_t)PteSrc.u,
2165 (uint64_t)pPTDst->a[iPTDst].u,
2166 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2167 }
2168 return VINF_SUCCESS;
2169
2170#else
2171 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2172 return VERR_INTERNAL_ERROR;
2173#endif
2174}
2175
2176
2177#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2178
2179/**
2180 * CheckPageFault helper for returning a page fault indicating a non-present
2181 * (NP) entry in the page translation structures.
2182 *
2183 * @returns VINF_EM_RAW_GUEST_TRAP.
2184 * @param pVCpu The virtual CPU to operate on.
2185 * @param uErr The error code of the shadow fault. Corrections to
2186 * TRPM's copy will be made if necessary.
2187 * @param GCPtrPage For logging.
2188 * @param uPageFaultLevel For logging.
2189 */
2190DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnNP)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2191{
2192 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2193 AssertMsg(!(uErr & X86_TRAP_PF_P), ("%#x\n", uErr));
2194 AssertMsg(!(uErr & X86_TRAP_PF_RSVD), ("%#x\n", uErr));
2195 if (uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2196 TRPMSetErrorCode(pVCpu, uErr & ~(X86_TRAP_PF_RSVD | X86_TRAP_PF_P));
2197
2198 Log(("CheckPageFault: real page fault (notp) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2199 return VINF_EM_RAW_GUEST_TRAP;
2200}
2201
2202
2203/**
2204 * CheckPageFault helper for returning a page fault indicating a reserved bit
2205 * (RSVD) error in the page translation structures.
2206 *
2207 * @returns VINF_EM_RAW_GUEST_TRAP.
2208 * @param pVCpu The virtual CPU to operate on.
2209 * @param uErr The error code of the shadow fault. Corrections to
2210 * TRPM's copy will be made if necessary.
2211 * @param GCPtrPage For logging.
2212 * @param uPageFaultLevel For logging.
2213 */
2214DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnRSVD)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2215{
2216 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2217 if ((uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2218 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_RSVD | X86_TRAP_PF_P);
2219
2220 Log(("CheckPageFault: real page fault (rsvd) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2221 return VINF_EM_RAW_GUEST_TRAP;
2222}
2223
2224
2225/**
2226 * CheckPageFault helper for returning a page protection fault (P).
2227 *
2228 * @returns VINF_EM_RAW_GUEST_TRAP.
2229 * @param pVCpu The virtual CPU to operate on.
2230 * @param uErr The error code of the shadow fault. Corrections to
2231 * TRPM's copy will be made if necessary.
2232 * @param GCPtrPage For logging.
2233 * @param uPageFaultLevel For logging.
2234 */
2235DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnProt)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2236{
2237 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2238 AssertMsg(uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID), ("%#x\n", uErr));
2239 if ((uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) != X86_TRAP_PF_P)
2240 TRPMSetErrorCode(pVCpu, (uErr & ~X86_TRAP_PF_RSVD) | X86_TRAP_PF_P);
2241
2242 Log(("CheckPageFault: real page fault (prot) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2243 return VINF_EM_RAW_GUEST_TRAP;
2244}
2245
2246
2247/**
2248 * Investigate a page fault to identify ones targeted at the guest and to
2249 * handle write protection page faults caused by dirty bit tracking.
2250 *
2251 * This will also detect invalid entries and raise X86_TRAP_PF_RSVD.
2252 *
2253 * @returns VBox status code.
2254 * @param pVCpu The VMCPU handle.
2255 * @param uErr Page fault error code. The X86_TRAP_PF_RSVD flag
2256 * cannot be trusted as it is used for MMIO optimizations.
2257 * @param pPdeSrc Guest page directory entry.
2258 * @param GCPtrPage Guest context page address.
2259 */
2260PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2261{
2262 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2263 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2264# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2265 bool fMaybeNXEFault = (uErr & X86_TRAP_PF_ID) && CPUMIsGuestNXEnabled(pVCpu);
2266# endif
2267 bool fMaybeWriteProtFault = fWriteFault && (fUserLevelFault || CPUMIsGuestR0WriteProtEnabled(pVCpu));
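 /* Supervisor writes to read-only pages only fault when CR0.WP is set, hence the CPUMIsGuestR0WriteProtEnabled() check. */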
2268 PVM pVM = pVCpu->CTX_SUFF(pVM);
2269 int rc;
2270
2271 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2272
2273 /*
2274 * Note! For PAE it is safe to assume that bad guest physical addresses
2275 * (which return all FFs) in the translation tables will cause
2276 * #PF(RSVD). The same will be the case for long mode provided the
2277 * physical address width is less than 52 bits - this we ASSUME.
2278 *
2279 * Note! No convenient shortcuts here, we have to validate everything!
2280 */
2281
2282# if PGM_GST_TYPE == PGM_TYPE_AMD64
2283 /*
2284 * Real page fault? (PML4E level)
2285 */
2286 PX86PML4 pPml4Src = pgmGstGetLongModePML4Ptr(pVCpu);
2287 if (RT_UNLIKELY(!pPml4Src))
2288 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 0);
2289
2290 PX86PML4E pPml4eSrc = &pPml4Src->a[(GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK];
2291 if (!pPml4eSrc->n.u1Present)
2292 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 0);
2293 if (RT_UNLIKELY(!GST_IS_PML4E_VALID(pVCpu, *pPml4eSrc)))
2294 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 0);
2295 if ( (fMaybeWriteProtFault && !pPml4eSrc->n.u1Write)
2296 || (fMaybeNXEFault && pPml4eSrc->n.u1NoExecute)
2297 || (fUserLevelFault && !pPml4eSrc->n.u1User) )
2298 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 0);
2299
2300 /*
2301 * Real page fault? (PDPE level)
2302 */
2303 PX86PDPT pPdptSrc;
2304 rc = PGM_GCPHYS_2_PTR_BY_VMCPU(pVCpu, pPml4eSrc->u & X86_PML4E_PG_MASK, &pPdptSrc);
2305 if (RT_FAILURE(rc))
2306 {
2307 AssertMsgReturn(rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc), rc);
2308 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2309 }
2310
2311 PX86PDPE pPdpeSrc = &pPdptSrc->a[(GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64];
2312 if (!pPdpeSrc->n.u1Present)
2313 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 1);
2314 if (!GST_IS_PDPE_VALID(pVCpu, *pPdpeSrc))
2315 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2316 if ( (fMaybeWriteProtFault && !pPdpeSrc->lm.u1Write)
2317 || (fMaybeNXEFault && pPdpeSrc->lm.u1NoExecute)
2318 || (fUserLevelFault && !pPdpeSrc->lm.u1User) )
2319 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 1);
2320
2321# elif PGM_GST_TYPE == PGM_TYPE_PAE
2322 /*
2323 * Real page fault? (PDPE level)
2324 */
2325 PX86PDPT pPdptSrc = pgmGstGetPaePDPTPtr(pVCpu);
2326 if (RT_UNLIKELY(!pPdptSrc))
2327 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2328/** @todo Handle bad CR3 address. */
2329 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(pVCpu, GCPtrPage);
2330 if (!pPdpeSrc->n.u1Present)
2331 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 1);
2332 if (!GST_IS_PDPE_VALID(pVCpu, *pPdpeSrc))
2333 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2334# endif /* PGM_GST_TYPE == PGM_TYPE_PAE */
2335
2336 /*
2337 * Real page fault? (PDE level)
2338 */
2339 if (!pPdeSrc->n.u1Present)
2340 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 2);
2341# if PGM_GST_TYPE == PGM_TYPE_32BIT
2342 bool const fBigPage = pPdeSrc->b.u1Size && CPUMIsGuestPageSizeExtEnabled(pVCpu);
2343# else
2344 bool const fBigPage = pPdeSrc->b.u1Size;
2345# endif
2346 if (!fBigPage ? !GST_IS_PDE_VALID(pVCpu, *pPdeSrc) : !GST_IS_BIG_PDE_VALID(pVCpu, *pPdeSrc))
2347 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 2);
2348 if ( (fMaybeWriteProtFault && !pPdeSrc->n.u1Write)
2349# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2350 || (fMaybeNXEFault && pPdeSrc->n.u1NoExecute)
2351# endif
2352 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2353 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 2);
2354
2355 /*
2356 * First check the easy case where the page directory has been marked
2357 * read-only to track the dirty bit of an emulated BIG page.
2358 */
2359 if (fBigPage)
2360 {
2361 /* Mark guest page directory as accessed */
2362# if PGM_GST_TYPE == PGM_TYPE_AMD64
2363 pPml4eSrc->n.u1Accessed = 1;
2364 pPdpeSrc->lm.u1Accessed = 1;
2365# endif
2366 pPdeSrc->b.u1Accessed = 1;
2367
2368 /* Mark the guest PDE dirty if it's a write access. */
2369 if (fWriteFault)
2370 pPdeSrc->b.u1Dirty = 1;
2371 }
2372 else
2373 {
2374 /*
2375 * Map the guest page table.
2376 */
2377 PGSTPT pPTSrc;
2378 PGSTPTE pPteSrc;
2379 GSTPTE PteSrc;
2380 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2381 if (RT_SUCCESS(rc))
2382 {
2383 pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2384 PteSrc.u = pPteSrc->u;
2385 }
2386 else if (rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS)
2387 {
2388 /* All bits in the PTE are set. */
2389# if PGM_GST_TYPE == PGM_TYPE_32BIT
2390 PteSrc.u = UINT32_MAX;
2391# else
2392 PteSrc.u = UINT64_MAX;
2393# endif
2394 pPteSrc = &PteSrc;
2395 }
2396 else
2397 {
2398 AssertRC(rc);
2399 return rc;
2400 }
2401
2402 /*
2403 * Real page fault?
2404 */
2405 if (!PteSrc.n.u1Present)
2406 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 3);
2407 if (!GST_IS_PTE_VALID(pVCpu, PteSrc))
2408 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 3);
2409 if ( (fMaybeWriteProtFault && !PteSrc.n.u1Write)
2410# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2411 || (fMaybeNXEFault && PteSrc.n.u1NoExecute)
2412# endif
2413 || (fUserLevelFault && !PteSrc.n.u1User) )
2414 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 0);
2415
2416 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2417
2418 /*
2419 * Set the accessed bits in the page directory and the page table.
2420 */
2421# if PGM_GST_TYPE == PGM_TYPE_AMD64
2422 pPml4eSrc->n.u1Accessed = 1;
2423 pPdpeSrc->lm.u1Accessed = 1;
2424# endif
2425 pPdeSrc->n.u1Accessed = 1;
2426 pPteSrc->n.u1Accessed = 1;
2427
2428 /*
2429 * Set the dirty flag in the PTE if it's a write access.
2430 */
2431 if (fWriteFault)
2432 {
2433# ifdef VBOX_WITH_STATISTICS
2434 if (!pPteSrc->n.u1Dirty)
2435 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2436 else
2437 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2438# endif
2439
2440 pPteSrc->n.u1Dirty = 1;
2441 }
2442 }
2443 return VINF_SUCCESS;
2444}
2445
2446
2447/**
2448 * Handle dirty bit tracking faults.
2449 *
2450 * @returns VBox status code.
2451 * @param pVCpu The VMCPU handle.
2452 * @param uErr Page fault error code.
2453 * @param pPdeSrc Guest page directory entry.
2454 * @param pPdeDst Shadow page directory entry.
2455 * @param GCPtrPage Guest context page address.
2456 */
2457PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2458{
2459# if PGM_GST_TYPE == PGM_TYPE_32BIT
2460 const bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
2461# else
2462 const bool fBigPagesSupported = true;
2463# endif
2464 PVM pVM = pVCpu->CTX_SUFF(pVM);
2465 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2466
2467 Assert(PGMIsLockOwner(pVM));
2468
2469 /*
2470 * Handle big page.
2471 */
2472 if (pPdeSrc->b.u1Size && fBigPagesSupported)
2473 {
2474 if ( pPdeDst->n.u1Present
2475 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2476 {
2477 SHWPDE PdeDst = *pPdeDst;
2478
2479 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2480 Assert(pPdeSrc->b.u1Write);
2481
2482 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2483 * fault again and take this path to only invalidate the entry (see below).
2484 */
2485 PdeDst.n.u1Write = 1;
2486 PdeDst.n.u1Accessed = 1;
2487 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2488 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2489 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2490 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2491 }
2492
2493# ifdef IN_RING0
2494 /* Check for stale TLB entry; only applies to the SMP guest case. */
2495 if ( pVM->cCpus > 1
2496 && pPdeDst->n.u1Write
2497 && pPdeDst->n.u1Accessed)
2498 {
2499 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2500 if (pShwPage)
2501 {
2502 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2503 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2504 if ( pPteDst->n.u1Present
2505 && pPteDst->n.u1Write)
2506 {
2507 /* Stale TLB entry. */
2508 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2509 PGM_INVL_PG(pVCpu, GCPtrPage);
2510 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2511 }
2512 }
2513 }
2514# endif /* IN_RING0 */
2515 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2516 }
2517
2518 /*
2519 * Map the guest page table.
2520 */
2521 PGSTPT pPTSrc;
2522 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2523 if (RT_FAILURE(rc))
2524 {
2525 AssertRC(rc);
2526 return rc;
2527 }
2528
2529 if (pPdeDst->n.u1Present)
2530 {
2531 PGSTPTE pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2532 const GSTPTE PteSrc = *pPteSrc;
2533
2534#ifndef IN_RING0
2535 /* Bail out here as pgmPoolGetPage will return NULL and we'll crash below.
2536 * Our individual shadow handlers will provide more information and force a fatal exit.
2537 */
2538 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2539 {
2540 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2541 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2542 }
2543#endif
2544 /*
2545 * Map shadow page table.
2546 */
2547 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2548 if (pShwPage)
2549 {
2550 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2551 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2552 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2553 {
2554 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2555 {
2556 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2557 SHWPTE PteDst = *pPteDst;
2558
2559 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2560 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2561
2562 Assert(pPteSrc->n.u1Write);
2563
2564 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2565 * entry will not harm; write access will simply fault again and
2566 * take this path to only invalidate the entry.
2567 */
2568 if (RT_LIKELY(pPage))
2569 {
2570 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2571 {
2572 AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2573 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2574 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2575 PteDst.n.u1Write = 0;
2576 }
2577 else
2578 {
2579 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2580 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2581 {
2582 rc = pgmPhysPageMakeWritable(pVM, pPage, pPteSrc->u & GST_PTE_PG_MASK);
2583 AssertRC(rc);
2584 }
2585 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2586 PteDst.n.u1Write = 1;
2587 else
2588 {
2589 /* Still applies to shared pages. */
2590 Assert(!PGM_PAGE_IS_ZERO(pPage));
2591 PteDst.n.u1Write = 0;
2592 }
2593 }
2594 }
2595 else
2596 PteDst.n.u1Write = 1; /** @todo r=bird: This doesn't make sense to me. */
2597
2598 PteDst.n.u1Dirty = 1;
2599 PteDst.n.u1Accessed = 1;
2600 PteDst.au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2601 ASMAtomicWriteSize(pPteDst, PteDst.u);
2602 PGM_INVL_PG(pVCpu, GCPtrPage);
2603 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2604 }
2605
2606# ifdef IN_RING0
2607 /* Check for stale TLB entry; only applies to the SMP guest case. */
2608 if ( pVM->cCpus > 1
2609 && pPteDst->n.u1Write == 1
2610 && pPteDst->n.u1Accessed == 1)
2611 {
2612 /* Stale TLB entry. */
2613 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2614 PGM_INVL_PG(pVCpu, GCPtrPage);
2615 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2616 }
2617# endif
2618 }
2619 }
2620 else
2621 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2622 }
2623
2624 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2625}
2626
2627#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2628
2629
2630/**
2631 * Sync a shadow page table.
2632 *
2633 * The shadow page table is not present. This includes the case where
2634 * there is a conflict with a mapping.
2635 *
2636 * @returns VBox status code.
2637 * @param pVCpu The VMCPU handle.
2638 * @param iPDSrc Page directory index.
2639 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2640 * Assume this is a temporary mapping.
2641 * @param GCPtrPage GC Pointer of the page that caused the fault
2642 */
2643PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2644{
2645 PVM pVM = pVCpu->CTX_SUFF(pVM);
2646 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2647
2648 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2649#if 0 /* rarely useful; leave for debugging. */
2650 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2651#endif
2652 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2653
2654 Assert(PGMIsLocked(pVM));
2655
2656#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2657 || PGM_GST_TYPE == PGM_TYPE_PAE \
2658 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2659 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2660 && PGM_SHW_TYPE != PGM_TYPE_EPT
2661
2662 int rc = VINF_SUCCESS;
2663
2664 /*
2665 * Validate input a little bit.
2666 */
2667 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2668# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2669 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2670 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2671
2672 /* Fetch the pgm pool shadow descriptor. */
2673 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2674 Assert(pShwPde);
2675
2676# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2677 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2678 PPGMPOOLPAGE pShwPde = NULL;
2679 PX86PDPAE pPDDst;
2680 PSHWPDE pPdeDst;
2681
2682 /* Fetch the pgm pool shadow descriptor. */
2683 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2684 AssertRCSuccessReturn(rc, rc);
2685 Assert(pShwPde);
2686
2687 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2688 pPdeDst = &pPDDst->a[iPDDst];
2689
2690# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2691 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2692 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2693 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2694 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2695 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2696 AssertRCSuccessReturn(rc, rc);
2697 Assert(pPDDst);
2698 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2699# endif
2700 SHWPDE PdeDst = *pPdeDst;
2701
2702# if PGM_GST_TYPE == PGM_TYPE_AMD64
2703 /* Fetch the pgm pool shadow descriptor. */
2704 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2705 Assert(pShwPde);
2706# endif
2707
2708# ifndef PGM_WITHOUT_MAPPINGS
2709 /*
2710 * Check for conflicts.
2711 * RC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2712 * R3: Simply resolve the conflict.
2713 */
2714 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2715 {
2716 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2717# ifndef IN_RING3
2718 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2719 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2720 return VERR_ADDRESS_CONFLICT;
2721
2722# else /* IN_RING3 */
2723 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2724 Assert(pMapping);
2725# if PGM_GST_TYPE == PGM_TYPE_32BIT
2726 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2727# elif PGM_GST_TYPE == PGM_TYPE_PAE
2728 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2729# else
2730 AssertFailed(); /* can't happen for amd64 */
2731# endif
2732 if (RT_FAILURE(rc))
2733 {
2734 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2735 return rc;
2736 }
2737 PdeDst = *pPdeDst;
2738# endif /* IN_RING3 */
2739 }
2740# endif /* !PGM_WITHOUT_MAPPINGS */
2741 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2742
2743# if defined(IN_RC)
2744 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2745 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2746# endif
2747
2748 /*
2749 * Sync page directory entry.
2750 */
2751 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2752 if (PdeSrc.n.u1Present)
2753 {
2754 /*
2755 * Allocate & map the page table.
2756 */
2757 PSHWPT pPTDst;
2758# if PGM_GST_TYPE == PGM_TYPE_32BIT
2759 const bool fPageTable = !PdeSrc.b.u1Size || !CPUMIsGuestPageSizeExtEnabled(pVCpu);
2760# else
2761 const bool fPageTable = !PdeSrc.b.u1Size;
2762# endif
2763 PPGMPOOLPAGE pShwPage;
2764 RTGCPHYS GCPhys;
2765 if (fPageTable)
2766 {
2767 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2768# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2769 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2770 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2771# endif
2772 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
2773 }
2774 else
2775 {
2776 PGMPOOLACCESS enmAccess;
2777# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2778 const bool fNoExecute = PdeSrc.n.u1NoExecute && CPUMIsGuestNXEnabled(pVCpu);
2779# else
2780 const bool fNoExecute = false;
2781# endif
2782
2783 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
2784# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2785 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2786 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2787# endif
2788 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2789 if (PdeSrc.n.u1User)
2790 {
2791 if (PdeSrc.n.u1Write)
2792 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2793 else
2794 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2795 }
2796 else
2797 {
2798 if (PdeSrc.n.u1Write)
2799 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2800 else
2801 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2802 }
2803 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
2804 }
2805 if (rc == VINF_SUCCESS)
2806 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2807 else if (rc == VINF_PGM_CACHED_PAGE)
2808 {
2809 /*
2810 * The PT was cached, just hook it up.
2811 */
2812 if (fPageTable)
2813 PdeDst.u = pShwPage->Core.Key
2814 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2815 else
2816 {
2817 PdeDst.u = pShwPage->Core.Key
2818 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2819 /* (see explanation and assumptions further down.) */
2820 if ( !PdeSrc.b.u1Dirty
2821 && PdeSrc.b.u1Write)
2822 {
2823 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2824 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2825 PdeDst.b.u1Write = 0;
2826 }
2827 }
2828 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2829# if defined(IN_RC)
2830 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2831# endif
2832 return VINF_SUCCESS;
2833 }
2834 else if (rc == VERR_PGM_POOL_FLUSHED)
2835 {
2836 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
2837# if defined(IN_RC)
2838 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2839# endif
2840 return VINF_PGM_SYNC_CR3;
2841 }
2842 else
2843 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
2844 PdeDst.u &= X86_PDE_AVL_MASK;
2845 PdeDst.u |= pShwPage->Core.Key;
2846
2847 /*
2848 * Page directory has been accessed (this is a fault situation, remember).
2849 */
2850 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
2851 if (fPageTable)
2852 {
2853 /*
2854 * Page table - 4KB.
2855 *
2856 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2857 */
2858 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2859 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
2860 PGSTPT pPTSrc;
2861 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2862 if (RT_SUCCESS(rc))
2863 {
2864 /*
2865 * Start by syncing the page directory entry so CSAM's TLB trick works.
2866 */
2867 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2868 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2869 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2870# if defined(IN_RC)
2871 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2872# endif
2873
2874 /*
2875 * Directory/page user or supervisor privilege: (same goes for read/write)
2876 *
2877 * Directory Page Combined
2878 * U/S U/S U/S
2879 * 0 0 0
2880 * 0 1 0
2881 * 1 0 0
2882 * 1 1 1
2883 *
2884 * Simple AND operation. Table listed for completeness.
2885 *
2886 */
2887 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
2888# ifdef PGM_SYNC_N_PAGES
2889 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2890 unsigned iPTDst = iPTBase;
2891 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2892 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2893 iPTDst = 0;
2894 else
2895 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2896# else /* !PGM_SYNC_N_PAGES */
2897 unsigned iPTDst = 0;
2898 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2899# endif /* !PGM_SYNC_N_PAGES */
2900# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2901 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2902 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2903# else
2904 const unsigned offPTSrc = 0;
2905# endif
2906 for (; iPTDst < iPTDstEnd; iPTDst++)
2907 {
2908 const unsigned iPTSrc = iPTDst + offPTSrc;
2909 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2910
2911 if (PteSrc.n.u1Present)
2912 {
2913# ifndef IN_RING0
2914 /*
2915 * Assuming kernel code will be marked as supervisor - and not as user level
2916 * and executed using a conforming code selector - and marked as read-only.
2917 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2918 */
2919 PPGMPAGE pPage;
2920 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
2921 || !CSAMDoesPageNeedScanning(pVM, (iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT))
2922 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2923 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2924 )
2925# endif
2926 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2927 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2928 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
2929 PteSrc.n.u1Present,
2930 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2931 PteSrc.n.u1User & PdeSrc.n.u1User,
2932 (uint64_t)PteSrc.u,
2933 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
2934 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
2935 }
2936 /* else: the page table was cleared by the pool */
2937 } /* for PTEs */
2938 }
2939 }
2940 else
2941 {
2942 /*
2943 * Big page - 2/4MB.
2944 *
2945 * We'll walk the ram range list in parallel and optimize lookups.
2946 * We will only sync one shadow page table at a time.
2947 */
2948 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
2949
2950 /**
2951 * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4kb PDs).
2952 */
2953
2954 /*
2955 * Start by syncing the page directory entry.
2956 */
2957 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2958 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
2959
2960 /*
2961 * If the page is not flagged as dirty and is writable, then make it read-only
2962 * at PD level, so we can set the dirty bit when the page is modified.
2963 *
2964 * ASSUMES that page access handlers are implemented on page table entry level.
2965 * Thus we will first catch the dirty access and set PDE.D and restart. If
2966 * there is an access handler, we'll trap again and let it work on the problem.
2967 */
2968 /** @todo move the above stuff to a section in the PGM documentation. */
2969 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2970 if ( !PdeSrc.b.u1Dirty
2971 && PdeSrc.b.u1Write)
2972 {
2973 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2974 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2975 PdeDst.b.u1Write = 0;
2976 }
2977 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2978# if defined(IN_RC)
2979 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2980# endif
2981
2982 /*
2983 * Fill the shadow page table.
2984 */
2985 /* Get address and flags from the source PDE. */
2986 SHWPTE PteDstBase;
2987 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
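 /* PteDstBase carries the P/RW/US/A/D/G (and NX) flags of the big PDE; the per-page HC physical address is ORed in for each PTE below. */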
2988
2989 /* Loop thru the entries in the shadow PT. */
2990 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
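 /* GCPtr is the first linear address covered by this PDE; it is only used for logging and the CSAM check below. */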
2991 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2992 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
2993 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2994 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
2995 unsigned iPTDst = 0;
2996 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2997 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2998 {
2999 /* Advance ram range list. */
3000 while (pRam && GCPhys > pRam->GCPhysLast)
3001 pRam = pRam->CTX_SUFF(pNext);
3002 if (pRam && GCPhys >= pRam->GCPhys)
3003 {
3004 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
3005 do
3006 {
3007 /* Make shadow PTE. */
3008 PPGMPAGE pPage = &pRam->aPages[iHCPage];
3009 SHWPTE PteDst;
3010
3011# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3012 /* Try to make the page writable if necessary. */
3013 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
3014 && ( PGM_PAGE_IS_ZERO(pPage)
3015 || ( PteDstBase.n.u1Write
3016 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
3017# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
3018 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
3019# endif
3020# ifdef VBOX_WITH_PAGE_SHARING
3021 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
3022# endif
3023 && !PGM_PAGE_IS_BALLOONED(pPage))
3024 )
3025 )
3026 {
3027 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
3028 AssertRCReturn(rc, rc);
3029 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
3030 break;
3031 }
3032# endif
3033
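                    /* Pages with active access handlers: give write-handled pages a read-only
                       mapping and all-access handled pages no mapping at all. */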
3034 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
3035 {
3036 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
3037 {
3038 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
3039 PteDst.n.u1Write = 0;
3040 }
3041 else
3042 PteDst.u = 0;
3043 }
3044 else if (PGM_PAGE_IS_BALLOONED(pPage))
3045 {
3046 /* Skip ballooned pages. */
3047 PteDst.u = 0;
3048 }
3049# ifndef IN_RING0
3050 /*
3051              * Assuming kernel code is marked as supervisor (i.e. not as user level code executed
3052              * via a conforming code selector). Don't check for read-only, as that implies the whole
3053 * 4MB can be code or readonly data. Linux enables write access for its large pages.
3054 */
3055 else if ( !PdeSrc.n.u1User
3056 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
3057 PteDst.u = 0;
3058# endif
3059 else
3060 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
3061
3062 /* Only map writable pages writable. */
3063 if ( PteDst.n.u1Write
3064 && PteDst.n.u1Present
3065 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
3066 {
3067 /* Still applies to shared pages. */
3068 Assert(!PGM_PAGE_IS_ZERO(pPage));
3069 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
3070 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
3071 }
3072
3073 if (PteDst.n.u1Present)
3074 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
3075
3076 /* commit it */
3077 pPTDst->a[iPTDst] = PteDst;
3078 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
3079 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
3080 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
3081
3082 /* advance */
3083 GCPhys += PAGE_SIZE;
3084 iHCPage++;
3085 iPTDst++;
3086 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3087 && GCPhys <= pRam->GCPhysLast);
3088 }
3089 else if (pRam)
3090 {
3091 Log(("Invalid pages at %RGp\n", GCPhys));
3092 do
3093 {
3094 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
3095 GCPhys += PAGE_SIZE;
3096 iPTDst++;
3097 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3098 && GCPhys < pRam->GCPhys);
3099 }
3100 else
3101 {
3102 Log(("Invalid pages at %RGp (2)\n", GCPhys));
3103 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
3104 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
3105 }
3106 } /* while more PTEs */
3107 } /* 4KB / 4MB */
3108 }
3109 else
3110 AssertRelease(!PdeDst.n.u1Present);
3111
3112 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3113 if (RT_FAILURE(rc))
3114 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
3115 return rc;
3116
3117#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
3118 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3119 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
3120 && !defined(IN_RC)
3121
3122 /*
3123 * Validate input a little bit.
3124 */
3125 int rc = VINF_SUCCESS;
3126# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3127 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3128 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3129
3130 /* Fetch the pgm pool shadow descriptor. */
3131 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3132 Assert(pShwPde);
3133
3134# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3135 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3136 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
3137 PX86PDPAE pPDDst;
3138 PSHWPDE pPdeDst;
3139
3140 /* Fetch the pgm pool shadow descriptor. */
3141 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
3142 AssertRCSuccessReturn(rc, rc);
3143 Assert(pShwPde);
3144
3145 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
3146 pPdeDst = &pPDDst->a[iPDDst];
3147
3148# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3149 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3150 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3151 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3152 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
3153 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3154 AssertRCSuccessReturn(rc, rc);
3155 Assert(pPDDst);
3156 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3157
3158 /* Fetch the pgm pool shadow descriptor. */
3159 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3160 Assert(pShwPde);
3161
3162# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3163 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3164 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3165 PEPTPD pPDDst;
3166 PEPTPDPT pPdptDst;
3167
3168 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3169 if (rc != VINF_SUCCESS)
3170 {
3171 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3172 AssertRC(rc);
3173 return rc;
3174 }
3175 Assert(pPDDst);
3176 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3177
3178 /* Fetch the pgm pool shadow descriptor. */
3179 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3180 Assert(pShwPde);
3181# endif
3182 SHWPDE PdeDst = *pPdeDst;
3183
3184 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
3185 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3186
3187# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
3188 if (BTH_IS_NP_ACTIVE(pVM))
3189 {
3190 PPGMPAGE pPage;
3191
3192 /* Check if we allocated a big page before for this 2 MB range. */
3193 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
3194 if (RT_SUCCESS(rc))
3195 {
3196 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3197
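            /* The 2 MB range is already backed by an allocated large page; just reuse it. */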
3198 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3199 {
3200 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3201 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3202 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3203 }
3204 else if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
3205 {
3206 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3207 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
3208 if (RT_SUCCESS(rc))
3209 {
3210 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3211 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3212 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3213 }
3214 }
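            /* Not backed by a large page yet; try to allocate one for this 2 MB range now. */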
3215 else if (PGMIsUsingLargePages(pVM))
3216 {
3217 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3218 if (RT_SUCCESS(rc))
3219 {
3220 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3221 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3222 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3223 }
3224 else
3225 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3226 }
3227
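            /* Got a large page: map the whole 2 MB range with a single shadow PDE. */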
3228 if (HCPhys != NIL_RTHCPHYS)
3229 {
3230 PdeDst.u &= X86_PDE_AVL_MASK;
3231 PdeDst.u |= HCPhys;
3232 PdeDst.n.u1Present = 1;
3233 PdeDst.n.u1Write = 1;
3234 PdeDst.b.u1Size = 1;
3235# if PGM_SHW_TYPE == PGM_TYPE_EPT
3236 PdeDst.n.u1Execute = 1;
3237 PdeDst.b.u1IgnorePAT = 1;
3238 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3239# else
3240 PdeDst.n.u1User = 1;
3241# endif
3242 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3243
3244 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3245 /* Add a reference to the first page only. */
3246 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3247
3248 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3249 return VINF_SUCCESS;
3250 }
3251 }
3252 }
3253# endif /* PGM_WITH_LARGE_PAGES && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3254
3255 GSTPDE PdeSrc;
3256 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3257 PdeSrc.n.u1Present = 1;
3258 PdeSrc.n.u1Write = 1;
3259 PdeSrc.n.u1Accessed = 1;
3260 PdeSrc.n.u1User = 1;
3261
3262 /*
3263 * Allocate & map the page table.
3264 */
3265 PSHWPT pPTDst;
3266 PPGMPOOLPAGE pShwPage;
3267 RTGCPHYS GCPhys;
3268
3269 /* Virtual address = physical address */
3270 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
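    /* Allocate (or look up a cached) shadow page table; the pool key is GCPhys rounded
       down to the start of the range one shadow page table covers. */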
3271 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
3272
3273 if ( rc == VINF_SUCCESS
3274 || rc == VINF_PGM_CACHED_PAGE)
3275 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3276 else
3277 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3278
3279 PdeDst.u &= X86_PDE_AVL_MASK;
3280 PdeDst.u |= pShwPage->Core.Key;
3281 PdeDst.n.u1Present = 1;
3282 PdeDst.n.u1Write = 1;
3283# if PGM_SHW_TYPE == PGM_TYPE_EPT
3284 PdeDst.n.u1Execute = 1;
3285# else
3286 PdeDst.n.u1User = 1;
3287 PdeDst.n.u1Accessed = 1;
3288# endif
3289 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3290
3291 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3292 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3293 return rc;
3294
3295#else
3296 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3297 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3298 return VERR_INTERNAL_ERROR;
3299#endif
3300}
3301
3302
3303
3304/**
3305 * Prefetch a page/set of pages.
3306 *
3307 * Typically used to sync commonly used pages before entering raw mode
3308 * after a CR3 reload.
3309 *
3310 * @returns VBox status code.
3311 * @param pVCpu The VMCPU handle.
3312 * @param GCPtrPage Page to prefetch.
3313 */
3314PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3315{
3316#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3317 || PGM_GST_TYPE == PGM_TYPE_REAL \
3318 || PGM_GST_TYPE == PGM_TYPE_PROT \
3319 || PGM_GST_TYPE == PGM_TYPE_PAE \
3320 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3321 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3322 && PGM_SHW_TYPE != PGM_TYPE_EPT
3323
3324 /*
3325 * Check that all Guest levels thru the PDE are present, getting the
3326 * PD and PDE in the process.
3327 */
3328 int rc = VINF_SUCCESS;
3329# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3330# if PGM_GST_TYPE == PGM_TYPE_32BIT
3331 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3332 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3333# elif PGM_GST_TYPE == PGM_TYPE_PAE
3334 unsigned iPDSrc;
3335 X86PDPE PdpeSrc;
3336 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3337 if (!pPDSrc)
3338 return VINF_SUCCESS; /* not present */
3339# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3340 unsigned iPDSrc;
3341 PX86PML4E pPml4eSrc;
3342 X86PDPE PdpeSrc;
3343 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3344 if (!pPDSrc)
3345 return VINF_SUCCESS; /* not present */
3346# endif
3347 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3348# else
3349 PGSTPD pPDSrc = NULL;
3350 const unsigned iPDSrc = 0;
3351 GSTPDE PdeSrc;
3352
3353 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3354 PdeSrc.n.u1Present = 1;
3355 PdeSrc.n.u1Write = 1;
3356 PdeSrc.n.u1Accessed = 1;
3357 PdeSrc.n.u1User = 1;
3358# endif
3359
3360 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3361 {
3362 PVM pVM = pVCpu->CTX_SUFF(pVM);
3363 pgmLock(pVM);
3364
3365# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3366 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3367# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3368 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3369 PX86PDPAE pPDDst;
3370 X86PDEPAE PdeDst;
3371# if PGM_GST_TYPE != PGM_TYPE_PAE
3372 X86PDPE PdpeSrc;
3373
3374 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3375 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3376# endif
3377 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3378 if (rc != VINF_SUCCESS)
3379 {
3380 pgmUnlock(pVM);
3381 AssertRC(rc);
3382 return rc;
3383 }
3384 Assert(pPDDst);
3385 PdeDst = pPDDst->a[iPDDst];
3386
3387# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3388 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3389 PX86PDPAE pPDDst;
3390 X86PDEPAE PdeDst;
3391
3392# if PGM_GST_TYPE == PGM_TYPE_PROT
3393 /* AMD-V nested paging */
3394 X86PML4E Pml4eSrc;
3395 X86PDPE PdpeSrc;
3396 PX86PML4E pPml4eSrc = &Pml4eSrc;
3397
3398 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3399 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3400 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3401# endif
3402
3403 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3404 if (rc != VINF_SUCCESS)
3405 {
3406 pgmUnlock(pVM);
3407 AssertRC(rc);
3408 return rc;
3409 }
3410 Assert(pPDDst);
3411 PdeDst = pPDDst->a[iPDDst];
3412# endif
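        /* Don't touch shadow PDEs that are occupied by (hypervisor) mappings. */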
3413 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3414 {
3415 if (!PdeDst.n.u1Present)
3416 {
3417 /** @todo r=bird: This guy will set the A bit on the PDE,
3418 * probably harmless. */
3419 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3420 }
3421 else
3422 {
3423 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3424 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3425 * makes no sense to prefetch more than one page.
3426 */
3427 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3428 if (RT_SUCCESS(rc))
3429 rc = VINF_SUCCESS;
3430 }
3431 }
3432 pgmUnlock(pVM);
3433 }
3434 return rc;
3435
3436#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3437 return VINF_SUCCESS; /* ignore */
3438#else
3439 AssertCompile(0);
3440#endif
3441}
3442
3443
3444
3445
3446/**
3447 * Syncs a page during a PGMVerifyAccess() call.
3448 *
3449 * @returns VBox status code (informational included).
3450 * @param pVCpu The VMCPU handle.
3451 * @param GCPtrPage The address of the page to sync.
3452 * @param fPage The effective guest page flags.
3453 * @param uErr The trap error code.
3454 * @remarks This will normally never be called on invalid guest page
3455 * translation entries.
3456 */
3457PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3458{
3459 PVM pVM = pVCpu->CTX_SUFF(pVM);
3460
3461 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3462
3463 Assert(HWACCMIsNestedPagingActive(pVM) == pVM->pgm.s.fNestedPaging);
3464 Assert(!pVM->pgm.s.fNestedPaging);
3465#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3466 || PGM_GST_TYPE == PGM_TYPE_REAL \
3467 || PGM_GST_TYPE == PGM_TYPE_PROT \
3468 || PGM_GST_TYPE == PGM_TYPE_PAE \
3469 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3470 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3471 && PGM_SHW_TYPE != PGM_TYPE_EPT
3472
3473# ifndef IN_RING0
3474 if (!(fPage & X86_PTE_US))
3475 {
3476 /*
3477 * Mark this page as safe.
3478 */
3479 /** @todo not correct for pages that contain both code and data!! */
3480 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3481 CSAMMarkPage(pVM, GCPtrPage, true);
3482 }
3483# endif
3484
3485 /*
3486 * Get guest PD and index.
3487 */
3488 /** @todo Performance: We've done all this a jiffy ago in the
3489 * PGMGstGetPage call. */
3490# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3491# if PGM_GST_TYPE == PGM_TYPE_32BIT
3492 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3493 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3494
3495# elif PGM_GST_TYPE == PGM_TYPE_PAE
3496 unsigned iPDSrc = 0;
3497 X86PDPE PdpeSrc;
3498 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3499 if (RT_UNLIKELY(!pPDSrc))
3500 {
3501 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3502 return VINF_EM_RAW_GUEST_TRAP;
3503 }
3504
3505# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3506 unsigned iPDSrc = 0; /* shut up gcc */
3507 PX86PML4E pPml4eSrc = NULL; /* ditto */
3508 X86PDPE PdpeSrc;
3509 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3510 if (RT_UNLIKELY(!pPDSrc))
3511 {
3512 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3513 return VINF_EM_RAW_GUEST_TRAP;
3514 }
3515# endif
3516
3517# else /* !PGM_WITH_PAGING */
3518 PGSTPD pPDSrc = NULL;
3519 const unsigned iPDSrc = 0;
3520# endif /* !PGM_WITH_PAGING */
3521 int rc = VINF_SUCCESS;
3522
3523 pgmLock(pVM);
3524
3525 /*
3526 * First check if the shadow pd is present.
3527 */
3528# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3529 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3530
3531# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3532 PX86PDEPAE pPdeDst;
3533 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3534 PX86PDPAE pPDDst;
3535# if PGM_GST_TYPE != PGM_TYPE_PAE
3536 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3537 X86PDPE PdpeSrc;
3538 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3539# endif
3540 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, &PdpeSrc, &pPDDst);
3541 if (rc != VINF_SUCCESS)
3542 {
3543 pgmUnlock(pVM);
3544 AssertRC(rc);
3545 return rc;
3546 }
3547 Assert(pPDDst);
3548 pPdeDst = &pPDDst->a[iPDDst];
3549
3550# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3551 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3552 PX86PDPAE pPDDst;
3553 PX86PDEPAE pPdeDst;
3554
3555# if PGM_GST_TYPE == PGM_TYPE_PROT
3556 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3557 X86PML4E Pml4eSrc;
3558 X86PDPE PdpeSrc;
3559 PX86PML4E pPml4eSrc = &Pml4eSrc;
3560 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3561 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3562# endif
3563
3564 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc, &PdpeSrc, &pPDDst);
3565 if (rc != VINF_SUCCESS)
3566 {
3567 pgmUnlock(pVM);
3568 AssertRC(rc);
3569 return rc;
3570 }
3571 Assert(pPDDst);
3572 pPdeDst = &pPDDst->a[iPDDst];
3573# endif
3574
3575# if defined(IN_RC)
3576 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3577 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3578# endif
3579
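    /* If the shadow PDE isn't present yet, sync the whole page table first. */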
3580 if (!pPdeDst->n.u1Present)
3581 {
3582 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3583 if (rc != VINF_SUCCESS)
3584 {
3585# if defined(IN_RC)
3586 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3587 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3588# endif
3589 pgmUnlock(pVM);
3590 AssertRC(rc);
3591 return rc;
3592 }
3593 }
3594
3595# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3596 /* Check for dirty bit fault */
3597 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3598 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3599 Log(("PGMVerifyAccess: success (dirty)\n"));
3600 else
3601# endif
3602 {
3603# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3604 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3605# else
3606 GSTPDE PdeSrc;
3607 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3608 PdeSrc.n.u1Present = 1;
3609 PdeSrc.n.u1Write = 1;
3610 PdeSrc.n.u1Accessed = 1;
3611 PdeSrc.n.u1User = 1;
3612# endif
3613
3614 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3615 if (uErr & X86_TRAP_PF_US)
3616 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3617 else /* supervisor */
3618 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3619
3620 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3621 if (RT_SUCCESS(rc))
3622 {
3623 /* Page was successfully synced */
3624 Log2(("PGMVerifyAccess: success (sync)\n"));
3625 rc = VINF_SUCCESS;
3626 }
3627 else
3628 {
3629 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3630 rc = VINF_EM_RAW_GUEST_TRAP;
3631 }
3632 }
3633# if defined(IN_RC)
3634 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3635 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3636# endif
3637 pgmUnlock(pVM);
3638 return rc;
3639
3640#else /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3641
3642    AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3643 return VERR_INTERNAL_ERROR;
3644#endif /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3645}
3646
3647
3648/**
3649 * Syncs the paging hierarchy starting at CR3.
3650 *
3651 * @returns VBox status code, no specials.
3652 * @param pVCpu The VMCPU handle.
3653 * @param cr0 Guest context CR0 register
3654 * @param cr3 Guest context CR3 register
3655 * @param cr4 Guest context CR4 register
3656 * @param fGlobal Including global page directories or not
3657 */
3658PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3659{
3660 PVM pVM = pVCpu->CTX_SUFF(pVM);
3661
3662 LogFlow(("SyncCR3 fGlobal=%d\n", !!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
3663
3664#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3665
3666 pgmLock(pVM);
3667
3668# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3669 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3670 if (pPool->cDirtyPages)
3671 pgmPoolResetDirtyPages(pVM);
3672# endif
3673
3674 /*
3675 * Update page access handlers.
3676  * The virtual handlers are always flushed, while the physical handlers are only flushed on demand.
3677  * WARNING: We are incorrectly not doing global flushing on virtual handler updates. We'll
3678  * have to look into that later because it will have a bad influence on performance.
3679 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3680 * bird: Yes, but that won't work for aliases.
3681 */
3682 /** @todo this MUST go away. See #1557. */
3683 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3684 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3685 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3686 pgmUnlock(pVM);
3687#endif /* !NESTED && !EPT */
3688
3689#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3690 /*
3691 * Nested / EPT - almost no work.
3692 */
3693 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3694 return VINF_SUCCESS;
3695
3696#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3697 /*
3698 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3699 * out the shadow parts when the guest modifies its tables.
3700 */
3701 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3702 return VINF_SUCCESS;
3703
3704#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3705
3706# ifndef PGM_WITHOUT_MAPPINGS
3707 /*
3708 * Check for and resolve conflicts with our guest mappings if they
3709 * are enabled and not fixed.
3710 */
3711 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3712 {
3713 int rc = pgmMapResolveConflicts(pVM);
3714 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3715 if (rc == VINF_PGM_SYNC_CR3)
3716 {
3717 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3718 return VINF_PGM_SYNC_CR3;
3719 }
3720 }
3721# else
3722 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3723# endif
3724 return VINF_SUCCESS;
3725#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3726}
3727
3728
3729
3730
3731#ifdef VBOX_STRICT
3732#ifdef IN_RC
3733# undef AssertMsgFailed
3734# define AssertMsgFailed Log
3735#endif
3736#ifdef IN_RING3
3737# include <VBox/dbgf.h>
3738
3739/**
3740 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3741 *
3742 * @returns VBox status code (VINF_SUCCESS).
3743 * @param cr3 The root of the hierarchy.
3744 * @param cr4 The cr4, only PAE and PSE are currently used.
3745 * @param fLongMode Set if long mode, false if not long mode.
3746 * @param cMaxDepth Number of levels to dump.
3747 * @param pHlp Pointer to the output functions.
3748 */
3749RT_C_DECLS_BEGIN
3750VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
3751RT_C_DECLS_END
3752
3753#endif
3754
3755/**
3756 * Checks that the shadow page table is in sync with the guest one.
3757 *
3758 * @returns The number of errors.
3760 * @param pVCpu The VMCPU handle.
3761 * @param cr3 Guest context CR3 register
3762 * @param cr4 Guest context CR4 register
3763 * @param GCPtr Where to start. Defaults to 0.
3764 * @param cb How much to check. Defaults to everything.
3765 */
3766PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3767{
3768#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3769 return 0;
3770#else
3771 unsigned cErrors = 0;
3772 PVM pVM = pVCpu->CTX_SUFF(pVM);
3773 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3774
3775#if PGM_GST_TYPE == PGM_TYPE_PAE
3776 /** @todo currently broken; crashes below somewhere */
3777 AssertFailed();
3778#endif
3779
3780#if PGM_GST_TYPE == PGM_TYPE_32BIT \
3781 || PGM_GST_TYPE == PGM_TYPE_PAE \
3782 || PGM_GST_TYPE == PGM_TYPE_AMD64
3783
3784# if PGM_GST_TYPE == PGM_TYPE_32BIT
3785 bool fBigPagesSupported = CPUMIsGuestPageSizeExtEnabled(pVCpu);
3786# else
3787 bool fBigPagesSupported = true;
3788# endif
3789 PPGMCPU pPGM = &pVCpu->pgm.s;
3790 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3791 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3792# ifndef IN_RING0
3793 RTHCPHYS HCPhys; /* general usage. */
3794# endif
3795 int rc;
3796
3797 /*
3798 * Check that the Guest CR3 and all its mappings are correct.
3799 */
3800 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
3801 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3802 false);
3803# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3804# if PGM_GST_TYPE == PGM_TYPE_32BIT
3805 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3806# else
3807 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3808# endif
3809 AssertRCReturn(rc, 1);
3810 HCPhys = NIL_RTHCPHYS;
3811 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
3812 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3813# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3814 pgmGstGet32bitPDPtr(pVCpu);
3815 RTGCPHYS GCPhys;
3816 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
3817 AssertRCReturn(rc, 1);
3818 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3819# endif
3820# endif /* !IN_RING0 */
3821
3822 /*
3823 * Get and check the Shadow CR3.
3824 */
3825# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3826 unsigned cPDEs = X86_PG_ENTRIES;
3827 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
3828# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3829# if PGM_GST_TYPE == PGM_TYPE_32BIT
3830 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3831# else
3832 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3833# endif
3834 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3835# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3836 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3837 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
3838# endif
3839 if (cb != ~(RTGCPTR)0)
3840 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3841
3842/** @todo call the other two PGMAssert*() functions. */
3843
3844# if PGM_GST_TYPE == PGM_TYPE_AMD64
3845 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3846
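    /* Walk the shadow PML4 and cross check every present entry against the guest PML4. */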
3847 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3848 {
3849 PPGMPOOLPAGE pShwPdpt = NULL;
3850 PX86PML4E pPml4eSrc;
3851 PX86PML4E pPml4eDst;
3852 RTGCPHYS GCPhysPdptSrc;
3853
3854 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
3855 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
3856
3857 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3858 if (!pPml4eDst->n.u1Present)
3859 {
3860 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3861 continue;
3862 }
3863
3864 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3865 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
3866
3867 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
3868 {
3869 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3870 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3871 cErrors++;
3872 continue;
3873 }
3874
3875 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3876 {
3877 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3878 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3879 cErrors++;
3880 continue;
3881 }
3882
3883 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
3884 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
3885 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
3886 {
3887 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3888 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3889 cErrors++;
3890 continue;
3891 }
3892# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3893 {
3894# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3895
3896# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3897 /*
3898 * Check the PDPTEs too.
3899 */
3900 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3901
3902 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3903 {
3904 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3905 PPGMPOOLPAGE pShwPde = NULL;
3906 PX86PDPE pPdpeDst;
3907 RTGCPHYS GCPhysPdeSrc;
3908# if PGM_GST_TYPE == PGM_TYPE_PAE
3909 X86PDPE PdpeSrc;
3910 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
3911 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
3912# else
3913 PX86PML4E pPml4eSrcIgn;
3914 X86PDPE PdpeSrc;
3915 PX86PDPT pPdptDst;
3916 PX86PDPAE pPDDst;
3917 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3918
3919 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3920 if (rc != VINF_SUCCESS)
3921 {
3922 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3923 GCPtr += 512 * _2M;
3924 continue; /* next PDPTE */
3925 }
3926 Assert(pPDDst);
3927# endif
3928 Assert(iPDSrc == 0);
3929
3930 pPdpeDst = &pPdptDst->a[iPdpt];
3931
3932 if (!pPdpeDst->n.u1Present)
3933 {
3934 GCPtr += 512 * _2M;
3935 continue; /* next PDPTE */
3936 }
3937
3938 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3939 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
3940
3941 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
3942 {
3943 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3944 GCPtr += 512 * _2M;
3945 cErrors++;
3946 continue;
3947 }
3948
3949 if (GCPhysPdeSrc != pShwPde->GCPhys)
3950 {
3951# if PGM_GST_TYPE == PGM_TYPE_AMD64
3952 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3953# else
3954 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3955# endif
3956 GCPtr += 512 * _2M;
3957 cErrors++;
3958 continue;
3959 }
3960
3961# if PGM_GST_TYPE == PGM_TYPE_AMD64
3962 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
3963 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
3964 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
3965 {
3966 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3967 GCPtr += 512 * _2M;
3968 cErrors++;
3969 continue;
3970 }
3971# endif
3972
3973# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3974 {
3975# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3976# if PGM_GST_TYPE == PGM_TYPE_32BIT
3977 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3978# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3979 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
3980# endif
3981# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3982 /*
3983 * Iterate the shadow page directory.
3984 */
3985 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3986 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3987
3988 for (;
3989 iPDDst < cPDEs;
3990 iPDDst++, GCPtr += cIncrement)
3991 {
3992# if PGM_SHW_TYPE == PGM_TYPE_PAE
3993 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
3994# else
3995 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3996# endif
3997 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
3998 {
3999 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
4000 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
4001 {
4002 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
4003 cErrors++;
4004 continue;
4005 }
4006 }
4007 else if ( (PdeDst.u & X86_PDE_P)
4008 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
4009 )
4010 {
4011 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
4012 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
4013 if (!pPoolPage)
4014 {
4015 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
4016 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
4017 cErrors++;
4018 continue;
4019 }
4020 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
4021
4022 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
4023 {
4024 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
4025 GCPtr, (uint64_t)PdeDst.u));
4026 cErrors++;
4027 }
4028
4029 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
4030 {
4031 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
4032 GCPtr, (uint64_t)PdeDst.u));
4033 cErrors++;
4034 }
4035
4036 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
4037 if (!PdeSrc.n.u1Present)
4038 {
4039 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
4040 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
4041 cErrors++;
4042 continue;
4043 }
4044
4045 if ( !PdeSrc.b.u1Size
4046 || !fBigPagesSupported)
4047 {
4048 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
4049# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4050 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
4051# endif
4052 }
4053 else
4054 {
4055# if PGM_GST_TYPE == PGM_TYPE_32BIT
4056 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
4057 {
4058 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
4059 GCPtr, (uint64_t)PdeSrc.u));
4060 cErrors++;
4061 continue;
4062 }
4063# endif
4064 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
4065# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4066 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
4067# endif
4068 }
4069
4070 if ( pPoolPage->enmKind
4071 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
4072 {
4073 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
4074 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
4075 cErrors++;
4076 }
4077
4078 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4079 if (!pPhysPage)
4080 {
4081 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4082 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4083 cErrors++;
4084 continue;
4085 }
4086
4087 if (GCPhysGst != pPoolPage->GCPhys)
4088 {
4089 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
4090 GCPhysGst, pPoolPage->GCPhys, GCPtr));
4091 cErrors++;
4092 continue;
4093 }
4094
4095 if ( !PdeSrc.b.u1Size
4096 || !fBigPagesSupported)
4097 {
4098 /*
4099 * Page Table.
4100 */
4101 const GSTPT *pPTSrc;
4102 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
4103 if (RT_FAILURE(rc))
4104 {
4105 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4106 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4107 cErrors++;
4108 continue;
4109 }
4110 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
4111 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
4112 {
4113 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
4114 // (This problem will go away when/if we shadow multiple CR3s.)
4115 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4116 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4117 cErrors++;
4118 continue;
4119 }
4120 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4121 {
4122 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
4123 GCPtr, (uint64_t)PdeDst.u));
4124 cErrors++;
4125 continue;
4126 }
4127
4128 /* iterate the page table. */
4129# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4130 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
4131 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
4132# else
4133 const unsigned offPTSrc = 0;
4134# endif
4135 for (unsigned iPT = 0, off = 0;
4136 iPT < RT_ELEMENTS(pPTDst->a);
4137 iPT++, off += PAGE_SIZE)
4138 {
4139 const SHWPTE PteDst = pPTDst->a[iPT];
4140
4141 /* skip not-present entries. */
4142 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
4143 continue;
4144 Assert(PteDst.n.u1Present);
4145
4146 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
4147 if (!PteSrc.n.u1Present)
4148 {
4149# ifdef IN_RING3
4150 PGMAssertHandlerAndFlagsInSync(pVM);
4151 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
4152# endif
4153 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
4154 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
4155 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
4156 cErrors++;
4157 continue;
4158 }
4159
4160 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
4161# if 1 /** @todo sync accessed bit properly... */
4162 fIgnoreFlags |= X86_PTE_A;
4163# endif
4164
4165 /* match the physical addresses */
4166 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
4167 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
4168
4169# ifdef IN_RING3
4170 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4171 if (RT_FAILURE(rc))
4172 {
4173 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4174 {
4175 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4176 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4177 cErrors++;
4178 continue;
4179 }
4180 }
4181 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4182 {
4183 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4184 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4185 cErrors++;
4186 continue;
4187 }
4188# endif
4189
4190 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4191 if (!pPhysPage)
4192 {
4193# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4194 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4195 {
4196 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4197 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4198 cErrors++;
4199 continue;
4200 }
4201# endif
4202 if (PteDst.n.u1Write)
4203 {
4204 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4205 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4206 cErrors++;
4207 }
4208 fIgnoreFlags |= X86_PTE_RW;
4209 }
4210 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4211 {
4212 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4213 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4214 cErrors++;
4215 continue;
4216 }
4217
4218 /* flags */
4219 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4220 {
4221 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4222 {
4223 if (PteDst.n.u1Write)
4224 {
4225 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4226 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4227 cErrors++;
4228 continue;
4229 }
4230 fIgnoreFlags |= X86_PTE_RW;
4231 }
4232 else
4233 {
4234 if ( PteDst.n.u1Present
4235# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4236 && !PGM_PAGE_IS_MMIO(pPhysPage)
4237# endif
4238 )
4239 {
4240 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4241 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4242 cErrors++;
4243 continue;
4244 }
4245 fIgnoreFlags |= X86_PTE_P;
4246 }
4247 }
4248 else
4249 {
4250 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4251 {
4252 if (PteDst.n.u1Write)
4253 {
4254 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4255 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4256 cErrors++;
4257 continue;
4258 }
4259 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4260 {
4261 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4262 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4263 cErrors++;
4264 continue;
4265 }
4266 if (PteDst.n.u1Dirty)
4267 {
4268 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4269 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4270 cErrors++;
4271 }
4272# if 0 /** @todo sync access bit properly... */
4273 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4274 {
4275                         AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4276 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4277 cErrors++;
4278 }
4279 fIgnoreFlags |= X86_PTE_RW;
4280# else
4281 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4282# endif
4283 }
4284 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4285 {
4286 /* access bit emulation (not implemented). */
4287 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4288 {
4289 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4290 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4291 cErrors++;
4292 continue;
4293 }
4294 if (!PteDst.n.u1Accessed)
4295 {
4296                         AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4297 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4298 cErrors++;
4299 }
4300 fIgnoreFlags |= X86_PTE_P;
4301 }
4302# ifdef DEBUG_sandervl
4303 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4304# endif
4305 }
4306
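                    /* Compare the remaining flags. RW is also ignored on the guest side so that a
                       shadow PTE which has been write-protected while the guest PTE is writable
                       doesn't count as a mismatch. */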
4307 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4308 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4309 )
4310 {
4311 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4312 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4313 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4314 cErrors++;
4315 continue;
4316 }
4317 } /* foreach PTE */
4318 }
4319 else
4320 {
4321 /*
4322 * Big Page.
4323 */
4324 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4325 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4326 {
4327 if (PdeDst.n.u1Write)
4328 {
4329 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4330 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4331 cErrors++;
4332 continue;
4333 }
4334 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4335 {
4336                     AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4337 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4338 cErrors++;
4339 continue;
4340 }
4341# if 0 /** @todo sync access bit properly... */
4342 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4343 {
4344                     AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4345 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4346 cErrors++;
4347 }
4348 fIgnoreFlags |= X86_PTE_RW;
4349# else
4350 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4351# endif
4352 }
4353 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4354 {
4355 /* access bit emulation (not implemented). */
4356 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4357 {
4358 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4359 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4360 cErrors++;
4361 continue;
4362 }
4363 if (!PdeDst.n.u1Accessed)
4364 {
4365                     AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4366 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4367 cErrors++;
4368 }
4369 fIgnoreFlags |= X86_PTE_P;
4370 }
4371
4372 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4373 {
4374 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4375 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4376 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4377 cErrors++;
4378 }
4379
4380 /* iterate the page table. */
4381 for (unsigned iPT = 0, off = 0;
4382 iPT < RT_ELEMENTS(pPTDst->a);
4383 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4384 {
4385 const SHWPTE PteDst = pPTDst->a[iPT];
4386
4387 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4388 {
4389 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4390 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4391 cErrors++;
4392 }
4393
4394 /* skip not-present entries. */
4395 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4396 continue;
4397
4398 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4399
4400 /* match the physical addresses */
4401 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4402
4403# ifdef IN_RING3
4404 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4405 if (RT_FAILURE(rc))
4406 {
4407 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4408 {
4409 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4410 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4411 cErrors++;
4412 }
4413 }
4414 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4415 {
4416 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4417 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4418 cErrors++;
4419 continue;
4420 }
4421# endif
4422 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4423 if (!pPhysPage)
4424 {
4425# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4426 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4427 {
4428 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4429 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4430 cErrors++;
4431 continue;
4432 }
4433# endif
4434 if (PteDst.n.u1Write)
4435 {
4436 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4437 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4438 cErrors++;
4439 }
4440 fIgnoreFlags |= X86_PTE_RW;
4441 }
4442 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4443 {
4444 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4445 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4446 cErrors++;
4447 continue;
4448 }
4449
4450 /* flags */
4451 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4452 {
4453 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4454 {
4455 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4456 {
4457 if (PteDst.n.u1Write)
4458 {
4459 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4460 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4461 cErrors++;
4462 continue;
4463 }
4464 fIgnoreFlags |= X86_PTE_RW;
4465 }
4466 }
4467 else
4468 {
4469 if ( PteDst.n.u1Present
4470# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4471 && !PGM_PAGE_IS_MMIO(pPhysPage)
4472# endif
4473 )
4474 {
4475 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4476 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4477 cErrors++;
4478 continue;
4479 }
4480 fIgnoreFlags |= X86_PTE_P;
4481 }
4482 }
4483
4484 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4485 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4486 )
4487 {
4488 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4489 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4490 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4491 cErrors++;
4492 continue;
4493 }
4494 } /* for each PTE */
4495 }
4496 }
4497 /* not present */
4498
4499 } /* for each PDE */
4500
4501 } /* for each PDPTE */
4502
4503 } /* for each PML4E */
4504
4505# ifdef DEBUG
4506 if (cErrors)
4507 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4508# endif
4509
4510#endif /* GST == 32BIT, PAE or AMD64 */
4511 return cErrors;
4512
4513#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4514}
4515#endif /* VBOX_STRICT */
4516
4517
4518/**
4519 * Sets up the CR3 for shadow paging
4520 *
4521 * @returns Strict VBox status code.
4522 * @retval VINF_SUCCESS.
4523 *
4524 * @param pVCpu The VMCPU handle.
4525 * @param GCPhysCR3 The physical address in the CR3 register.
4526 */
4527PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4528{
4529 PVM pVM = pVCpu->CTX_SUFF(pVM);
4530
4531 /* Update guest paging info. */
4532#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4533 || PGM_GST_TYPE == PGM_TYPE_PAE \
4534 || PGM_GST_TYPE == PGM_TYPE_AMD64
4535
4536 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4537
4538 /*
4539 * Map the page CR3 points at.
4540 */
4541 RTHCPTR HCPtrGuestCR3;
4542 RTHCPHYS HCPhysGuestCR3;
4543 pgmLock(pVM);
4544 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4545 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4546 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4547 /** @todo this needs some reworking wrt. locking. */
4548# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4549 HCPtrGuestCR3 = NIL_RTHCPTR;
4550 int rc = VINF_SUCCESS;
4551# else
4552 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4553# endif
4554 pgmUnlock(pVM);
4555 if (RT_SUCCESS(rc))
4556 {
4557 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4558 if (RT_SUCCESS(rc))
4559 {
4560# ifdef IN_RC
4561 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4562# endif
4563# if PGM_GST_TYPE == PGM_TYPE_32BIT
4564 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4565# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4566 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4567# endif
4568 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4569
4570# elif PGM_GST_TYPE == PGM_TYPE_PAE
4571 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4572 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4573# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4574 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4575# endif
4576 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4577 LogFlow(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4578
4579 /*
4580 * Map the 4 PDs too.
4581 */
4582 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(pVCpu);
4583 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4584 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4585 {
4586 if (pGuestPDPT->a[i].n.u1Present)
4587 {
4588 RTHCPTR HCPtr;
4589 RTHCPHYS HCPhys;
4590 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4591 pgmLock(pVM);
4592 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4593 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4594 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4595# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4596 HCPtr = NIL_RTHCPTR;
4597 int rc2 = VINF_SUCCESS;
4598# else
4599 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4600# endif
4601 pgmUnlock(pVM);
4602 if (RT_SUCCESS(rc2))
4603 {
4604 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4605 AssertRCReturn(rc, rc);
4606
4607 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4608# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4609 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4610# endif
4611 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4612 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4613# ifdef IN_RC
4614 PGM_INVL_PG(pVCpu, GCPtr);
4615# endif
4616 continue;
4617 }
4618                     AssertMsgFailed(("MapCR3: rc2=%d GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4619 }
4620
4621 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4622# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4623 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4624# endif
4625 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4626 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4627# ifdef IN_RC
4628 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4629# endif
4630 }
4631
4632# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4633 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4634# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4635 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4636# endif
4637# endif
4638 }
4639 else
4640 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4641 }
4642 else
4643 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4644
4645#else /* prot/real stub */
4646 int rc = VINF_SUCCESS;
4647#endif
4648
4649 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4650# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4651 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4652 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4653 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4654 && PGM_GST_TYPE != PGM_TYPE_PROT))
4655
4656 Assert(HWACCMIsNestedPagingActive(pVM) == pVM->pgm.s.fNestedPaging);
4657 Assert(!pVM->pgm.s.fNestedPaging);
4658
4659 /*
4660 * Update the shadow root page as well since that's not fixed.
4661 */
4662 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4663 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4664 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4665 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4666 PPGMPOOLPAGE pNewShwPageCR3;
4667
4668 pgmLock(pVM);
4669
4670# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4671 if (pPool->cDirtyPages)
4672 pgmPoolResetDirtyPages(pVM);
4673# endif
4674
4675 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
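 /* Get the pool page that will serve as the new shadow CR3 root (allocating it if not cached);
    it is returned locked so the pool cannot flush it while it is in use. */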
4676 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4677 AssertFatalRC(rc);
4678 rc = VINF_SUCCESS;
4679
4680# ifdef IN_RC
4681 /*
4682 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4683 * state will be inconsistent! Flush important things now while
4684 * we still can and then make sure there are no ring-3 calls.
4685 */
4686 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4687 VMMRZCallRing3Disable(pVCpu);
4688# endif
4689
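 /* Publish the new shadow CR3 root in all contexts (R3, R0 and RC). */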
4690 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4691 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4692 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4693# ifdef IN_RING0
4694 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4695 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4696# elif defined(IN_RC)
4697 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4698 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4699# else
4700 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4701 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4702# endif
4703
4704# ifndef PGM_WITHOUT_MAPPINGS
4705 /*
4706 * Apply all hypervisor mappings to the new CR3.
4707 * Note that SyncCR3 will be executed when CR3 is changed in a guest paging mode; this
4708 * makes sure we check for conflicts in the new CR3 root.
4709 */
4710# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4711 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4712# endif
4713 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4714 AssertRCReturn(rc, rc);
4715# endif
4716
4717 /* Set the current hypervisor CR3. */
4718 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4719 SELMShadowCR3Changed(pVM, pVCpu);
4720
4721# ifdef IN_RC
4722 /* NOTE: The state is consistent again. */
4723 VMMRZCallRing3Enable(pVCpu);
4724# endif
4725
4726 /* Clean up the old CR3 root. */
4727 if ( pOldShwPageCR3
4728 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4729 {
4730 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4731# ifndef PGM_WITHOUT_MAPPINGS
4732 /* Remove the hypervisor mappings from the shadow page table. */
4733 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4734# endif
4735 /* Mark the page as unlocked; allow flushing again. */
4736 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4737
4738 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4739 }
4740 pgmUnlock(pVM);
4741# endif
4742
4743 return rc;
4744}
4745
4746/**
4747 * Unmaps the shadow CR3.
4748 *
4749 * @returns VBox status, no specials.
4750 * @param pVCpu The VMCPU handle.
4751 */
4752PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4753{
4754 LogFlow(("UnmapCR3\n"));
4755
4756 int rc = VINF_SUCCESS;
4757 PVM pVM = pVCpu->CTX_SUFF(pVM);
4758
4759 /*
4760 * Update guest paging info.
4761 */
4762#if PGM_GST_TYPE == PGM_TYPE_32BIT
4763 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4764# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4765 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4766# endif
4767 pVCpu->pgm.s.pGst32BitPdRC = 0;
4768
4769#elif PGM_GST_TYPE == PGM_TYPE_PAE
4770 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4771# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4772 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4773# endif
4774 pVCpu->pgm.s.pGstPaePdptRC = 0;
4775 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4776 {
4777 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4778# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4779 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4780# endif
4781 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4782 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4783 }
4784
4785#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4786 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4787# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4788 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4789# endif
4790
4791#else /* prot/real mode stub */
4792 /* nothing to do */
4793#endif
4794
4795#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
4796 /*
4797 * Update shadow paging info.
4798 */
4799# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4800 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4801 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4802
4803# if PGM_GST_TYPE != PGM_TYPE_REAL
4804 Assert(HWACCMIsNestedPagingActive(pVM) == pVM->pgm.s.fNestedPaging);
4805 Assert(!pVM->pgm.s.fNestedPaging);
4806# endif
4807
4808 pgmLock(pVM);
4809
4810# ifndef PGM_WITHOUT_MAPPINGS
4811 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4812 /* Remove the hypervisor mappings from the shadow page table. */
4813 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4814# endif
4815
4816 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4817 {
4818 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4819
4820 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
4821
4822# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4823 if (pPool->cDirtyPages)
4824 pgmPoolResetDirtyPages(pVM);
4825# endif
4826
4827 /* Mark the page as unlocked; allow flushing again. */
4828 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4829
4830 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
4831 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4832 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4833 pVCpu->pgm.s.pShwPageCR3RC = 0;
4834 pVCpu->pgm.s.iShwUser = 0;
4835 pVCpu->pgm.s.iShwUserTable = 0;
4836 }
4837 pgmUnlock(pVM);
4838# endif
4839#endif /* !IN_RC */
4840
4841 return rc;
4842}