VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@31089

Last change on this file since 31089 was 31089, checked in by vboxsync, 14 years ago

PGMAllBth.h: Enabled the GstWalk code again as it doesn't seem to make any difference on the tests after all.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 217.9 KB
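
This header is a paging-mode template: it is not compiled on its own but is pulled in once per supported guest/shadow paging combination, with PGM_GST_TYPE and PGM_SHW_TYPE set by the include site. As a rough sketch of what such an instantiation looks like (the actual include sites live elsewhere in PGM, e.g. PGMAll.cpp, and the PGM_GST_NAME/PGM_SHW_NAME/PGM_BTH_NAME_32BIT_32BIT spellings below are assumptions for illustration, not taken from this file):

    /* Sketch only: instantiate the shadow+guest template for a 32-bit guest
       on a 32-bit shadow paging layout, then drop the mode macros again. */
    #define PGM_GST_TYPE        PGM_TYPE_32BIT
    #define PGM_GST_NAME(name)  PGM_GST_NAME_32BIT(name)
    #define PGM_SHW_TYPE        PGM_TYPE_32BIT
    #define PGM_SHW_NAME(name)  PGM_SHW_NAME_32BIT(name)
    #define PGM_BTH_NAME(name)  PGM_BTH_NAME_32BIT_32BIT(name)
    #include "PGMAllBth.h"
    #undef  PGM_GST_TYPE
    #undef  PGM_GST_NAME
    #undef  PGM_SHW_TYPE
    #undef  PGM_SHW_NAME
    #undef  PGM_BTH_NAME

Because every function below is declared through PGM_BTH_DECL/PGM_BTH_NAME, one copy of Trap0eHandler, SyncPage, SyncPT, and the rest is emitted per mode pair.
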
1/* $Id: PGMAllBth.h 31089 2010-07-26 06:43:33Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks The nested page tables on AMD make use of PGM_SHW_TYPE in
6 * {PGM_TYPE_AMD64, PGM_TYPE_PAE and PGM_TYPE_32BIT} and PGM_GST_TYPE
7 * set to PGM_TYPE_PROT. Half of the code in this file is not
8 * exercised with PGM_SHW_TYPE set to PGM_TYPE_NESTED.
9 *
10 * @remarks Extended page tables (intel) are built with PGM_GST_TYPE set to
11 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
12 *
13 * @remarks This file is one big \#ifdef-orgy!
14 *
15 */
16
17/*
18 * Copyright (C) 2006-2010 Oracle Corporation
19 *
20 * This file is part of VirtualBox Open Source Edition (OSE), as
21 * available from http://www.virtualbox.org. This file is free software;
22 * you can redistribute it and/or modify it under the terms of the GNU
23 * General Public License (GPL) as published by the Free Software
24 * Foundation, in version 2 as it comes in the "COPYING" file of the
25 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
26 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
27 */
28
29
30/*******************************************************************************
31* Internal Functions *
32*******************************************************************************/
33RT_C_DECLS_BEGIN
34PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
35PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
36PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
37PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage);
38PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage);
39PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
40PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
41PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage);
42PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
43#ifdef VBOX_STRICT
44PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
45#endif
46DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte);
47PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3);
48PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu);
49RT_C_DECLS_END
50
51
52/*
53 * Filter out some illegal combinations of guest and shadow paging, so we can
54 * remove redundant checks inside functions.
55 */
56#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
57# error "Invalid combination; PAE guest implies PAE shadow"
58#endif
59
60#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
61 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
62# error "Invalid combination; real or protected mode without paging implies 32-bit or PAE shadow paging."
63#endif
64
65#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
66 && !(PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT)
67# error "Invalid combination; 32-bit guest paging or PAE implies 32-bit or PAE shadow paging."
68#endif
69
70#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT) \
71 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
72# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
73#endif
74
75/* Enables the new guest page table walk (GstWalk) code. */
76#define PGM_WITH_GST_WALK
77
78#ifndef IN_RING3
79
80#ifdef PGM_WITH_GST_WALK
81# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
82/**
83 * Deal with a guest page fault.
84 *
85 * @returns Strict VBox status code.
86 * @retval VINF_EM_RAW_GUEST_TRAP
87 * @retval VINF_EM_RAW_EMULATE_INSTR
88 *
89 * @param pVCpu The current CPU.
90 * @param pGstWalk The guest page table walk result.
91 * @param uErr The error code.
92 */
93PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPU pVCpu, PGSTPTWALK pGstWalk, RTGCUINT uErr)
94{
95# if !defined(PGM_WITHOUT_MAPPINGS) && (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE)
96 /*
97 * Check for write conflicts with our hypervisor mapping.
98 *
99 * If the guest happens to access a non-present page, where our hypervisor
100 * is currently mapped, then we'll create a #PF storm in the guest.
101 */
102 if ( (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW)
103 && MMHyperIsInsideArea(pVCpu->CTX_SUFF(pVM), pGstWalk->Core.GCPtr))
104 {
105 /* Force a CR3 sync to check for conflicts and emulate the instruction. */
106 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
107 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
108 return VINF_EM_RAW_EMULATE_INSTR;
109 }
110# endif
111
112 /*
113 * Calc the error code for the guest trap.
114 */
115 uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
116 ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
117 : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
118 if (pGstWalk->Core.fBadPhysAddr)
119 {
120 uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
121 Assert(!pGstWalk->Core.fNotPresent);
122 }
123 else if (!pGstWalk->Core.fNotPresent)
124 uNewErr |= X86_TRAP_PF_P;
125 TRPMSetErrorCode(pVCpu, uNewErr);
126
127 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
128 return VINF_EM_RAW_GUEST_TRAP;
129}
130# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
131#endif /* PGM_WITH_GST_WALK */
132
133
134/**
135 * #PF Handler for raw-mode guest execution.
136 *
137 * @returns VBox status code (appropriate for trap handling and GC return).
138 *
139 * @param pVCpu VMCPU Handle.
140 * @param uErr The trap error code.
141 * @param pRegFrame Trap register frame.
142 * @param pvFault The fault address.
143 * @param pfLockTaken PGM lock taken here or not (out)
144 */
145PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
146{
147 PVM pVM = pVCpu->CTX_SUFF(pVM);
148
149 *pfLockTaken = false;
150
151# if defined(IN_RC) && defined(VBOX_STRICT)
152 PGMDynCheckLocks(pVM);
153# endif
154
155# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
156 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
157 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
158 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
159#ifdef PGM_WITH_GST_WALK
160 int rc;
161
162# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
163 /*
164 * Walk the guest page translation tables and check if it's a guest fault.
165 */
166 GSTPTWALK GstWalk;
167 rc = PGM_GST_NAME(Walk)(pVCpu, pvFault, &GstWalk);
168 if (RT_FAILURE_NP(rc))
169 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &GstWalk, uErr));
170# if PGM_GST_TYPE == PGM_TYPE_AMD64
171 AssertMsg(GstWalk.Pml4e.u == GstWalk.pPml4e->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pml4e.u, (uint64_t)GstWalk.pPml4e->u));
172# endif
173# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
174 AssertMsg(GstWalk.Pdpe.u == GstWalk.pPdpe->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pdpe.u, (uint64_t)GstWalk.pPdpe->u));
175# endif
176 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u));
177 AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u));
178
179 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
180 {
181 if ( ( (uErr & X86_TRAP_PF_RW)
182 && !GstWalk.Core.fEffectiveRW
183 && ( (uErr & X86_TRAP_PF_US)
184 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
185 || ((uErr & X86_TRAP_PF_US) && !GstWalk.Core.fEffectiveUS)
186 || ((uErr & X86_TRAP_PF_ID) && GstWalk.Core.fEffectiveNX)
187 )
188 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &GstWalk, uErr));
189 }
190
191 /*
192 * Set the accessed and dirty flags.
193 */
194# if PGM_GST_TYPE == PGM_TYPE_AMD64
195 GstWalk.Pml4e.u |= X86_PML4E_A;
196 GstWalk.pPml4e->u |= X86_PML4E_A;
197 GstWalk.Pdpe.u |= X86_PDPE_A;
198 GstWalk.pPdpe->u |= X86_PDPE_A;
199# endif
200 if (GstWalk.Core.fBigPage)
201 {
202 Assert(GstWalk.Pde.b.u1Size);
203 if (uErr & X86_TRAP_PF_RW)
204 {
205 GstWalk.Pde.u |= X86_PDE4M_A | X86_PDE4M_D;
206 GstWalk.pPde->u |= X86_PDE4M_A | X86_PDE4M_D;
207 }
208 else
209 {
210 GstWalk.Pde.u |= X86_PDE4M_A;
211 GstWalk.pPde->u |= X86_PDE4M_A;
212 }
213 }
214 else
215 {
216 Assert(!GstWalk.Pde.b.u1Size);
217 GstWalk.Pde.u |= X86_PDE_A;
218 GstWalk.pPde->u |= X86_PDE_A;
219 if (uErr & X86_TRAP_PF_RW)
220 {
221# ifdef VBOX_WITH_STATISTICS
222 if (!GstWalk.Pte.n.u1Dirty)
223 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
224 else
225 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
226# endif
227 GstWalk.Pte.u |= X86_PTE_A | X86_PTE_D;
228 GstWalk.pPte->u |= X86_PTE_A | X86_PTE_D;
229 }
230 else
231 {
232 GstWalk.Pte.u |= X86_PTE_A;
233 GstWalk.pPte->u |= X86_PTE_A;
234 }
235 Assert(GstWalk.Pte.u == GstWalk.pPte->u);
236 }
237 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u,
238 ("%RX64 %RX64 pPte=%p pPde=%p Pte=%RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u, GstWalk.pPte, GstWalk.pPde, (uint64_t)GstWalk.pPte->u));
239# else /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
240 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A};
241# endif /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
242
243#else /* !PGM_WITH_GST_WALK */
244
245# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE != PGM_TYPE_PAE
246 /*
247 * Hide the instruction fetch trap indicator if NX isn't active.
248 */
249 /** @todo do this only when returning with a guest trap! */
250 if ((uErr & X86_TRAP_PF_ID) && !pVCpu->pgm.s.fNoExecuteEnabled)
251 {
252 uErr &= ~X86_TRAP_PF_ID;
253 TRPMSetErrorCode(pVCpu, uErr);
254 }
255# endif
256
257 /*
258 * Get PDs.
259 */
260 int rc;
261# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
262# if PGM_GST_TYPE == PGM_TYPE_32BIT
263 const unsigned iPDSrc = pvFault >> GST_PD_SHIFT;
264 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
265
266# elif PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64
267
268# if PGM_GST_TYPE == PGM_TYPE_PAE
269 unsigned iPDSrc = 0; /* initialized to shut up gcc */
270 X86PDPE PdpeSrc;
271 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, pvFault, &iPDSrc, &PdpeSrc);
272
273# elif PGM_GST_TYPE == PGM_TYPE_AMD64
274 unsigned iPDSrc = 0; /* initialized to shut up gcc */
275 PX86PML4E pPml4eSrc = NULL; /* ditto */
276 X86PDPE PdpeSrc;
277 PGSTPD pPDSrc;
278
279 pPDSrc = pgmGstGetLongModePDPtr(pVCpu, pvFault, &pPml4eSrc, &PdpeSrc, &iPDSrc);
280 Assert(pPml4eSrc);
281# endif
282
283 /* Quick check for a valid guest trap. (PAE & AMD64) */
284 if (!pPDSrc)
285 {
286# if PGM_GST_TYPE == PGM_TYPE_AMD64 && GC_ARCH_BITS == 64
287 LogFlow(("Trap0eHandler: guest PML4 %d not present CR3=%RGp\n", (int)((pvFault >> X86_PML4_SHIFT) & X86_PML4_MASK), CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
288# else
289 LogFlow(("Trap0eHandler: guest iPDSrc=%u not present CR3=%RGp\n", iPDSrc, CPUMGetGuestCR3(pVCpu) & X86_CR3_PAGE_MASK));
290# endif
291 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
292 TRPMSetErrorCode(pVCpu, uErr);
293 return VINF_EM_RAW_GUEST_TRAP;
294 }
295# endif
296
297# else /* !PGM_WITH_PAGING */
298 PGSTPD pPDSrc = NULL;
299 const unsigned iPDSrc = 0;
300# endif /* !PGM_WITH_PAGING */
301
302# if !defined(PGM_WITHOUT_MAPPINGS) && ((PGM_GST_TYPE == PGM_TYPE_32BIT) || (PGM_GST_TYPE == PGM_TYPE_PAE))
303 /*
304 * Check for write conflicts with our hypervisor mapping early on. If the guest happens to access a non-present page,
305 * where our hypervisor is currently mapped, then we'll create a #PF storm in the guest.
306 */
307 if ( (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW)
308 && MMHyperIsInsideArea(pVM, pvFault))
309 {
310 /* Force a CR3 sync to check for conflicts and emulate the instruction. */
311 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
312 return VINF_EM_RAW_EMULATE_INSTR;
313 }
314# endif
315
316 /*
317 * First check for a genuine guest page fault.
318 */
319 /** @todo This duplicates the page table walk we're doing below. Need to
320 * find some way to avoid this double work, probably by caching
321 * the data. */
322# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
323 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
324 rc = PGM_BTH_NAME(CheckPageFault)(pVCpu, uErr, &pPDSrc->a[iPDSrc], pvFault);
325 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeCheckPageFault, e);
326 if (rc == VINF_EM_RAW_GUEST_TRAP)
327 {
328 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
329 return rc;
330 }
331# endif /* PGM_WITH_PAGING */
332#endif /* !PGM_WITH_GST_WALK */
333
334 /* Take the big lock now. */
335 *pfLockTaken = true;
336 pgmLock(pVM);
337
338 /*
339 * Fetch the guest PDE, PDPE and PML4E.
340 */
341#ifndef PGM_WITH_GST_WALK
342# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
343 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
344# else
345 GSTPDE PdeSrc;
346 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
347 PdeSrc.n.u1Present = 1;
348 PdeSrc.n.u1Write = 1;
349 PdeSrc.n.u1Accessed = 1;
350 PdeSrc.n.u1User = 1;
351# endif
352
353#endif /* !PGM_WITH_GST_WALK */
354# if PGM_SHW_TYPE == PGM_TYPE_32BIT
355 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
356 PX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
357
358# elif PGM_SHW_TYPE == PGM_TYPE_PAE
359 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
360
361 PX86PDPAE pPDDst;
362#ifdef PGM_WITH_GST_WALK
363# if PGM_GST_TYPE == PGM_TYPE_PAE
364 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, GstWalk.Pdpe.u, &pPDDst);
365# else
366 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, X86_PDPE_P, &pPDDst); /* RW, US and A are reserved in PAE mode. */
367# endif
368#else
369# if PGM_GST_TYPE != PGM_TYPE_PAE
370 X86PDPE PdpeSrc;
371
372 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
373 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
374# endif
375 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, PdpeSrc.u, &pPDDst);
376#endif
377 if (rc != VINF_SUCCESS)
378 {
379 AssertRC(rc);
380 return rc;
381 }
382 Assert(pPDDst);
383
384# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
385 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
386 PX86PDPAE pPDDst;
387#ifdef PGM_WITH_GST_WALK
388# if PGM_GST_TYPE == PGM_TYPE_PROT /* (AMD-V nested paging) */
389 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A,
390 X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A, &pPDDst);
391# else
392 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, GstWalk.Pml4e.u, GstWalk.Pdpe.u, &pPDDst);
393# endif
394#else
395# if PGM_GST_TYPE == PGM_TYPE_PROT
396 /* AMD-V nested paging */
397 X86PML4E Pml4eSrc;
398 X86PDPE PdpeSrc;
399 PX86PML4E pPml4eSrc = &Pml4eSrc;
400
401 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
402 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
403 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
404# endif
405
406 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
407#endif /* !PGM_WITH_GST_WALK */
408 if (rc != VINF_SUCCESS)
409 {
410 AssertRC(rc);
411 return rc;
412 }
413 Assert(pPDDst);
414
415# elif PGM_SHW_TYPE == PGM_TYPE_EPT
416 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
417 PEPTPD pPDDst;
418
419 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
420 if (rc != VINF_SUCCESS)
421 {
422 AssertRC(rc);
423 return rc;
424 }
425 Assert(pPDDst);
426# endif
427
428# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
429 /* Dirty page handling. */
430 if (uErr & X86_TRAP_PF_RW) /* write fault? */
431 {
432 /*
433 * If we successfully correct the write protection fault due to dirty bit
434 * tracking, then return immediately.
435 */
436 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
437#ifdef PGM_WITH_GST_WALK
438 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], GstWalk.pPde, pvFault);
439#else
440 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], &pPDSrc->a[iPDSrc], pvFault);
441#endif
442 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyBitTracking), a);
443 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
444 {
445 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)
446 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT ? &pVCpu->pgm.s.StatRZTrap0eTime2DirtyAndAccessed : &pVCpu->pgm.s.StatRZTrap0eTime2GuestTrap; });
447 LogBird(("Trap0eHandler: returns VINF_SUCCESS\n"));
448 return VINF_SUCCESS;
449 }
450#ifdef PGM_WITH_GST_WALK
451 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u));
452 AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u));
453#endif
454 }
455
456# if 0 /* rarely useful; leave for debugging. */
457 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
458# endif
459# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
460
461 /*
462 * A common case is the not-present error caused by lazy page table syncing.
463 *
464 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
465 * here so we can safely assume that the shadow PT is present when calling
466 * SyncPage later.
467 *
468 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
469 * of mapping conflict and defer to SyncCR3 in R3.
470 * (Again, we do NOT support access handlers for non-present guest pages.)
471 *
472 */
473#ifdef PGM_WITH_GST_WALK
474# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
475 Assert(GstWalk.Pde.n.u1Present);
476# endif
477#else
478 Assert(PdeSrc.n.u1Present);
479#endif
480 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
481 && !pPDDst->a[iPDDst].n.u1Present
482 )
483 {
484 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2SyncPT; });
485 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
486#ifdef PGM_WITH_GST_WALK
487# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
488 LogFlow(("=>SyncPT %04x = %08RX64\n", (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, (uint64_t)GstWalk.Pde.u));
489 rc = PGM_BTH_NAME(SyncPT)(pVCpu, (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, GstWalk.pPd, pvFault);
490# else
491 LogFlow(("=>SyncPT pvFault=%RGv\n", pvFault));
492 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0, NULL, pvFault);
493# endif
494#else /* !PGM_WITH_GST_WALK */
495 LogFlow(("=>SyncPT %04x = %08x\n", iPDSrc, PdeSrc.au32[0]));
496 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, pvFault);
497#endif /* !PGM_WITH_GST_WALK */
498 if (RT_SUCCESS(rc))
499 {
500 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
501 return rc;
502 }
503#ifdef PGM_WITH_GST_WALK
504 Log(("SyncPT: %RGv failed!! rc=%Rrc\n", pvFault, rc));
505#else
506 Log(("SyncPT: %d failed!! rc=%d\n", iPDSrc, rc));
507#endif
508 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
509 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeSyncPT, f);
510 return VINF_PGM_SYNC_CR3;
511 }
512
513# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(PGM_WITHOUT_MAPPINGS)
514 /*
515 * Check if this address is within any of our mappings.
516 *
517 * This is *very* fast and it's gonna save us a bit of effort below and prevent
518 * us from screwing ourselves with MMIO2 pages which have a GC Mapping (VRam).
519 * (BTW, it's impossible to have physical access handlers in a mapping.)
520 */
521 if (pgmMapAreMappingsEnabled(&pVM->pgm.s))
522 {
523 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
524 PPGMMAPPING pMapping = pVM->pgm.s.CTX_SUFF(pMappings);
525 for ( ; pMapping; pMapping = pMapping->CTX_SUFF(pNext))
526 {
527 if (pvFault < pMapping->GCPtr)
528 break;
529 if (pvFault - pMapping->GCPtr < pMapping->cb)
530 {
531 /*
532 * The first thing we check is if we've got an undetected conflict.
533 */
534 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
535 {
536 unsigned iPT = pMapping->cb >> GST_PD_SHIFT;
537 while (iPT-- > 0)
538#ifdef PGM_WITH_GST_WALK
539 if (GstWalk.pPde[iPT].n.u1Present)
540#else
541 if (pPDSrc->a[iPDSrc + iPT].n.u1Present)
542#endif
543 {
544 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eConflicts);
545 Log(("Trap0e: Detected Conflict %RGv-%RGv\n", pMapping->GCPtr, pMapping->GCPtrLast));
546 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
547 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
548 return VINF_PGM_SYNC_CR3;
549 }
550 }
551
552 /*
553 * Check if the fault address is in a virtual page access handler range.
554 */
555 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->HyperVirtHandlers, pvFault);
556 if ( pCur
557 && pvFault - pCur->Core.Key < pCur->cb
558 && uErr & X86_TRAP_PF_RW)
559 {
560# ifdef IN_RC
561 STAM_PROFILE_START(&pCur->Stat, h);
562 pgmUnlock(pVM);
563 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
564 pgmLock(pVM);
565 STAM_PROFILE_STOP(&pCur->Stat, h);
566# else
567 AssertFailed();
568 rc = VINF_EM_RAW_EMULATE_INSTR; /* can't happen with VMX */
569# endif
570 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersMapping);
571 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
572 return rc;
573 }
574
575 /*
576 * Pretend we're not here and let the guest handle the trap.
577 */
578 TRPMSetErrorCode(pVCpu, uErr & ~X86_TRAP_PF_P);
579 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFMapping);
580 LogFlow(("PGM: Mapping access -> route trap to recompiler!\n"));
581 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
582 return VINF_EM_RAW_GUEST_TRAP;
583 }
584 }
585 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeMapping, a);
586 } /* pgmAreMappingsEnabled(&pVM->pgm.s) */
587# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
588
589 /*
590 * Check if this fault address is flagged for special treatment,
591 * which means we'll have to figure out the physical address and
592 * check flags associated with it.
593 *
594 * ASSUME that we can limit any special access handling to pages
595 * in page tables which the guest believes to be present.
596 */
597#ifdef PGM_WITH_GST_WALK
598# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
599 Assert(GstWalk.Pde.n.u1Present);
600# endif
601#else
602 Assert(PdeSrc.n.u1Present);
603#endif
604 {
605#ifdef PGM_WITH_GST_WALK
606# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
607 RTGCPHYS GCPhys = GstWalk.Core.GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
608#if 1
609 RTGCPHYS GCPhys3;
610 if ( GstWalk.Pde.b.u1Size && GST_IS_PSE_ACTIVE(pVCpu))
611 GCPhys3 = GST_GET_PDE_BIG_PG_GCPHYS(pVM, GstWalk.Pde)
612 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
613 else
614 GCPhys3 = GstWalk.Pte.u & GST_PTE_PG_MASK;
615 Assert(GCPhys3 == GCPhys);
616#endif
617# else
618 RTGCPHYS GCPhys = (RTGCPHYS)pvFault & ~(RTGCPHYS)PAGE_OFFSET_MASK;
619# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
620#else
621 RTGCPHYS GCPhys = NIL_RTGCPHYS;
622
623# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
624 if ( PdeSrc.b.u1Size
625 && GST_IS_PSE_ACTIVE(pVCpu))
626 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc)
627 | ((RTGCPHYS)pvFault & (GST_BIG_PAGE_OFFSET_MASK ^ PAGE_OFFSET_MASK));
628 else
629 {
630 PGSTPT pPTSrc;
631 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
632 if (RT_SUCCESS(rc))
633 {
634 unsigned iPTESrc = (pvFault >> GST_PT_SHIFT) & GST_PT_MASK;
635 if (pPTSrc->a[iPTESrc].n.u1Present)
636 GCPhys = pPTSrc->a[iPTESrc].u & GST_PTE_PG_MASK;
637 }
638 }
639# else
640 /* No paging so the fault address is the physical address */
641 GCPhys = (RTGCPHYS)(pvFault & ~PAGE_OFFSET_MASK);
642# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
643#endif /* !PGM_WITH_GST_WALK */
644
645 /*
646 * If we have a GC address we'll check if it has any flags set.
647 */
648#ifndef PGM_WITH_GST_WALK
649 if (GCPhys != NIL_RTGCPHYS)
650#endif
651 {
652 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
653
654 PPGMPAGE pPage;
655 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
656 if (RT_SUCCESS(rc)) /** just handle the failure immediately (it returns) and make things easier to read. */
657 {
658 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
659 {
660 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
661 {
662 /*
663 * Physical page access handler.
664 */
665 const RTGCPHYS GCPhysFault = GCPhys | (pvFault & PAGE_OFFSET_MASK);
666 PPGMPHYSHANDLER pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
667 if (pCur)
668 {
669# ifdef PGM_SYNC_N_PAGES
670 /*
671 * If the region is write protected and we got a page not present fault, then sync
672 * the pages. If the fault was caused by a read, then restart the instruction.
673 * In case of write access continue to the GC write handler.
674 *
675 * ASSUMES that there is only one handler per page or that they have similar write properties.
676 */
677 if ( pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
678 && !(uErr & X86_TRAP_PF_P))
679 {
680#ifdef PGM_WITH_GST_WALK
681# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
682 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
683# else
684 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
685# endif
686#else
687 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
688#endif
689 if ( RT_FAILURE(rc)
690 || !(uErr & X86_TRAP_PF_RW)
691 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
692 {
693 AssertRC(rc);
694 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
695 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
696 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
697 return rc;
698 }
699 }
700# endif
701
702 AssertMsg( pCur->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE
703 || (pCur->enmType == PGMPHYSHANDLERTYPE_PHYSICAL_WRITE && (uErr & X86_TRAP_PF_RW)),
704 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
705
706# if defined(IN_RC) || defined(IN_RING0)
707 if (pCur->CTX_SUFF(pfnHandler))
708 {
709 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
710# ifdef IN_RING0
711 PFNPGMR0PHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
712# else
713 PFNPGMRCPHYSHANDLER pfnHandler = pCur->CTX_SUFF(pfnHandler);
714# endif
715 bool fLeaveLock = (pfnHandler != pPool->CTX_SUFF(pfnAccessHandler));
716 void *pvUser = pCur->CTX_SUFF(pvUser);
717
718 STAM_PROFILE_START(&pCur->Stat, h);
719 if (fLeaveLock)
720 pgmUnlock(pVM); /* @todo: Not entirely safe. */
721
722 rc = pfnHandler(pVM, uErr, pRegFrame, pvFault, GCPhysFault, pvUser);
723 if (fLeaveLock)
724 pgmLock(pVM);
725# ifdef VBOX_WITH_STATISTICS
726 pCur = (PPGMPHYSHANDLER)RTAvlroGCPhysRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->PhysHandlers, GCPhysFault);
727 if (pCur)
728 STAM_PROFILE_STOP(&pCur->Stat, h);
729# else
730 pCur = NULL; /* might be invalid by now. */
731# endif
732
733 }
734 else
735# endif
736 rc = VINF_EM_RAW_EMULATE_INSTR;
737
738 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersPhysical);
739 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
740 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndPhys; });
741 return rc;
742 }
743 }
744# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
745 else
746 {
747# ifdef PGM_SYNC_N_PAGES
748 /*
749 * If the region is write protected and we got a page not present fault, then sync
750 * the pages. If the fault was caused by a read, then restart the instruction.
751 * In case of write access continue to the GC write handler.
752 */
753 if ( PGM_PAGE_GET_HNDL_VIRT_STATE(pPage) < PGM_PAGE_HNDL_PHYS_STATE_ALL
754 && !(uErr & X86_TRAP_PF_P))
755 {
756#ifdef PGM_WITH_GST_WALK
757 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
758#else
759 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
760#endif
761 if ( RT_FAILURE(rc)
762 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
763 || !(uErr & X86_TRAP_PF_RW))
764 {
765 AssertRC(rc);
766 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
767 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
768 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndVirt; });
769 return rc;
770 }
771 }
772# endif
773 /*
774 * Ok, it's a virtual page access handler.
775 *
776 * Since it's faster to search by address, we'll do that first
777 * and then retry by GCPhys if that fails.
778 */
779 /** @todo r=bird: perhaps we should consider looking up by physical address directly now? */
780 /** @note r=svl: true, but lookup on virtual address should remain as a fallback as phys & virt trees might be out of sync, because the
781 * page was changed without us noticing it (not-present -> present without invlpg or mov cr3, xxx)
782 */
783 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
784 if (pCur)
785 {
786 AssertMsg(!(pvFault - pCur->Core.Key < pCur->cb)
787 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
788 || !(uErr & X86_TRAP_PF_P)
789 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
790 ("Unexpected trap for virtual handler: %RGv (phys=%RGp) pPage=%R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
791
792 if ( pvFault - pCur->Core.Key < pCur->cb
793 && ( uErr & X86_TRAP_PF_RW
794 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
795 {
796# ifdef IN_RC
797 STAM_PROFILE_START(&pCur->Stat, h);
798 pgmUnlock(pVM);
799 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
800 pgmLock(pVM);
801 STAM_PROFILE_STOP(&pCur->Stat, h);
802# else
803 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
804# endif
805 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtual);
806 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
807 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
808 return rc;
809 }
810 /* Unhandled part of a monitored page */
811 }
812 else
813 {
814 /* Check by physical address. */
815 unsigned iPage;
816 rc = pgmHandlerVirtualFindByPhysAddr(pVM, GCPhys + (pvFault & PAGE_OFFSET_MASK),
817 &pCur, &iPage);
818 Assert(RT_SUCCESS(rc) || !pCur);
819 if ( pCur
820 && ( uErr & X86_TRAP_PF_RW
821 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
822 {
823 Assert((pCur->aPhysToVirt[iPage].Core.Key & X86_PTE_PAE_PG_MASK) == GCPhys);
824# ifdef IN_RC
825 RTGCPTR off = (iPage << PAGE_SHIFT) + (pvFault & PAGE_OFFSET_MASK) - (pCur->Core.Key & PAGE_OFFSET_MASK);
826 Assert(off < pCur->cb);
827 STAM_PROFILE_START(&pCur->Stat, h);
828 pgmUnlock(pVM);
829 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, off);
830 pgmLock(pVM);
831 STAM_PROFILE_STOP(&pCur->Stat, h);
832# else
833 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
834# endif
835 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualByPhys);
836 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
837 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
838 return rc;
839 }
840 }
841 }
842# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
843
844 /*
845 * There is a handled area of the page, but this fault doesn't belong to it.
846 * We must emulate the instruction.
847 *
848 * To avoid crashing (non-fatally) in the interpreter and going back to the recompiler,
849 * we first check if this was a page-not-present fault for a page with only
850 * write access handlers. Restart the instruction if it wasn't a write access.
851 */
852 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersUnhandled);
853
854 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
855 && !(uErr & X86_TRAP_PF_P))
856 {
857#ifdef PGM_WITH_GST_WALK
858# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
859 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
860# else
861 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
862# endif
863#else
864 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
865#endif
866 if ( RT_FAILURE(rc)
867 || rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE
868 || !(uErr & X86_TRAP_PF_RW))
869 {
870 AssertRC(rc);
871 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersOutOfSync);
872 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
873 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndPhys; });
874 return rc;
875 }
876 }
877
878 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
879 * It's writing to an unhandled part of the LDT page several million times.
880 */
881 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
882 LogFlow(("PGM: PGMInterpretInstruction -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
883 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
884 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
885 return rc;
886 } /* if any kind of handler */
887
888# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
889 if (uErr & X86_TRAP_PF_P)
890 {
891 /*
892 * The page isn't marked, but it might still be monitored by a virtual page access handler.
893 * (ASSUMES no temporary disabling of virtual handlers.)
894 */
895 /** @todo r=bird: Since the purpose is to catch out of sync pages with virtual handler(s) here,
896 * we should correct both the shadow page table and physical memory flags, and not only check for
897 * accesses within the handler region but for access to pages with virtual handlers. */
898 PPGMVIRTHANDLER pCur = (PPGMVIRTHANDLER)RTAvlroGCPtrRangeGet(&pVM->pgm.s.CTX_SUFF(pTrees)->VirtHandlers, pvFault);
899 if (pCur)
900 {
901 AssertMsg( !(pvFault - pCur->Core.Key < pCur->cb)
902 || ( pCur->enmType != PGMVIRTHANDLERTYPE_WRITE
903 || !(uErr & X86_TRAP_PF_P)
904 || (pCur->enmType == PGMVIRTHANDLERTYPE_WRITE && (uErr & X86_TRAP_PF_RW))),
905 ("Unexpected trap for virtual handler: %08X (phys=%08x) %R[pgmpage] uErr=%X, enum=%d\n", pvFault, GCPhys, pPage, uErr, pCur->enmType));
906
907 if ( pvFault - pCur->Core.Key < pCur->cb
908 && ( uErr & X86_TRAP_PF_RW
909 || pCur->enmType != PGMVIRTHANDLERTYPE_WRITE ) )
910 {
911# ifdef IN_RC
912 STAM_PROFILE_START(&pCur->Stat, h);
913 pgmUnlock(pVM);
914 rc = pCur->CTX_SUFF(pfnHandler)(pVM, uErr, pRegFrame, pvFault, pCur->Core.Key, pvFault - pCur->Core.Key);
915 pgmLock(pVM);
916 STAM_PROFILE_STOP(&pCur->Stat, h);
917# else
918 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo for VMX */
919# endif
920 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersVirtualUnmarked);
921 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
922 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndVirt; });
923 return rc;
924 }
925 }
926 }
927# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
928 }
929 else
930 {
931 /*
932 * When the guest accesses invalid physical memory (e.g. probing
933 * of RAM or accessing a remapped MMIO range), then we'll fall
934 * back to the recompiler to emulate the instruction.
935 */
936 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
937 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eHandlersInvalid);
938 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
939 return VINF_EM_RAW_EMULATE_INSTR;
940 }
941
942 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
943
944# ifdef PGM_OUT_OF_SYNC_IN_GC /** @todo remove this bugger. */
945 /*
946 * We are here only if page is present in Guest page tables and
947 * trap is not handled by our handlers.
948 *
949 * Check it for page out-of-sync situation.
950 */
951 STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
952
953 if (!(uErr & X86_TRAP_PF_P))
954 {
955 /*
956 * Page is not present in our page tables.
957 * Try to sync it!
958 * BTW, fPageShw is invalid in this branch!
959 */
960 if (uErr & X86_TRAP_PF_US)
961 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
962 else /* supervisor */
963 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
964
965 if (PGM_PAGE_IS_BALLOONED(pPage))
966 {
967 /* Emulate reads from ballooned pages as they are not present in our shadow page tables. (required for e.g. Solaris guests; soft ecc, random nr generator) */
968 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
969 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
970 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
971 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeHandlers, b);
972 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2HndUnhandled; });
973 return rc;
974 }
975# if defined(LOG_ENABLED) && !defined(IN_RING0)
976 RTGCPHYS GCPhys2;
977 uint64_t fPageGst2;
978 PGMGstGetPage(pVCpu, pvFault, &fPageGst2, &GCPhys2);
979#ifdef PGM_WITH_GST_WALK
980# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
981 Log(("Page out of sync: %RGv eip=%08x PdeSrc.US=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
982 pvFault, pRegFrame->eip, GstWalk.Pde.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
983# else
984 Log(("Page out of sync: %RGv eip=%08x fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
985 pvFault, pRegFrame->eip, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
986# endif
987#else
988 Log(("Page out of sync: %RGv eip=%08x PdeSrc.n.u1User=%d fPageGst2=%08llx GCPhys2=%RGp scan=%d\n",
989 pvFault, pRegFrame->eip, PdeSrc.n.u1User, fPageGst2, GCPhys2, CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)));
990#endif
991# endif /* LOG_ENABLED */
992
993# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0)
994 if (CPUMGetGuestCPL(pVCpu, pRegFrame) == 0)
995 {
996 uint64_t fPageGst;
997 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
998 if ( RT_SUCCESS(rc)
999 && !(fPageGst & X86_PTE_US))
1000 {
1001 /* Note: can't check for X86_TRAP_ID bit, because that requires execute disable support on the CPU */
1002 if ( pvFault == (RTGCPTR)pRegFrame->eip
1003 || pvFault - pRegFrame->eip < 8 /* instruction crossing a page boundary */
1004# ifdef CSAM_DETECT_NEW_CODE_PAGES
1005 || ( !PATMIsPatchGCAddr(pVM, pRegFrame->eip)
1006 && CSAMDoesPageNeedScanning(pVM, pRegFrame->eip)) /* any new code we encounter here */
1007# endif /* CSAM_DETECT_NEW_CODE_PAGES */
1008 )
1009 {
1010 LogFlow(("CSAMExecFault %RX32\n", pRegFrame->eip));
1011 rc = CSAMExecFault(pVM, (RTRCPTR)pRegFrame->eip);
1012 if (rc != VINF_SUCCESS)
1013 {
1014 /*
1015 * CSAM needs to perform a job in ring 3.
1016 *
1017 * Sync the page before going to the host context; otherwise we'll end up in a loop if
1018 * CSAM fails (e.g. instruction crosses a page boundary and the next page is not present)
1019 */
1020 LogFlow(("CSAM ring 3 job\n"));
1021#ifdef PGM_WITH_GST_WALK
1022 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
1023#else
1024 int rc2 = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
1025#endif
1026 AssertRC(rc2);
1027
1028 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
1029 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2CSAM; });
1030 return rc;
1031 }
1032 }
1033# ifdef CSAM_DETECT_NEW_CODE_PAGES
1034 else if ( uErr == X86_TRAP_PF_RW
1035 && pRegFrame->ecx >= 0x100 /* early check for movswd count */
1036 && pRegFrame->ecx < 0x10000)
1037 {
1038 /* In case of a write to a non-present supervisor shadow page, we'll take special precautions
1039 * to detect loading of new code pages.
1040 */
1041
1042 /*
1043 * Decode the instruction.
1044 */
1045 RTGCPTR PC;
1046 rc = SELMValidateAndConvertCSAddr(pVM, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs,
1047 &pRegFrame->csHid, (RTGCPTR)pRegFrame->eip, &PC);
1048 if (rc == VINF_SUCCESS)
1049 {
1050 PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
1051 uint32_t cbOp;
1052 rc = EMInterpretDisasOneEx(pVM, pVCpu, PC, pRegFrame, pDis, &cbOp);
1053
1054 /* For now we'll restrict this to rep movsw/d instructions */
1055 if ( rc == VINF_SUCCESS
1056 && pDis->pCurInstr->opcode == OP_MOVSWD
1057 && (pDis->prefix & PREFIX_REP))
1058 {
1059 CSAMMarkPossibleCodePage(pVM, pvFault);
1060 }
1061 }
1062 }
1063# endif /* CSAM_DETECT_NEW_CODE_PAGES */
1064
1065 /*
1066 * Mark this page as safe.
1067 */
1068 /** @todo not correct for pages that contain both code and data!! */
1069 Log2(("CSAMMarkPage %RGv; scanned=%d\n", pvFault, true));
1070 CSAMMarkPage(pVM, pvFault, true);
1071 }
1072 }
1073# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && !defined(IN_RING0) */
1074#ifdef PGM_WITH_GST_WALK
1075# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1076 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
1077# else
1078 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
1079# endif
1080#else
1081 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, PGM_SYNC_NR_PAGES, uErr);
1082#endif
1083 if (RT_SUCCESS(rc))
1084 {
1085 /* The page was successfully synced, return to the guest. */
1086 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
1087 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSync; });
1088 return VINF_SUCCESS;
1089 }
1090 }
1091 else /* uErr & X86_TRAP_PF_P: */
1092 {
1093 /*
1094 * Write protected pages are made writable when the guest makes the first
1095 * write to them. This happens for pages that are shared, write monitored
1096 * and not yet allocated.
1097 *
1098 * Also, a side effect of not flushing global PDEs is out-of-sync pages due
1099 * to physically monitored regions that are no longer valid.
1100 * Assume for now it only applies to the read/write flag.
1101 */
1102 if ( RT_SUCCESS(rc)
1103 && (uErr & X86_TRAP_PF_RW))
1104 {
1105 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1106 {
1107 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
1108 Assert(!PGM_PAGE_IS_ZERO(pPage));
1109 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
1110
1111 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
1112 if (rc != VINF_SUCCESS)
1113 {
1114 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
1115 return rc;
1116 }
1117 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
1118 return VINF_EM_NO_MEMORY;
1119 }
1120
1121# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1122 /* Check to see if we need to emulate the instruction as X86_CR0_WP has been cleared. */
1123 if ( CPUMGetGuestCPL(pVCpu, pRegFrame) == 0
1124 && ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG))
1125 {
1126 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
1127 uint64_t fPageGst;
1128 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
1129 if ( RT_SUCCESS(rc)
1130 && !(fPageGst & X86_PTE_RW))
1131 {
1132 rc = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
1133 if (RT_SUCCESS(rc))
1134 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulInRZ);
1135 else
1136 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eWPEmulToR3);
1137 return rc;
1138 }
1139 AssertMsg(RT_SUCCESS(rc), ("Unexpected r/w page %RGv flag=%x rc=%Rrc\n", pvFault, (uint32_t)fPageGst, rc));
1140 }
1141# endif
1142 /// @todo count the above case; else
1143 if (uErr & X86_TRAP_PF_US)
1144 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
1145 else /* supervisor */
1146 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
1147
1148 /*
1149 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
1150 * page is not present, which is not true in this case.
1151 */
1152#ifdef PGM_WITH_GST_WALK
1153# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1154 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
1155# else
1156 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
1157# endif
1158#else
1159 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, pvFault, 1, uErr);
1160#endif
1161 if (RT_SUCCESS(rc))
1162 {
1163 /*
1164 * Page was successfully synced, return to guest.
1165 * First invalidate the page as it might be in the TLB.
1166 */
1167# if PGM_SHW_TYPE == PGM_TYPE_EPT
1168 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
1169# else
1170 PGM_INVL_PG(pVCpu, pvFault);
1171# endif
1172# ifdef VBOX_STRICT
1173 RTGCPHYS GCPhys2;
1174 uint64_t fPageGst;
1175 if (!pVM->pgm.s.fNestedPaging)
1176 {
1177 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, &GCPhys2);
1178#if defined(PGM_WITH_GST_WALK) && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1179 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%Rrc fPageGst=%RX64 EffRW=%d EffUS=%d uErr=%RGp cr4=%RX64 pvFault=%RGv\n", rc, fPageGst, GstWalk.Core.fEffectiveRW, GstWalk.Core.fEffectiveUS, uErr, CPUMGetGuestCR0(pVCpu), pvFault ));
1180#else
1181 AssertMsg(RT_SUCCESS(rc) && (fPageGst & X86_PTE_RW), ("rc=%Rrc fPageGst=%RX64\n", rc, fPageGst));
1182#endif
1183 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GCPhys2, (uint64_t)fPageGst));
1184 }
1185 uint64_t fPageShw;
1186 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
1187 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */, ("rc=%Rrc fPageShw=%RX64\n", rc, fPageShw));
1188# endif /* VBOX_STRICT */
1189 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
1190 STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2OutOfSyncHndObs; });
1191 return VINF_SUCCESS;
1192 }
1193 }
1194
1195# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1196# ifdef VBOX_STRICT
1197 /*
1198 * Check for VMM page flags vs. Guest page flags consistency.
1199 * Currently only for debug purposes.
1200 */
1201 if (RT_SUCCESS(rc))
1202 {
1203 /* Get guest page flags. */
1204 uint64_t fPageGst;
1205 rc = PGMGstGetPage(pVCpu, pvFault, &fPageGst, NULL);
1206 if (RT_SUCCESS(rc))
1207 {
1208 uint64_t fPageShw;
1209 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
1210
1211 /*
1212 * Compare page flags.
1213 * Note: we have AVL, A, D bits desynched.
1214 */
1215 AssertMsg((fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)),
1216 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64\n", pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst));
1217 }
1218 else
1219 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
1220 }
1221 else
1222 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
1223# endif /* VBOX_STRICT */
1224# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
1225 }
1226 STAM_PROFILE_STOP(&pVCpu->pgm.s.StatRZTrap0eTimeOutOfSync, c);
1227# endif /* PGM_OUT_OF_SYNC_IN_GC */
1228 }
1229#ifndef PGM_WITH_GST_WALK
1230 else /* GCPhys == NIL_RTGCPHYS */
1231 {
1232 /*
1233 * Page not present in Guest OS or invalid page table address.
1234 * This is potential virtual page access handler food.
1235 *
1236 * For the present we'll say that our access handlers don't
1237 * work for this case - we've already discarded the page table
1238 * not present case which is identical to this.
1239 *
1240 * When we perchance find we need this, we will probably have AVL
1241 * trees (offset based) to operate on and we can measure their speed
1242 * against mapping a page table and probably rearrange this handling
1243 * a bit. (Like, searching virtual ranges before checking the
1244 * physical address.)
1245 */
1246 }
1247#endif
1248 }
1249 /** @todo This point is never really reached. Clean up later! */
1250
1251# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1252 /*
1253 * Conclusion, this is a guest trap.
1254 */
1255 LogFlow(("PGM: Unhandled #PF -> route trap to recompiler!\n"));
1256 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eGuestPFUnh);
1257 return VINF_EM_RAW_GUEST_TRAP;
1258# else
1259 /* present, but not a monitored page; perhaps the guest is probing physical memory */
1260 return VINF_EM_RAW_EMULATE_INSTR;
1261# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
1262
1263
1264# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1265
1266 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
1267 return VERR_INTERNAL_ERROR;
1268# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1269}
1270#endif /* !IN_RING3 */
1271
1272
1273/**
1274 * Emulation of the invlpg instruction.
1275 *
1276 *
1277 * @returns VBox status code.
1278 *
1279 * @param pVCpu The VMCPU handle.
1280 * @param GCPtrPage Page to invalidate.
1281 *
1282 * @remark ASSUMES that the guest is updating before invalidating. This order
1283 * isn't required by the CPU, so this is speculative and could cause
1284 * trouble.
1285 * @remark No TLB shootdown is done on any other VCPU as we assume that
1286 * invlpg emulation is the *only* reason for calling this function.
1287 * (The guest has to shoot down TLB entries on other CPUs itself)
1288 * Currently true, but keep in mind!
1289 *
1290 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1291 */
1292PGM_BTH_DECL(int, InvalidatePage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
1293{
1294#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1295 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1296 && PGM_SHW_TYPE != PGM_TYPE_EPT
1297 int rc;
1298 PVM pVM = pVCpu->CTX_SUFF(pVM);
1299 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1300
1301 Assert(PGMIsLockOwner(pVM));
1302
1303 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1304
1305# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1306 if (pPool->cDirtyPages)
1307 pgmPoolResetDirtyPages(pVM);
1308# endif
1309
1310 /*
1311 * Get the shadow PD entry and skip out if this PD isn't present.
1312 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1313 */
1314# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1315 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1316 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1317
1318 /* Fetch the pgm pool shadow descriptor. */
1319 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1320 Assert(pShwPde);
1321
1322# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1323 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT);
1324 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
1325
1326 /* If the shadow PDPE isn't present, then skip the invalidate. */
1327 if (!pPdptDst->a[iPdpt].n.u1Present)
1328 {
1329 Assert(!(pPdptDst->a[iPdpt].u & PGM_PLXFLAGS_MAPPING));
1330 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1331 return VINF_SUCCESS;
1332 }
1333
1334 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1335 PPGMPOOLPAGE pShwPde = NULL;
1336 PX86PDPAE pPDDst;
1337
1338 /* Fetch the pgm pool shadow descriptor. */
1339 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1340 AssertRCSuccessReturn(rc, rc);
1341 Assert(pShwPde);
1342
1343 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1344 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1345
1346# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1347 /* PML4 */
1348 const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;
1349 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1350 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1351 PX86PDPAE pPDDst;
1352 PX86PDPT pPdptDst;
1353 PX86PML4E pPml4eDst;
1354 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1355 if (rc != VINF_SUCCESS)
1356 {
1357 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1358 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1359 return VINF_SUCCESS;
1360 }
1361 Assert(pPDDst);
1362
1363 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1364 PX86PDPE pPdpeDst = &pPdptDst->a[iPdpt];
1365
1366 if (!pPdpeDst->n.u1Present)
1367 {
1368 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1369 return VINF_SUCCESS;
1370 }
1371
1372 /* Fetch the pgm pool shadow descriptor. */
1373 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1374 Assert(pShwPde);
1375
1376# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1377
1378 const SHWPDE PdeDst = *pPdeDst;
1379 if (!PdeDst.n.u1Present)
1380 {
1381 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1382 return VINF_SUCCESS;
1383 }
1384
1385# if defined(IN_RC)
1386 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1387 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
1388# endif
1389
1390 /*
1391 * Get the guest PD entry and calc big page.
1392 */
1393# if PGM_GST_TYPE == PGM_TYPE_32BIT
1394 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1395 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
1396 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1397# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1398 unsigned iPDSrc = 0;
1399# if PGM_GST_TYPE == PGM_TYPE_PAE
1400 X86PDPE PdpeSrcIgn;
1401 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1402# else /* AMD64 */
1403 PX86PML4E pPml4eSrcIgn;
1404 X86PDPE PdpeSrcIgn;
1405 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1406# endif
1407 GSTPDE PdeSrc;
1408
1409 if (pPDSrc)
1410 PdeSrc = pPDSrc->a[iPDSrc];
1411 else
1412 PdeSrc.u = 0;
1413# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1414 const bool fIsBigPage = PdeSrc.b.u1Size && GST_IS_PSE_ACTIVE(pVCpu);
1415
1416# ifdef IN_RING3
1417 /*
1418 * If a CR3 Sync is pending we may ignore the invalidate page operation
1419 * depending on the kind of sync and if it's a global page or not.
1420 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1421 */
1422# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1423 if ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1424 || ( VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1425 && fIsBigPage
1426 && PdeSrc.b.u1Global
1427 )
1428 )
1429# else
1430 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1431# endif
1432 {
1433 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePageSkipped));
1434 return VINF_SUCCESS;
1435 }
1436# endif /* IN_RING3 */
1437
1438 /*
1439 * Deal with the Guest PDE.
1440 */
1441 rc = VINF_SUCCESS;
1442 if (PdeSrc.n.u1Present)
1443 {
1444 Assert( PdeSrc.n.u1User == PdeDst.n.u1User
1445 && (PdeSrc.n.u1Write || !PdeDst.n.u1Write));
1446# ifndef PGM_WITHOUT_MAPPING
1447 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
1448 {
1449 /*
1450 * Conflict - Let SyncPT deal with it to avoid duplicate code.
1451 */
1452 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1453 Assert(PGMGetGuestMode(pVCpu) <= PGMMODE_PAE);
1454 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
1455 }
1456 else
1457# endif /* !PGM_WITHOUT_MAPPING */
1458 if (!fIsBigPage)
1459 {
1460 /*
1461 * 4KB - page.
1462 */
1463 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1464 RTGCPHYS GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
1465
1466# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
1467 /* Reset the modification counter (OpenSolaris trashes tlb entries very often) */
1468 if (pShwPage->cModifications)
1469 pShwPage->cModifications = 1;
1470# endif
1471
1472# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1473 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
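            /* Illustrative note: the 1024-entry 32-bit guest page table is shadowed by two
               512-entry PAE page tables, so an odd iPDDst selects the upper 2KB half of the
               guest PT; e.g. iPDDst = 3 adds 0x800 (PAGE_SIZE/2) to GCPhys below. */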
1474 GCPhys |= (iPDDst & 1) * (PAGE_SIZE/2);
1475# endif
1476 if (pShwPage->GCPhys == GCPhys)
1477 {
1478# if 0 /* likely cause of a major performance regression; must be SyncPageWorkerTrackDeref then */
1479 const unsigned iPTEDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1480 PSHWPT pPT = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
1481 if (pPT->a[iPTEDst].n.u1Present)
1482 {
1483 /* This is very unlikely with caching/monitoring enabled. */
1484 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pShwPage, pPT->a[iPTEDst].u & SHW_PTE_PG_MASK, iPTEDst);
1485 ASMAtomicWriteSize(&pPT->a[iPTEDst], 0);
1486 }
1487# else /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1488 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
1489 if (RT_SUCCESS(rc))
1490 rc = VINF_SUCCESS;
1491# endif
1492 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1493 PGM_INVL_PG(pVCpu, GCPtrPage);
1494 }
1495 else
1496 {
1497 /*
1498 * The page table address changed.
1499 */
1500 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1501 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1502 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1503 ASMAtomicWriteSize(pPdeDst, 0);
1504 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1505 PGM_INVL_VCPU_TLBS(pVCpu);
1506 }
1507 }
1508 else
1509 {
1510 /*
1511 * 2/4MB - page.
1512 */
1513 /* Before freeing the page, check if anything really changed. */
1514 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1515 RTGCPHYS GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
1516# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1517 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
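            /* Illustrative note: bit 21 of the linear address picks which 2MB half of the
               guest 4MB page this shadow PDE covers; e.g. a 4MB page at 0x01000000 yields
               GCPhys 0x01200000 for the upper half. */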
1518 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
1519# endif
1520 if ( pShwPage->GCPhys == GCPhys
1521 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1522 {
1523                /* ASSUMES that the given bits are identical for 4M and normal PDEs */
1524 /** @todo PAT */
1525 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1526 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PWT | X86_PDE_PCD))
1527 && ( PdeSrc.b.u1Dirty /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1528 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1529 {
1530 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1531 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1532# if defined(IN_RC)
1533 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1534 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1535# endif
1536 return VINF_SUCCESS;
1537 }
1538 }
1539
1540 /*
1541 * Ok, the page table is present and it's been changed in the guest.
1542 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1543 * We could do this for some flushes in GC too, but we need an algorithm for
1544             * deciding which 4MB pages contain code likely to be executed very soon.
1545 */
1546 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1547 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1548 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1549 ASMAtomicWriteSize(pPdeDst, 0);
1550 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1551 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1552 }
1553 }
1554 else
1555 {
1556 /*
1557 * Page directory is not present, mark shadow PDE not present.
1558 */
1559 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
1560 {
1561 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1562 ASMAtomicWriteSize(pPdeDst, 0);
1563 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1564 PGM_INVL_PG(pVCpu, GCPtrPage);
1565 }
1566 else
1567 {
1568 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
1569 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,InvalidatePagePDMappings));
1570 }
1571 }
1572# if defined(IN_RC)
1573 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
1574 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
1575# endif
1576 return rc;
1577
1578#else /* guest real and protected mode */
1579 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1580 return VINF_SUCCESS;
1581#endif
1582}
1583
1584
1585/**
1586 * Update the tracking of shadowed pages.
1587 *
1588 * @param pVCpu The VMCPU handle.
1589 * @param pShwPage The shadow page.
1590 * @param pShwPage The shadow page.
1590 * @param HCPhys The physical page being dereferenced.
1591 * @param iPte The shadow PTE index.
1592 */
1593DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte)
1594{
1595 PVM pVM = pVCpu->CTX_SUFF(pVM);
1596
1597 STAM_PROFILE_START(&pVM->pgm.s.StatTrackDeref, a);
1598 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1599
1600    /** @todo If this turns out to be a bottleneck (*very* likely), two things can be done:
1601 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1602 * 2. write protect all shadowed pages. I.e. implement caching.
1603 */
1604 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1605
1606 /*
1607 * Find the guest address.
1608 */
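    /* Note: this is a linear scan over every page of every RAM range; in the worst case
       (HCPhys not found) all guest pages are visited before the release assertion at the
       bottom fires, which is why the @todo above suggests an HCPhys -> GCPhys TLB. */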
1609 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
1610 pRam;
1611 pRam = pRam->CTX_SUFF(pNext))
1612 {
1613 unsigned iPage = pRam->cb >> PAGE_SHIFT;
1614 while (iPage-- > 0)
1615 {
1616 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1617 {
1618 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1619
1620 Assert(pShwPage->cPresent);
1621 Assert(pPool->cPresent);
1622 pShwPage->cPresent--;
1623 pPool->cPresent--;
1624
1625 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1626 STAM_PROFILE_STOP(&pVM->pgm.s.StatTrackDeref, a);
1627 return;
1628 }
1629 }
1630 }
1631
1632 for (;;)
1633 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1634}
1635
1636
1637/**
1638 * Update the tracking of shadowed pages.
1639 *
1640 * @param pVCpu The VMCPU handle.
1641 * @param pShwPage The shadow page.
1642 * @param u16 The top 16 bits of the pPage->HCPhys.
1643 * @param pPage Pointer to the guest page; this will be modified.
1644 * @param iPTDst The index into the shadow table.
1645 */
1646DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPU pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16, PPGMPAGE pPage, const unsigned iPTDst)
1647{
1648 PVM pVM = pVCpu->CTX_SUFF(pVM);
1649 /*
1650 * Just deal with the simple first time here.
1651 */
1652 if (!u16)
1653 {
1654 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackVirgin);
1655 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
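        /* Rough sketch of the tracking word at this point: it packs a reference count of
           one together with the owning pool page index (pShwPage->idx); further references
           are taken via pgmPoolTrackPhysExtAddref in the else branch below. */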
1656 /* Save the page table index. */
1657 PGM_PAGE_SET_PTE_INDEX(pPage, iPTDst);
1658 }
1659 else
1660 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1661
1662 /* write back */
1663 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1664 PGM_PAGE_SET_TRACKING(pPage, u16);
1665
1666 /* update statistics. */
1667 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1668 pShwPage->cPresent++;
1669 if (pShwPage->iFirstPresent > iPTDst)
1670 pShwPage->iFirstPresent = iPTDst;
1671}
1672
1673
1674/**
1675 * Modifies a shadow PTE to account for access handlers.
1676 *
1677 * @param pVM The VM handle.
1678 * @param pPage The page in question.
1679 * @param fPteSrc The flags of the source PTE.
1680 * @param pPteDst The shadow PTE (output).
1681 */
1682DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVM pVM, PCPGMPAGE pPage, uint32_t fPteSrc, PSHWPTE pPteDst)
1683{
1684 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1685 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
1686 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1687 {
1688#if PGM_SHW_TYPE == PGM_TYPE_EPT
1689 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage);
1690 pPteDst->n.u1Present = 1;
1691 pPteDst->n.u1Execute = 1;
1692 pPteDst->n.u1IgnorePAT = 1;
1693 pPteDst->n.u3EMT = VMX_EPT_MEMTYPE_WB;
1694 /* PteDst.n.u1Write = 0 && PteDst.n.u1Size = 0 */
1695#else
1696 pPteDst->u = (fPteSrc & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1697 | PGM_PAGE_GET_HCPHYS(pPage);
1698#endif
1699 }
1700#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1701# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1702 else if ( PGM_PAGE_IS_MMIO(pPage)
1703# if PGM_SHW_TYPE != PGM_TYPE_EPT
1704 && ( (fPteSrc & (X86_PTE_RW /*| X86_PTE_D | X86_PTE_A*/ | X86_PTE_US )) /* #PF handles D & A first. */
1705 == (X86_PTE_RW /*| X86_PTE_D | X86_PTE_A*/)
1706 || BTH_IS_NP_ACTIVE(pVM) )
1707# endif
1708# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1709 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1710# endif
1711 )
1712 {
1713 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1714# if PGM_SHW_TYPE == PGM_TYPE_EPT
1715 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1716 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg;
1717 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1718 pPteDst->n.u1Present = 0;
1719 pPteDst->n.u1Write = 1;
1720 pPteDst->n.u1Execute = 0;
1721 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1722 pPteDst->n.u3EMT = 7;
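        /* Net effect: any access through this entry raises an EPT misconfiguration VM-exit
           (exit 49), which the MMIO handling path then intercepts. */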
1723# else
1724 /* Set high page frame bits that MBZ (bankers on PAE, CPU dependent on AMD64). */
1725 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P;
1726# endif
1727 }
1728# endif
1729#endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1730 else
1731 {
1732 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1733 pPteDst->u = 0;
1734 }
1735 /** @todo count these kinds of entries. */
1736}
1737
1738
1739/**
1740 * Creates a 4K shadow page for a guest page.
1741 *
1742 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1743 * physical address. Of the PdeSrc argument only the flags are used. No page
1744 * structures will be mapped in this function.
1745 *
1746 * @param pVCpu The VMCPU handle.
1747 * @param pPteDst Destination page table entry.
1748 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1749 * Can safely assume that only the flags are being used.
1750 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1751 * @param pShwPage Pointer to the shadow page.
1752 * @param iPTDst The index into the shadow table.
1753 *
1754 * @remark Not used for 2/4MB pages!
1755 */
1756DECLINLINE(void) PGM_BTH_NAME(SyncPageWorker)(PVMCPU pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1757 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1758{
1759 if ( PteSrc.n.u1Present
1760 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1761 {
1762 PVM pVM = pVCpu->CTX_SUFF(pVM);
1763
1764# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1765 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1766 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64)
1767 if (pShwPage->fDirty)
1768 {
1769 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1770 PX86PTPAE pGstPT;
1771
1772 pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
1773 pGstPT->a[iPTDst].u = PteSrc.u;
1774 }
1775# endif
1776 /*
1777 * Find the ram range.
1778 */
1779 PPGMPAGE pPage;
1780 int rc = pgmPhysGetPageEx(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK, &pPage);
1781 if (RT_SUCCESS(rc))
1782 {
1783 /* Ignore ballooned pages.
1784            Don't return errors or use a fatal assert here, as a shadow
1785            sync range might include ballooned pages. */
1786 if (PGM_PAGE_IS_BALLOONED(pPage))
1787 {
1788 Assert(!pPteDst->n.u1Present); /** @todo user tracking needs updating if this triggers. */
1789 return;
1790 }
1791
1792#ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1793 /* Make the page writable if necessary. */
1794 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1795 && ( PGM_PAGE_IS_ZERO(pPage)
1796 || ( PteSrc.n.u1Write
1797 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1798# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1799 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1800# endif
1801# ifdef VBOX_WITH_PAGE_SHARING
1802 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1803# endif
1804 )
1805 )
1806 )
1807 {
1808 rc = pgmPhysPageMakeWritable(pVM, pPage, PteSrc.u & GST_PTE_PG_MASK);
1809 AssertRC(rc);
1810 }
1811#endif
1812
1813 /*
1814 * Make page table entry.
1815 */
1816 SHWPTE PteDst;
1817 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1818 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage,
1819 PteSrc.u & ~( X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT
1820 | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW),
1821 &PteDst);
1822 else
1823 {
1824#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1825 /*
1826 * If the page or page directory entry is not marked accessed,
1827 * we mark the page not present.
1828 */
1829 if (!PteSrc.n.u1Accessed || !PdeSrc.n.u1Accessed)
1830 {
1831                    LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1832 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,AccessedPage));
1833 PteDst.u = 0;
1834 }
1835 /*
1836 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1837 * when the page is modified.
1838 */
1839 else if (!PteSrc.n.u1Dirty && (PdeSrc.n.u1Write & PteSrc.n.u1Write))
1840 {
1841 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPage));
1842 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT | X86_PTE_RW))
1843 | PGM_PAGE_GET_HCPHYS(pPage)
1844 | PGM_PTFLAGS_TRACK_DIRTY;
1845 }
1846 else
1847#endif
1848 {
1849 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageSkipped));
1850#if PGM_SHW_TYPE == PGM_TYPE_EPT
1851 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage);
1852 PteDst.n.u1Present = 1;
1853 PteDst.n.u1Write = 1;
1854 PteDst.n.u1Execute = 1;
1855 PteDst.n.u1IgnorePAT = 1;
1856 PteDst.n.u3EMT = VMX_EPT_MEMTYPE_WB;
1857 /* PteDst.n.u1Size = 0 */
1858#else
1859 PteDst.u = (PteSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1860 | PGM_PAGE_GET_HCPHYS(pPage);
1861#endif
1862 }
1863
1864 /*
1865 * Make sure only allocated pages are mapped writable.
1866 */
1867 if ( PteDst.n.u1Write
1868 && PteDst.n.u1Present
1869 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1870 {
1871 /* Still applies to shared pages. */
1872 Assert(!PGM_PAGE_IS_ZERO(pPage));
1873 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet. Why, isn't it? */
1874 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", (RTGCPHYS)(PteSrc.u & X86_PTE_PAE_PG_MASK), pPage, iPTDst));
1875 }
1876 }
1877
1878 /*
1879 * Keep user track up to date.
1880 */
1881 if (PteDst.n.u1Present)
1882 {
1883 if (!pPteDst->n.u1Present)
1884 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1885 else if ((pPteDst->u & SHW_PTE_PG_MASK) != (PteDst.u & SHW_PTE_PG_MASK))
1886 {
1887 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", (uint64_t)pPteDst->u, (uint64_t)PteDst.u));
1888 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1889 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1890 }
1891 }
1892 else if (pPteDst->n.u1Present)
1893 {
1894 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1895 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1896 }
1897
1898 /*
1899 * Update statistics and commit the entry.
1900 */
1901#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1902 if (!PteSrc.n.u1Global)
1903 pShwPage->fSeenNonGlobal = true;
1904#endif
1905 ASMAtomicWriteSize(pPteDst, PteDst.u);
1906 return;
1907 }
1908
1909/** @todo count these three different kinds. */
1910 Log2(("SyncPageWorker: invalid address in Pte\n"));
1911 }
1912 else if (!PteSrc.n.u1Present)
1913 Log2(("SyncPageWorker: page not present in Pte\n"));
1914 else
1915 Log2(("SyncPageWorker: invalid Pte\n"));
1916
1917 /*
1918 * The page is not present or the PTE is bad. Replace the shadow PTE by
1919 * an empty entry, making sure to keep the user tracking up to date.
1920 */
1921 if (pPteDst->n.u1Present)
1922 {
1923 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", (uint64_t)pPteDst->u));
1924 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, pPteDst->u & SHW_PTE_PG_MASK, iPTDst);
1925 }
1926 ASMAtomicWriteSize(pPteDst, 0);
1927}
1928
1929
1930/**
1931 * Syncs a guest OS page.
1932 *
1933 * There are no conflicts at this point, neither is there any need for
1934 * page table allocations.
1935 *
1936 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1937 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1938 *
1939 * @returns VBox status code.
1940 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1941 * @param pVCpu The VMCPU handle.
1942 * @param PdeSrc Page directory entry of the guest.
1943 * @param GCPtrPage Guest context page address.
1944 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1945 * @param uErr Fault error (X86_TRAP_PF_*).
1946 */
1947PGM_BTH_DECL(int, SyncPage)(PVMCPU pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1948{
1949 PVM pVM = pVCpu->CTX_SUFF(pVM);
1950 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1951 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1952
1953 Assert(PGMIsLockOwner(pVM));
1954
1955#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1956 || PGM_GST_TYPE == PGM_TYPE_PAE \
1957 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1958 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
1959 && PGM_SHW_TYPE != PGM_TYPE_EPT
1960
1961 /*
1962 * Assert preconditions.
1963 */
1964 Assert(PdeSrc.n.u1Present);
1965 Assert(cPages);
1966# if 0 /* rarely useful; leave for debugging. */
1967 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1968# endif
1969
1970 /*
1971 * Get the shadow PDE, find the shadow page table in the pool.
1972 */
1973# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1974 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1975 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
1976
1977 /* Fetch the pgm pool shadow descriptor. */
1978 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1979 Assert(pShwPde);
1980
1981# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1982 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1983 PPGMPOOLPAGE pShwPde = NULL;
1984 PX86PDPAE pPDDst;
1985
1986 /* Fetch the pgm pool shadow descriptor. */
1987 int rc2 = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
1988 AssertRCSuccessReturn(rc2, rc2);
1989 Assert(pShwPde);
1990
1991 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
1992 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1993
1994# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1995 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1996 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1997 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1998 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1999
2000 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2001 AssertRCSuccessReturn(rc2, rc2);
2002 Assert(pPDDst && pPdptDst);
2003 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
2004# endif
2005 SHWPDE PdeDst = *pPdeDst;
2006
2007 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2008 if (!PdeDst.n.u1Present)
2009 {
2010 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE p=%p/%RX64\n", pPdeDst, (uint64_t)PdeDst.u));
2011 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2012 return VINF_SUCCESS; /* force the instruction to be executed again. */
2013 }
2014
2015 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2016 Assert(pShwPage);
2017
2018# if PGM_GST_TYPE == PGM_TYPE_AMD64
2019 /* Fetch the pgm pool shadow descriptor. */
2020 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2021 Assert(pShwPde);
2022# endif
2023
2024# if defined(IN_RC)
2025 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2026 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
2027# endif
2028
2029 /*
2030 * Check that the page is present and that the shadow PDE isn't out of sync.
2031 */
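    /* Note: the PS bit in the guest PDE is only honoured when page size extensions are
       active for the current guest mode; otherwise the entry is treated as pointing to a
       4KB page table, and the two cases have different reserved-bit rules, hence the two
       validity macros below. */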
2032 const bool fBigPage = PdeSrc.b.u1Size && GST_IS_PSE_ACTIVE(pVCpu);
2033 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
2034 RTGCPHYS GCPhys;
2035 if (!fBigPage)
2036 {
2037 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
2038# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2039 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2040 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
2041# endif
2042 }
2043 else
2044 {
2045 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
2046# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2047 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2048 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
2049# endif
2050 }
2051 if ( fPdeValid
2052 && pShwPage->GCPhys == GCPhys
2053 && PdeSrc.n.u1Present
2054 && PdeSrc.n.u1User == PdeDst.n.u1User
2055 && (PdeSrc.n.u1Write == PdeDst.n.u1Write || !PdeDst.n.u1Write)
2056# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2057 && (PdeSrc.n.u1NoExecute == PdeDst.n.u1NoExecute || !GST_IS_NX_ACTIVE(pVCpu))
2058# endif
2059 )
2060 {
2061 /*
2062 * Check that the PDE is marked accessed already.
2063 * Since we set the accessed bit *before* getting here on a #PF, this
2064 * check is only meant for dealing with non-#PF'ing paths.
2065 */
2066 if (PdeSrc.n.u1Accessed)
2067 {
2068 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2069 if (!fBigPage)
2070 {
2071 /*
2072 * 4KB Page - Map the guest page table.
2073 */
2074 PGSTPT pPTSrc;
2075 int rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
2076 if (RT_SUCCESS(rc))
2077 {
2078# ifdef PGM_SYNC_N_PAGES
2079 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2080 if ( cPages > 1
2081 && !(uErr & X86_TRAP_PF_P)
2082 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2083 {
2084 /*
2085 * This code path is currently only taken when the caller is PGMTrap0eHandler
2086 * for non-present pages!
2087 *
2088 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2089 * deal with locality.
2090 */
2091 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2092# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2093 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2094 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2095# else
2096 const unsigned offPTSrc = 0;
2097# endif
2098 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2099 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2100 iPTDst = 0;
2101 else
2102 iPTDst -= PGM_SYNC_NR_PAGES / 2;
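                        /* Illustration, assuming PGM_SYNC_NR_PAGES is 8: a fault at iPTDst=2
                           syncs shadow entries 0..5 (the window is clamped at the start of
                           the table), while a fault at iPTDst=200 syncs entries 196..203. */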
2103 for (; iPTDst < iPTDstEnd; iPTDst++)
2104 {
2105 if (!pPTDst->a[iPTDst].n.u1Present)
2106 {
2107 GSTPTE PteSrc = pPTSrc->a[offPTSrc + iPTDst];
2108 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT)) | ((offPTSrc + iPTDst) << PAGE_SHIFT);
2109 NOREF(GCPtrCurPage);
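                                /* Worked example for a 32-bit guest (GST_PT_SHIFT = 12,
                                   GST_PT_MASK = 0x3ff): GCPtrPage = 0x00403000 and
                                   offPTSrc + iPTDst = 7 give GCPtrCurPage = 0x00407000. */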
2110#ifndef IN_RING0
2111 /*
2112 * Assuming kernel code will be marked as supervisor - and not as user level
2113                                 * and executed using a conforming code selector - and marked as read-only.
2114 * Also assume that if we're monitoring a page, it's of no interest to CSAM.
2115 */
2116 PPGMPAGE pPage;
2117 if ( ((PdeSrc.u & PteSrc.u) & (X86_PTE_RW | X86_PTE_US))
2118 || iPTDst == ((GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK) /* always sync GCPtrPage */
2119 || !CSAMDoesPageNeedScanning(pVM, GCPtrCurPage)
2120 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
2121 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2122 )
2123#endif /* else: CSAM not active */
2124 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2125 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2126 GCPtrCurPage, PteSrc.n.u1Present,
2127 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2128 PteSrc.n.u1User & PdeSrc.n.u1User,
2129 (uint64_t)PteSrc.u,
2130 (uint64_t)pPTDst->a[iPTDst].u,
2131 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2132 }
2133 }
2134 }
2135 else
2136# endif /* PGM_SYNC_N_PAGES */
2137 {
2138 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
2139 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2140 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2141 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2142 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
2143 GCPtrPage, PteSrc.n.u1Present,
2144 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2145 PteSrc.n.u1User & PdeSrc.n.u1User,
2146 (uint64_t)PteSrc.u,
2147 (uint64_t)pPTDst->a[iPTDst].u,
2148 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2149 }
2150 }
2151 else /* MMIO or invalid page: emulated in #PF handler. */
2152 {
2153 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
2154 Assert(!pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK].n.u1Present);
2155 }
2156 }
2157 else
2158 {
2159 /*
2160 * 4/2MB page - lazy syncing shadow 4K pages.
2161 * (There are many causes of getting here, it's no longer only CSAM.)
2162 */
2163 /* Calculate the GC physical address of this 4KB shadow page. */
2164 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK);
2165 /* Find ram range. */
2166 PPGMPAGE pPage;
2167 int rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPhys, &pPage);
2168 if (RT_SUCCESS(rc))
2169 {
2170 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
2171
2172# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2173 /* Try to make the page writable if necessary. */
2174 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2175 && ( PGM_PAGE_IS_ZERO(pPage)
2176 || ( PdeSrc.n.u1Write
2177 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2178# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2179 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2180# endif
2181# ifdef VBOX_WITH_PAGE_SHARING
2182 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2183# endif
2184 )
2185 )
2186 )
2187 {
2188 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2189 AssertRC(rc);
2190 }
2191# endif
2192
2193 /*
2194 * Make shadow PTE entry.
2195 */
2196 SHWPTE PteDst;
2197 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2198 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage,
2199 PdeSrc.u & ~( X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK
2200 | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT),
2201 &PteDst);
2202 else
2203 PteDst.u = (PdeSrc.u & ~(X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
2204 | PGM_PAGE_GET_HCPHYS(pPage);
2205
2206 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2207 if ( PteDst.n.u1Present
2208 && !pPTDst->a[iPTDst].n.u1Present)
2209 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2210
2211 /* Make sure only allocated pages are mapped writable. */
2212 if ( PteDst.n.u1Write
2213 && PteDst.n.u1Present
2214 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2215 {
2216 /* Still applies to shared pages. */
2217 Assert(!PGM_PAGE_IS_ZERO(pPage));
2218 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
2219 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
2220 }
2221
2222 ASMAtomicWriteSize(&pPTDst->a[iPTDst], PteDst.u);
2223
2224 /*
2225 * If the page is not flagged as dirty and is writable, then make it read-only
2226 * at PD level, so we can set the dirty bit when the page is modified.
2227 *
2228 * ASSUMES that page access handlers are implemented on page table entry level.
2229 * Thus we will first catch the dirty access and set PDE.D and restart. If
2230 * there is an access handler, we'll trap again and let it work on the problem.
2231 */
2232 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
2233 * As for invlpg, it simply frees the whole shadow PT.
2234 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
2235 if ( !PdeSrc.b.u1Dirty
2236 && PdeSrc.b.u1Write)
2237 {
2238 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
2239 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2240 PdeDst.n.u1Write = 0;
2241 }
2242 else
2243 {
2244 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2245 PdeDst.n.u1Write = PdeSrc.n.u1Write;
2246 }
2247 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2248 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
2249 GCPtrPage, PdeSrc.n.u1Present, PdeSrc.n.u1Write, PdeSrc.n.u1User, (uint64_t)PdeSrc.u, GCPhys,
2250 PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2251 }
2252 else
2253 {
2254 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
2255 /** @todo must wipe the shadow page table in this case. */
2256 }
2257 }
2258# if defined(IN_RC)
2259 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2260 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2261# endif
2262 return VINF_SUCCESS;
2263 }
2264
2265 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDNAs));
2266 }
2267 else if (fPdeValid)
2268 {
2269 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2270 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2271 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2272 }
2273 else
2274 {
2275/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2276 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2277 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2278 }
2279
2280 /*
2281 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
2282 * Yea, I'm lazy.
2283 */
2284 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2285 ASMAtomicWriteSize(pPdeDst, 0);
2286
2287# if defined(IN_RC)
2288 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2289 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
2290# endif
2291 PGM_INVL_VCPU_TLBS(pVCpu);
2292 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2293
2294
2295#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2296 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2297 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2298 && !defined(IN_RC)
2299
2300# ifdef PGM_SYNC_N_PAGES
2301 /*
2302 * Get the shadow PDE, find the shadow page table in the pool.
2303 */
2304# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2305 X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
2306
2307# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2308 X86PDEPAE PdeDst = pgmShwGetPaePDE(&pVCpu->pgm.s, GCPtrPage);
2309
2310# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2311 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2312 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2313 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2314 X86PDEPAE PdeDst;
2315 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2316
2317 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2318 AssertRCSuccessReturn(rc, rc);
2319 Assert(pPDDst && pPdptDst);
2320 PdeDst = pPDDst->a[iPDDst];
2321# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2322 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2323 PEPTPD pPDDst;
2324 EPTPDE PdeDst;
2325
2326 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2327 if (rc != VINF_SUCCESS)
2328 {
2329 AssertRC(rc);
2330 return rc;
2331 }
2332 Assert(pPDDst);
2333 PdeDst = pPDDst->a[iPDDst];
2334# endif
2335 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2336 if (!PdeDst.n.u1Present)
2337 {
2338 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2339 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2340 return VINF_SUCCESS; /* force the instruction to be executed again. */
2341 }
2342
2343 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2344 if (PdeDst.n.u1Size)
2345 {
2346 Assert(pVM->pgm.s.fNestedPaging);
2347 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2348 return VINF_SUCCESS;
2349 }
2350
2351 /* Mask away the page offset. */
2352 GCPtrPage &= ~((RTGCPTR)0xfff);
2353
2354 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2355 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2356
2357 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2358 if ( cPages > 1
2359 && !(uErr & X86_TRAP_PF_P)
2360 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
2361 {
2362 /*
2363 * This code path is currently only taken when the caller is PGMTrap0eHandler
2364 * for non-present pages!
2365 *
2366 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2367 * deal with locality.
2368 */
2369 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2370 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2371 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2372 iPTDst = 0;
2373 else
2374 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2375 for (; iPTDst < iPTDstEnd; iPTDst++)
2376 {
2377 if (!pPTDst->a[iPTDst].n.u1Present)
2378 {
2379 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2380 GSTPTE PteSrc;
2381
2382 /* Fake the page table entry */
2383 PteSrc.u = GCPtrCurPage;
2384 PteSrc.n.u1Present = 1;
2385 PteSrc.n.u1Dirty = 1;
2386 PteSrc.n.u1Accessed = 1;
2387 PteSrc.n.u1Write = 1;
2388 PteSrc.n.u1User = 1;
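                /* In this configuration (guest in real mode, protected mode without paging,
                   or behind nested paging/EPT) there is no guest page table to read, so the
                   "guest" PTE is synthesized as an identity mapping: guest physical equals
                   the page-aligned guest virtual address, with full access and A/D set. */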
2389
2390 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2391
2392 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2393 GCPtrCurPage, PteSrc.n.u1Present,
2394 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2395 PteSrc.n.u1User & PdeSrc.n.u1User,
2396 (uint64_t)PteSrc.u,
2397 (uint64_t)pPTDst->a[iPTDst].u,
2398 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2399
2400 if (RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)))
2401 break;
2402 }
2403 else
2404 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n", (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT), iPTDst, pPTDst->a[iPTDst].u));
2405 }
2406 }
2407 else
2408# endif /* PGM_SYNC_N_PAGES */
2409 {
2410 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2411 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << PAGE_SHIFT);
2412 GSTPTE PteSrc;
2413
2414 /* Fake the page table entry */
2415 PteSrc.u = GCPtrCurPage;
2416 PteSrc.n.u1Present = 1;
2417 PteSrc.n.u1Dirty = 1;
2418 PteSrc.n.u1Accessed = 1;
2419 PteSrc.n.u1Write = 1;
2420 PteSrc.n.u1User = 1;
2421 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2422
2423 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}PteDst=%08llx%s\n",
2424 GCPtrPage, PteSrc.n.u1Present,
2425 PteSrc.n.u1Write & PdeSrc.n.u1Write,
2426 PteSrc.n.u1User & PdeSrc.n.u1User,
2427 (uint64_t)PteSrc.u,
2428 (uint64_t)pPTDst->a[iPTDst].u,
2429 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2430 }
2431 return VINF_SUCCESS;
2432
2433#else
2434 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2435 return VERR_INTERNAL_ERROR;
2436#endif
2437}
2438
2439
2440#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
2441
2442/**
2443 * CheckPageFault helper for returning a page fault indicating a non-present
2444 * (NP) entry in the page translation structures.
2445 *
2446 * @returns VINF_EM_RAW_GUEST_TRAP.
2447 * @param pVCpu The virtual CPU to operate on.
2448 * @param uErr The error code of the shadow fault. Corrections to
2449 * TRPM's copy will be made if necessary.
2450 * @param GCPtrPage For logging.
2451 * @param uPageFaultLevel For logging.
2452 */
2453DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnNP)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2454{
2455 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2456 AssertMsg(!(uErr & X86_TRAP_PF_P), ("%#x\n", uErr));
2457 AssertMsg(!(uErr & X86_TRAP_PF_RSVD), ("%#x\n", uErr));
2458 if (uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2459 TRPMSetErrorCode(pVCpu, uErr & ~(X86_TRAP_PF_RSVD | X86_TRAP_PF_P));
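    /* Defensive for release builds: with the usual x86 #PF error code bits (P = bit 0,
       RSVD = bit 3), a stray uErr of 0x0b (P|RW|RSVD) would be forwarded to the guest as
       0x02, i.e. a plain not-present write fault. */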
2460
2461 Log(("CheckPageFault: real page fault (notp) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2462 return VINF_EM_RAW_GUEST_TRAP;
2463}
2464
2465
2466/**
2467 * CheckPageFault helper for returning a page fault indicating a reserved bit
2468 * (RSVD) error in the page translation structures.
2469 *
2470 * @returns VINF_EM_RAW_GUEST_TRAP.
2471 * @param pVCpu The virtual CPU to operate on.
2472 * @param uErr The error code of the shadow fault. Corrections to
2473 * TRPM's copy will be made if necessary.
2474 * @param GCPtrPage For logging.
2475 * @param uPageFaultLevel For logging.
2476 */
2477DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnRSVD)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2478{
2479 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2480 if ((uErr & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2481 TRPMSetErrorCode(pVCpu, uErr | X86_TRAP_PF_RSVD | X86_TRAP_PF_P);
2482
2483 Log(("CheckPageFault: real page fault (rsvd) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2484 return VINF_EM_RAW_GUEST_TRAP;
2485}
2486
2487
2488/**
2489 * CheckPageFault helper for returning a page protection fault (P).
2490 *
2491 * @returns VINF_EM_RAW_GUEST_TRAP.
2492 * @param pVCpu The virtual CPU to operate on.
2493 * @param uErr The error code of the shadow fault. Corrections to
2494 * TRPM's copy will be made if necessary.
2495 * @param GCPtrPage For logging.
2496 * @param uPageFaultLevel For logging.
2497 */
2498DECLINLINE(int) PGM_BTH_NAME(CheckPageFaultReturnProt)(PVMCPU pVCpu, uint32_t uErr, RTGCPTR GCPtrPage, unsigned uPageFaultLevel)
2499{
2500 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyTrackRealPF));
2501 AssertMsg(uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID), ("%#x\n", uErr));
2502 if ((uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) != X86_TRAP_PF_P)
2503 TRPMSetErrorCode(pVCpu, (uErr & ~X86_TRAP_PF_RSVD) | X86_TRAP_PF_P);
2504
2505 Log(("CheckPageFault: real page fault (prot) at %RGv (%d)\n", GCPtrPage, uPageFaultLevel));
2506 return VINF_EM_RAW_GUEST_TRAP;
2507}
2508
2509
2510/**
2511 * Investigate a page fault to identify ones targeted at the guest and to
2512 * handle write protection page faults caused by dirty bit tracking.
2513 *
2514 * This will detect invalid entries and raise X86_TRAP_PF_RSVD.
2515 *
2516 * @returns VBox status code.
2517 * @param pVCpu The VMCPU handle.
2518 * @param uErr Page fault error code. The X86_TRAP_PF_RSVD flag
2519 * cannot be trusted as it is used for MMIO optimizations.
2520 * @param pPdeSrc Guest page directory entry.
2521 * @param GCPtrPage Guest context page address.
2522 */
2523PGM_BTH_DECL(int, CheckPageFault)(PVMCPU pVCpu, uint32_t uErr, PGSTPDE pPdeSrc, RTGCPTR GCPtrPage)
2524{
2525 bool fUserLevelFault = !!(uErr & X86_TRAP_PF_US);
2526 bool fWriteFault = !!(uErr & X86_TRAP_PF_RW);
2527# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2528 bool fMaybeNXEFault = (uErr & X86_TRAP_PF_ID) && GST_IS_NX_ACTIVE(pVCpu);
2529# endif
2530 bool fMaybeWriteProtFault = fWriteFault && (fUserLevelFault || CPUMIsGuestR0WriteProtEnabled(pVCpu));
2531 PVM pVM = pVCpu->CTX_SUFF(pVM);
2532 int rc;
2533
2534 LogFlow(("CheckPageFault: GCPtrPage=%RGv uErr=%#x PdeSrc=%08x\n", GCPtrPage, uErr, pPdeSrc->u));
2535
2536 /*
2537 * Note! For PAE it is safe to assume that bad guest physical addresses
2538 * (which returns all FFs) in the translation tables will cause
2539 * #PF(RSVD). The same will be the case for long mode provided the
2540 * physical address width is less than 52 bits - this we ASSUME.
2541 *
2542 * Note! No convenient shortcuts here, we have to validate everything!
2543 */
2544
2545# if PGM_GST_TYPE == PGM_TYPE_AMD64
2546 /*
2547 * Real page fault? (PML4E level)
2548 */
2549 PX86PML4 pPml4Src = pgmGstGetLongModePML4Ptr(pVCpu);
2550 if (RT_UNLIKELY(!pPml4Src))
2551 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 0);
2552
2553 PX86PML4E pPml4eSrc = &pPml4Src->a[(GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK];
2554 if (!pPml4eSrc->n.u1Present)
2555 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 0);
2556 if (RT_UNLIKELY(!GST_IS_PML4E_VALID(pVCpu, *pPml4eSrc)))
2557 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 0);
2558 if ( (fMaybeWriteProtFault && !pPml4eSrc->n.u1Write)
2559 || (fMaybeNXEFault && pPml4eSrc->n.u1NoExecute)
2560 || (fUserLevelFault && !pPml4eSrc->n.u1User) )
2561 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 0);
2562
2563 /*
2564 * Real page fault? (PDPE level)
2565 */
2566 PX86PDPT pPdptSrc;
2567 rc = PGM_GCPHYS_2_PTR_BY_VMCPU(pVCpu, pPml4eSrc->u & X86_PML4E_PG_MASK, &pPdptSrc);
2568 if (RT_FAILURE(rc))
2569 {
2570 AssertMsgReturn(rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS, ("%Rrc\n", rc), rc);
2571 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2572 }
2573
2574 PX86PDPE pPdpeSrc = &pPdptSrc->a[(GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64];
2575 if (!pPdpeSrc->n.u1Present)
2576 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 1);
2577 if (!GST_IS_PDPE_VALID(pVCpu, *pPdpeSrc))
2578 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2579 if ( (fMaybeWriteProtFault && !pPdpeSrc->lm.u1Write)
2580 || (fMaybeNXEFault && pPdpeSrc->lm.u1NoExecute)
2581 || (fUserLevelFault && !pPdpeSrc->lm.u1User) )
2582 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 1);
2583
2584# elif PGM_GST_TYPE == PGM_TYPE_PAE
2585 /*
2586 * Real page fault? (PDPE level)
2587 */
2588 PX86PDPT pPdptSrc = pgmGstGetPaePDPTPtr(pVCpu);
2589 if (RT_UNLIKELY(!pPdptSrc))
2590 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2591/** @todo Handle bad CR3 address. */
2592 PX86PDPE pPdpeSrc = pgmGstGetPaePDPEPtr(pVCpu, GCPtrPage);
2593 if (!pPdpeSrc->n.u1Present)
2594 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 1);
2595 if (!GST_IS_PDPE_VALID(pVCpu, *pPdpeSrc))
2596 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 1);
2597# endif /* PGM_GST_TYPE == PGM_TYPE_PAE */
2598
2599 /*
2600 * Real page fault? (PDE level)
2601 */
2602 if (!pPdeSrc->n.u1Present)
2603 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 2);
2604 bool const fBigPage = pPdeSrc->b.u1Size && GST_IS_PSE_ACTIVE(pVCpu);
2605 if (!fBigPage ? !GST_IS_PDE_VALID(pVCpu, *pPdeSrc) : !GST_IS_BIG_PDE_VALID(pVCpu, *pPdeSrc))
2606 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 2);
2607 if ( (fMaybeWriteProtFault && !pPdeSrc->n.u1Write)
2608# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2609 || (fMaybeNXEFault && pPdeSrc->n.u1NoExecute)
2610# endif
2611 || (fUserLevelFault && !pPdeSrc->n.u1User) )
2612 return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 2);
2613
2614 /*
2615 * First check the easy case where the page directory has been marked
2616 * read-only to track the dirty bit of an emulated BIG page.
2617 */
2618 if (fBigPage)
2619 {
2620 /* Mark guest page directory as accessed */
2621# if PGM_GST_TYPE == PGM_TYPE_AMD64
2622 pPml4eSrc->n.u1Accessed = 1;
2623 pPdpeSrc->lm.u1Accessed = 1;
2624# endif
2625 pPdeSrc->b.u1Accessed = 1;
2626
2627        /* Mark the guest PDE entry dirty if it's a write access. */
2628 if (fWriteFault)
2629 pPdeSrc->b.u1Dirty = 1;
2630 }
2631 else
2632 {
2633 /*
2634 * Map the guest page table.
2635 */
2636 PGSTPT pPTSrc;
2637 PGSTPTE pPteSrc;
2638 GSTPTE PteSrc;
2639 rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2640 if (RT_SUCCESS(rc))
2641 {
2642 pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2643 PteSrc.u = pPteSrc->u;
2644 }
2645 else if (rc == VERR_PGM_INVALID_GC_PHYSICAL_ADDRESS)
2646 {
2647 /* All bits in the PTE are set. */
2648# if PGM_GST_TYPE == PGM_TYPE_32BIT
2649 PteSrc.u = UINT32_MAX;
2650# else
2651 PteSrc.u = UINT64_MAX;
2652# endif
2653 pPteSrc = &PteSrc;
2654 }
2655 else
2656 {
2657 AssertRC(rc);
2658 return rc;
2659 }
2660
2661 /*
2662 * Real page fault?
2663 */
2664 if (!PteSrc.n.u1Present)
2665 return PGM_BTH_NAME(CheckPageFaultReturnNP)(pVCpu, uErr, GCPtrPage, 3);
2666 if (!GST_IS_PTE_VALID(pVCpu, PteSrc))
2667 return PGM_BTH_NAME(CheckPageFaultReturnRSVD)(pVCpu, uErr, GCPtrPage, 3);
2668 if ( (fMaybeWriteProtFault && !PteSrc.n.u1Write)
2669# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2670 || (fMaybeNXEFault && PteSrc.n.u1NoExecute)
2671# endif
2672 || (fUserLevelFault && !PteSrc.n.u1User) )
2673            return PGM_BTH_NAME(CheckPageFaultReturnProt)(pVCpu, uErr, GCPtrPage, 3);
2674
2675 LogFlow(("CheckPageFault: page fault at %RGv PteSrc.u=%08x\n", GCPtrPage, PteSrc.u));
2676
2677 /*
2678 * Set the accessed bits in the page directory and the page table.
2679 */
2680# if PGM_GST_TYPE == PGM_TYPE_AMD64
2681 pPml4eSrc->n.u1Accessed = 1;
2682 pPdpeSrc->lm.u1Accessed = 1;
2683# endif
2684 pPdeSrc->n.u1Accessed = 1;
2685 pPteSrc->n.u1Accessed = 1;
2686
2687 /*
2688 * Set the dirty flag in the PTE if it's a write access.
2689 */
2690 if (fWriteFault)
2691 {
2692# ifdef VBOX_WITH_STATISTICS
2693 if (!pPteSrc->n.u1Dirty)
2694 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtiedPage));
2695 else
2696 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageAlreadyDirty));
2697# endif
2698
2699 pPteSrc->n.u1Dirty = 1;
2700 }
2701 }
2702 return VINF_SUCCESS;
2703}
2704
2705
2706/**
2707 * Handle dirty bit tracking faults.
2708 *
2709 * @returns VBox status code.
2710 * @param pVCpu The VMCPU handle.
2711 * @param uErr Page fault error code.
2712 * @param pPdeDst Shadow page directory entry.
2713 * @param pPdeSrc Guest page directory entry.
2714 * @param GCPtrPage Guest context page address.
2715 */
2716PGM_BTH_DECL(int, CheckDirtyPageFault)(PVMCPU pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage)
2717{
2718 PVM pVM = pVCpu->CTX_SUFF(pVM);
2719 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2720
2721 Assert(PGMIsLockOwner(pVM));
2722
2723 /*
2724 * Handle big page.
2725 */
2726 if (pPdeSrc->b.u1Size && GST_IS_PSE_ACTIVE(pVCpu))
2727 {
2728 if ( pPdeDst->n.u1Present
2729 && (pPdeDst->u & PGM_PDFLAGS_TRACK_DIRTY))
2730 {
2731 SHWPDE PdeDst = *pPdeDst;
2732
2733 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2734 Assert(pPdeSrc->b.u1Write);
2735
2736 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2737 * fault again and take this path to only invalidate the entry (see below).
2738 */
2739 PdeDst.n.u1Write = 1;
2740 PdeDst.n.u1Accessed = 1;
2741 PdeDst.au32[0] &= ~PGM_PDFLAGS_TRACK_DIRTY;
2742 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
2743 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2744 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2745 }
2746
2747# ifdef IN_RING0
2748 /* Check for stale TLB entry; only applies to the SMP guest case. */
2749 if ( pVM->cCpus > 1
2750 && pPdeDst->n.u1Write
2751 && pPdeDst->n.u1Accessed)
2752 {
2753 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2754 if (pShwPage)
2755 {
2756 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2757 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2758 if ( pPteDst->n.u1Present
2759 && pPteDst->n.u1Write)
2760 {
2761 /* Stale TLB entry. */
2762 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2763 PGM_INVL_PG(pVCpu, GCPtrPage);
2764 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2765 }
2766 }
2767 }
2768# endif /* IN_RING0 */
2769 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2770 }
2771
2772 /*
2773 * Map the guest page table.
2774 */
2775 PGSTPT pPTSrc;
2776 int rc = PGM_GCPHYS_2_PTR(pVM, pPdeSrc->u & GST_PDE_PG_MASK, &pPTSrc);
2777 if (RT_FAILURE(rc))
2778 {
2779 AssertRC(rc);
2780 return rc;
2781 }
2782
2783 if (pPdeDst->n.u1Present)
2784 {
2785 GSTPTE const *pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2786 const GSTPTE PteSrc = *pPteSrc;
2787
2788#ifndef IN_RING0
2789 /* Bail out here as pgmPoolGetPage will return NULL and we'll crash below.
2790 * Our individual shadow handlers will provide more information and force a fatal exit.
2791 */
2792 if (MMHyperIsInsideArea(pVM, (RTGCPTR)GCPtrPage))
2793 {
2794 LogRel(("CheckPageFault: write to hypervisor region %RGv\n", GCPtrPage));
2795 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2796 }
2797#endif
2798 /*
2799 * Map shadow page table.
2800 */
2801 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2802 if (pShwPage)
2803 {
2804 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
2805 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2806 if (pPteDst->n.u1Present) /** @todo Optimize accessed bit emulation? */
2807 {
2808 if (pPteDst->u & PGM_PTFLAGS_TRACK_DIRTY)
2809 {
2810 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, pPteSrc->u & GST_PTE_PG_MASK);
2811 SHWPTE PteDst = *pPteDst;
2812
2813 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2814 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageTrap));
2815
2816 Assert(pPteSrc->n.u1Write);
2817
2818 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2819 * entry will not harm; write access will simply fault again and
2820 * take this path to only invalidate the entry.
2821 */
2822 if (RT_LIKELY(pPage))
2823 {
2824 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2825 {
2826 AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2827 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2828 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2829 PteDst.n.u1Write = 0;
2830 }
2831 else
2832 {
2833 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2834 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2835 {
2836 rc = pgmPhysPageMakeWritable(pVM, pPage, pPteSrc->u & GST_PTE_PG_MASK);
2837 AssertRC(rc);
2838 }
2839 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2840 PteDst.n.u1Write = 1;
2841 else
2842 {
2843 /* Still applies to shared pages. */
2844 Assert(!PGM_PAGE_IS_ZERO(pPage));
2845 PteDst.n.u1Write = 0;
2846 }
2847 }
2848 }
2849 else
2850 PteDst.n.u1Write = 1; /** @todo r=bird: This doesn't make sense to me. */
2851
2852 PteDst.n.u1Dirty = 1;
2853 PteDst.n.u1Accessed = 1;
2854 PteDst.au32[0] &= ~PGM_PTFLAGS_TRACK_DIRTY;
2855 ASMAtomicWriteSize(pPteDst, PteDst.u);
2856 PGM_INVL_PG(pVCpu, GCPtrPage);
2857 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2858 }
2859
2860# ifdef IN_RING0
2861 /* Check for stale TLB entry; only applies to the SMP guest case. */
2862 if ( pVM->cCpus > 1
2863 && pPteDst->n.u1Write == 1
2864 && pPteDst->n.u1Accessed == 1)
2865 {
2866 /* Stale TLB entry. */
2867 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageStale));
2868 PGM_INVL_PG(pVCpu, GCPtrPage);
2869 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2870 }
2871# endif
2872 }
2873 }
2874 else
2875 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2876 }
2877
2878 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2879}
2880
2881#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
2882
2883
2884/**
2885 * Sync a shadow page table.
2886 *
2887 * The shadow page table is not present. This includes the case where
2888 * there is a conflict with a mapping.
2889 *
2890 * @returns VBox status code.
2891 * @param pVCpu The VMCPU handle.
2892 * @param iPDSrc Page directory index.
2893 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2894 * Assume this is a temporary mapping.
2895 * @param GCPtrPage GC pointer of the page that caused the fault.
2896 */
2897PGM_BTH_DECL(int, SyncPT)(PVMCPU pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2898{
2899 PVM pVM = pVCpu->CTX_SUFF(pVM);
2900 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2901
2902 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2903#if 0 /* rarely useful; leave for debugging. */
2904 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2905#endif
2906 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage));
2907
2908 Assert(PGMIsLocked(pVM));
2909
2910#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2911 || PGM_GST_TYPE == PGM_TYPE_PAE \
2912 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2913 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
2914 && PGM_SHW_TYPE != PGM_TYPE_EPT
2915
2916 int rc = VINF_SUCCESS;
2917
2918 /*
2919 * Validate input a little bit.
2920 */
2921 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2922# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2923 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2924 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
2925
2926 /* Fetch the pgm pool shadow descriptor. */
2927 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2928 Assert(pShwPde);
2929
2930# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2931 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2932 PPGMPOOLPAGE pShwPde = NULL;
2933 PX86PDPAE pPDDst;
2934 PSHWPDE pPdeDst;
2935
2936 /* Fetch the pgm pool shadow descriptor. */
2937 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
2938 AssertRCSuccessReturn(rc, rc);
2939 Assert(pShwPde);
2940
2941 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
2942 pPdeDst = &pPDDst->a[iPDDst];
2943
2944# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2945 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2946 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2947 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2948 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2949 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2950 AssertRCSuccessReturn(rc, rc);
2951 Assert(pPDDst);
2952 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2953# endif
2954 SHWPDE PdeDst = *pPdeDst;
2955
2956# if PGM_GST_TYPE == PGM_TYPE_AMD64
2957 /* Fetch the pgm pool shadow descriptor. */
2958 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2959 Assert(pShwPde);
2960# endif
2961
2962# ifndef PGM_WITHOUT_MAPPINGS
2963 /*
2964 * Check for conflicts.
2965 * RC: In case of a conflict we'll go to Ring-3 and do a full SyncCR3.
2966 * R3: Simply resolve the conflict.
2967 */
2968 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
2969 {
2970 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
2971# ifndef IN_RING3
2972 Log(("SyncPT: Conflict at %RGv\n", GCPtrPage));
2973 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2974 return VERR_ADDRESS_CONFLICT;
2975
2976# else /* IN_RING3 */
2977 PPGMMAPPING pMapping = pgmGetMapping(pVM, (RTGCPTR)GCPtrPage);
2978 Assert(pMapping);
2979# if PGM_GST_TYPE == PGM_TYPE_32BIT
2980 rc = pgmR3SyncPTResolveConflict(pVM, pMapping, pPDSrc, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2981# elif PGM_GST_TYPE == PGM_TYPE_PAE
2982 rc = pgmR3SyncPTResolveConflictPAE(pVM, pMapping, GCPtrPage & (GST_PD_MASK << GST_PD_SHIFT));
2983# else
2984 AssertFailed(); /* can't happen for amd64 */
2985# endif
2986 if (RT_FAILURE(rc))
2987 {
2988 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
2989 return rc;
2990 }
2991 PdeDst = *pPdeDst;
2992# endif /* IN_RING3 */
2993 }
2994# endif /* !PGM_WITHOUT_MAPPINGS */
2995 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
2996
2997# if defined(IN_RC)
2998 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
2999 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3000# endif
3001
3002 /*
3003 * Sync page directory entry.
3004 */
3005 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3006 if (PdeSrc.n.u1Present)
3007 {
3008 /*
3009 * Allocate & map the page table.
3010 */
3011 PSHWPT pPTDst;
3012 const bool fPageTable = !PdeSrc.b.u1Size || !GST_IS_PSE_ACTIVE(pVCpu);
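        /* fPageTable: the guest PDE references a 4 KB page table; otherwise it is a
           2/4 MB page (PS set with PSE active) that we shadow with a page table of our own. */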
3013 PPGMPOOLPAGE pShwPage;
3014 RTGCPHYS GCPhys;
3015 if (fPageTable)
3016 {
3017 GCPhys = PdeSrc.u & GST_PDE_PG_MASK;
3018# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3019 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3020 GCPhys |= (iPDDst & 1) * (PAGE_SIZE / 2);
3021# endif
3022 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
3023 }
3024 else
3025 {
3026 PGMPOOLACCESS enmAccess;
3027# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
3028 const bool fNoExecute = PdeSrc.n.u1NoExecute && GST_IS_NX_ACTIVE(pVCpu);
3029# else
3030 const bool fNoExecute = false;
3031# endif
3032
3033 GCPhys = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
3034# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3035             /* Select the right PDE as we're emulating a 4 MB guest page with two 2 MB shadow PDEs. */
3036 GCPhys |= GCPtrPage & (1 << X86_PD_PAE_SHIFT);
3037# endif
3038 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
3039 if (PdeSrc.n.u1User)
3040 {
3041 if (PdeSrc.n.u1Write)
3042 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
3043 else
3044 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
3045 }
3046 else
3047 {
3048 if (PdeSrc.n.u1Write)
3049 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
3050 else
3051 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
3052 }
3053 rc = pgmPoolAllocEx(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, pShwPde->idx, iPDDst, &pShwPage);
3054 }
3055 if (rc == VINF_SUCCESS)
3056 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3057 else if (rc == VINF_PGM_CACHED_PAGE)
3058 {
3059 /*
3060 * The PT was cached, just hook it up.
3061 */
3062 if (fPageTable)
3063 PdeDst.u = pShwPage->Core.Key
3064 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
3065 else
3066 {
3067 PdeDst.u = pShwPage->Core.Key
3068 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
3069 /* (see explanation and assumptions further down.) */
3070 if ( !PdeSrc.b.u1Dirty
3071 && PdeSrc.b.u1Write)
3072 {
3073 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
3074 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
3075 PdeDst.b.u1Write = 0;
3076 }
3077 }
3078 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3079# if defined(IN_RC)
3080 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3081# endif
3082 return VINF_SUCCESS;
3083 }
3084 else if (rc == VERR_PGM_POOL_FLUSHED)
3085 {
3086 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3087# if defined(IN_RC)
3088 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3089# endif
3090 return VINF_PGM_SYNC_CR3;
3091 }
3092 else
3093 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3094 PdeDst.u &= X86_PDE_AVL_MASK;
3095 PdeDst.u |= pShwPage->Core.Key;
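        /* The shadow PDE now points at the new shadow page table; the access bits from
           the guest PDE are merged in below for the 4 KB and big-page cases respectively. */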
3096
3097 /*
3098 * Page directory has been accessed (this is a fault situation, remember).
3099 */
3100 pPDSrc->a[iPDSrc].n.u1Accessed = 1;
3101 if (fPageTable)
3102 {
3103 /*
3104 * Page table - 4KB.
3105 *
3106 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
3107 */
3108 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
3109 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u));
3110 PGSTPT pPTSrc;
3111 rc = PGM_GCPHYS_2_PTR(pVM, PdeSrc.u & GST_PDE_PG_MASK, &pPTSrc);
3112 if (RT_SUCCESS(rc))
3113 {
3114 /*
3115 * Start by syncing the page directory entry so CSAM's TLB trick works.
3116 */
3117 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
3118 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
3119 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3120# if defined(IN_RC)
3121 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3122# endif
3123
3124 /*
3125 * Directory/page user or supervisor privilege: (same goes for read/write)
3126 *
3127 * Directory Page Combined
3128 * U/S U/S U/S
3129 * 0 0 0
3130 * 0 1 0
3131 * 1 0 0
3132 * 1 1 1
3133 *
3134 * Simple AND operation. Table listed for completeness.
3135 *
3136 */
3137 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4K));
3138# ifdef PGM_SYNC_N_PAGES
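            /* Only sync a window of roughly PGM_SYNC_NR_PAGES entries centred on the
               faulting page, clamped to the bounds of the shadow page table. */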
3139 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
3140 unsigned iPTDst = iPTBase;
3141 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
3142 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
3143 iPTDst = 0;
3144 else
3145 iPTDst -= PGM_SYNC_NR_PAGES / 2;
3146# else /* !PGM_SYNC_N_PAGES */
3147 unsigned iPTDst = 0;
3148 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
3149# endif /* !PGM_SYNC_N_PAGES */
3150# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3151 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3152 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
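            /* offPTSrc is 0 or 512: the 1024-entry 32-bit guest page table is split across
               its lower and upper halves, each shadowed by a 512-entry PAE page table. */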
3153# else
3154 const unsigned offPTSrc = 0;
3155# endif
3156 for (; iPTDst < iPTDstEnd; iPTDst++)
3157 {
3158 const unsigned iPTSrc = iPTDst + offPTSrc;
3159 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
3160
3161 if (PteSrc.n.u1Present)
3162 {
3163# ifndef IN_RING0
3164 /*
3165                      * Assuming kernel code will be marked as supervisor (and not as user level
3166                      * code executed using a conforming code selector) and as read-only.
3167                      * Also assume that if we're monitoring a page, it's of no interest to CSAM.
3168 */
3169 PPGMPAGE pPage;
3170 if ( ((PdeSrc.u & pPTSrc->a[iPTSrc].u) & (X86_PTE_RW | X86_PTE_US))
3171 || !CSAMDoesPageNeedScanning(pVM, (iPDSrc << GST_PD_SHIFT) | (iPTSrc << PAGE_SHIFT))
3172 || ( (pPage = pgmPhysGetPage(&pVM->pgm.s, PteSrc.u & GST_PTE_PG_MASK))
3173 && PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
3174 )
3175# endif
3176 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
3177 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
3178 (RTGCPTR)(((RTGCPTR)iPDSrc << GST_PD_SHIFT) | ((RTGCPTR)iPTSrc << PAGE_SHIFT)),
3179 PteSrc.n.u1Present,
3180 PteSrc.n.u1Write & PdeSrc.n.u1Write,
3181 PteSrc.n.u1User & PdeSrc.n.u1User,
3182 (uint64_t)PteSrc.u,
3183 pPTDst->a[iPTDst].u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : "", pPTDst->a[iPTDst].u, iPTSrc, PdeSrc.au32[0],
3184 (RTGCPHYS)((PdeSrc.u & GST_PDE_PG_MASK) + iPTSrc*sizeof(PteSrc)) ));
3185 }
3186 /* else: the page table was cleared by the pool */
3187 } /* for PTEs */
3188 }
3189 }
3190 else
3191 {
3192 /*
3193 * Big page - 2/4MB.
3194 *
3195              * We'll walk the RAM range list in parallel and optimize lookups.
3196              * We will only sync one shadow page table at a time.
3197 */
3198 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT4M));
3199
3200 /**
3201              * @todo It might be more efficient to sync only a part of the 4MB page (similar to what we do for 4 KB page tables).
3202 */
3203
3204 /*
3205 * Start by syncing the page directory entry.
3206 */
3207 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
3208 | (PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PDE_AVL_MASK | X86_PDE_PCD | X86_PDE_PWT | X86_PDE_PS | X86_PDE4M_G | X86_PDE4M_D));
3209
3210 /*
3211 * If the page is not flagged as dirty and is writable, then make it read-only
3212 * at PD level, so we can set the dirty bit when the page is modified.
3213 *
3214 * ASSUMES that page access handlers are implemented on page table entry level.
3215 * Thus we will first catch the dirty access and set PDE.D and restart. If
3216 * there is an access handler, we'll trap again and let it work on the problem.
3217 */
3218 /** @todo move the above stuff to a section in the PGM documentation. */
3219 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
3220 if ( !PdeSrc.b.u1Dirty
3221 && PdeSrc.b.u1Write)
3222 {
3223 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,DirtyPageBig));
3224 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
3225 PdeDst.b.u1Write = 0;
3226 }
3227 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3228# if defined(IN_RC)
3229 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3230# endif
3231
3232 /*
3233 * Fill the shadow page table.
3234 */
3235 /* Get address and flags from the source PDE. */
3236 SHWPTE PteDstBase;
3237 PteDstBase.u = PdeSrc.u & ~(GST_PDE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT);
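            /* Every shadow PTE below is built as HCPhys | PteDstBase, so the big page's
               access bits are applied to each 4 KB piece of the shadow page table. */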
3238
3239 /* Loop thru the entries in the shadow PT. */
3240 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
3241 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
3242 GCPtrPage, PdeSrc.b.u1Present, PdeSrc.b.u1Write, PdeSrc.b.u1User, (uint64_t)PdeSrc.u, GCPtr,
3243 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
3244 PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRanges);
3245 unsigned iPTDst = 0;
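            /* Walk the RAM ranges in step with GCPhys; anything that falls outside a RAM
               range gets a zero PTE so the access faults and is handled as MMIO/invalid. */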
3246 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3247 && !VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
3248 {
3249 /* Advance ram range list. */
3250 while (pRam && GCPhys > pRam->GCPhysLast)
3251 pRam = pRam->CTX_SUFF(pNext);
3252 if (pRam && GCPhys >= pRam->GCPhys)
3253 {
3254 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> PAGE_SHIFT;
3255 do
3256 {
3257 /* Make shadow PTE. */
3258 PPGMPAGE pPage = &pRam->aPages[iHCPage];
3259 SHWPTE PteDst;
3260
3261# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3262 /* Try to make the page writable if necessary. */
3263 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
3264 && ( PGM_PAGE_IS_ZERO(pPage)
3265 || ( PteDstBase.n.u1Write
3266 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
3267# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
3268 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
3269# endif
3270# ifdef VBOX_WITH_PAGE_SHARING
3271 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
3272# endif
3273 && !PGM_PAGE_IS_BALLOONED(pPage))
3274 )
3275 )
3276 {
3277 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
3278 AssertRCReturn(rc, rc);
3279 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY))
3280 break;
3281 }
3282# endif
3283
3284 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
3285 {
3286 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
3287 {
3288 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
3289 PteDst.n.u1Write = 0;
3290 }
3291 else
3292 PteDst.u = 0;
3293 }
3294 else if (PGM_PAGE_IS_BALLOONED(pPage))
3295 {
3296 /* Skip ballooned pages. */
3297 PteDst.u = 0;
3298 }
3299# ifndef IN_RING0
3300 /*
3301              * Assuming kernel code will be marked as supervisor and not as user level code executed
3302              * using a conforming code selector. Don't check for read-only, as that implies the whole
3303              * 4MB can be code or read-only data. Linux enables write access for its large pages.
3304 */
3305 else if ( !PdeSrc.n.u1User
3306 && CSAMDoesPageNeedScanning(pVM, GCPtr | (iPTDst << SHW_PT_SHIFT)))
3307 PteDst.u = 0;
3308# endif
3309 else
3310 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage) | PteDstBase.u;
3311
3312 /* Only map writable pages writable. */
3313 if ( PteDst.n.u1Write
3314 && PteDst.n.u1Present
3315 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
3316 {
3317 /* Still applies to shared pages. */
3318 Assert(!PGM_PAGE_IS_ZERO(pPage));
3319 PteDst.n.u1Write = 0; /** @todo this isn't quite working yet... */
3320 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
3321 }
3322
3323 if (PteDst.n.u1Present)
3324 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
3325
3326 /* commit it */
3327 pPTDst->a[iPTDst] = PteDst;
3328 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
3329 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), PteDst.n.u1Present, PteDst.n.u1Write, PteDst.n.u1User, (uint64_t)PteDst.u,
3330 PteDst.u & PGM_PTFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
3331
3332 /* advance */
3333 GCPhys += PAGE_SIZE;
3334 iHCPage++;
3335 iPTDst++;
3336 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3337 && GCPhys <= pRam->GCPhysLast);
3338 }
3339 else if (pRam)
3340 {
3341 Log(("Invalid pages at %RGp\n", GCPhys));
3342 do
3343 {
3344 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
3345 GCPhys += PAGE_SIZE;
3346 iPTDst++;
3347 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
3348 && GCPhys < pRam->GCPhys);
3349 }
3350 else
3351 {
3352 Log(("Invalid pages at %RGp (2)\n", GCPhys));
3353 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
3354 pPTDst->a[iPTDst].u = 0; /* MMIO or invalid page, we must handle them manually. */
3355 }
3356 } /* while more PTEs */
3357 } /* 4KB / 4MB */
3358 }
3359 else
3360 AssertRelease(!PdeDst.n.u1Present);
3361
3362 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3363 if (RT_FAILURE(rc))
3364 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPTFailed));
3365 return rc;
3366
3367#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
3368 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3369 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
3370 && !defined(IN_RC)
3371
3372 /*
3373 * Validate input a little bit.
3374 */
3375 int rc = VINF_SUCCESS;
3376# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3377 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3378 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3379
3380 /* Fetch the pgm pool shadow descriptor. */
3381 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3382 Assert(pShwPde);
3383
3384# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3385 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3386 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
3387 PX86PDPAE pPDDst;
3388 PSHWPDE pPdeDst;
3389
3390 /* Fetch the pgm pool shadow descriptor. */
3391 rc = pgmShwGetPaePoolPagePD(&pVCpu->pgm.s, GCPtrPage, &pShwPde);
3392 AssertRCSuccessReturn(rc, rc);
3393 Assert(pShwPde);
3394
3395 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_BY_PGM(&pVM->pgm.s, pShwPde);
3396 pPdeDst = &pPDDst->a[iPDDst];
3397
3398# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3399 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3400 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3401 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3402 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
3403 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3404 AssertRCSuccessReturn(rc, rc);
3405 Assert(pPDDst);
3406 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3407
3408 /* Fetch the pgm pool shadow descriptor. */
3409 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3410 Assert(pShwPde);
3411
3412# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3413 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3414 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3415 PEPTPD pPDDst;
3416 PEPTPDPT pPdptDst;
3417
3418 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3419 if (rc != VINF_SUCCESS)
3420 {
3421 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3422 AssertRC(rc);
3423 return rc;
3424 }
3425 Assert(pPDDst);
3426 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3427
3428 /* Fetch the pgm pool shadow descriptor. */
3429 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3430 Assert(pShwPde);
3431# endif
3432 SHWPDE PdeDst = *pPdeDst;
3433
3434 Assert(!(PdeDst.u & PGM_PDFLAGS_MAPPING));
3435 Assert(!PdeDst.n.u1Present); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3436
3437# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
3438 if (BTH_IS_NP_ACTIVE(pVM))
3439 {
3440 PPGMPAGE pPage;
3441
3442 /* Check if we allocated a big page before for this 2 MB range. */
3443 rc = pgmPhysGetPageEx(&pVM->pgm.s, GCPtrPage & X86_PDE2M_PAE_PG_MASK, &pPage);
3444 if (RT_SUCCESS(rc))
3445 {
3446 RTHCPHYS HCPhys = NIL_RTHCPHYS;
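            /* HCPhys stays NIL unless one of the three cases below yields a usable 2 MB host
               page: reuse of an existing large page, revalidation of a previously disabled
               one, or a fresh large page allocation. */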
3447
3448 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3449 {
3450 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3451 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3452 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3453 }
3454 else if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED)
3455 {
3456 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3457 rc = pgmPhysIsValidLargePage(pVM, GCPtrPage, pPage);
3458 if (RT_SUCCESS(rc))
3459 {
3460 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3461 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3462 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3463 }
3464 }
3465 else if (PGMIsUsingLargePages(pVM))
3466 {
3467 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3468 if (RT_SUCCESS(rc))
3469 {
3470 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3471 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3472 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3473 }
3474 else
3475 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3476 }
3477
3478 if (HCPhys != NIL_RTHCPHYS)
3479 {
3480 PdeDst.u &= X86_PDE_AVL_MASK;
3481 PdeDst.u |= HCPhys;
3482 PdeDst.n.u1Present = 1;
3483 PdeDst.n.u1Write = 1;
3484 PdeDst.b.u1Size = 1;
3485# if PGM_SHW_TYPE == PGM_TYPE_EPT
3486 PdeDst.n.u1Execute = 1;
3487 PdeDst.b.u1IgnorePAT = 1;
3488 PdeDst.b.u3EMT = VMX_EPT_MEMTYPE_WB;
3489# else
3490 PdeDst.n.u1User = 1;
3491# endif
3492 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3493
3494 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3495 /* Add a reference to the first page only. */
3496 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3497
3498 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3499 return VINF_SUCCESS;
3500 }
3501 }
3502 }
3503# endif /* PGM_WITH_LARGE_PAGES && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3504
3505 GSTPDE PdeSrc;
3506 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3507 PdeSrc.n.u1Present = 1;
3508 PdeSrc.n.u1Write = 1;
3509 PdeSrc.n.u1Accessed = 1;
3510 PdeSrc.n.u1User = 1;
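    /* This configuration has no guest paging (real mode, protected mode without paging, or
       nested/EPT), so fake a fully permissive guest PDE and reuse the common SyncPage code below. */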
3511
3512 /*
3513 * Allocate & map the page table.
3514 */
3515 PSHWPT pPTDst;
3516 PPGMPOOLPAGE pShwPage;
3517 RTGCPHYS GCPhys;
3518
3519 /* Virtual address = physical address */
3520 GCPhys = GCPtrPage & X86_PAGE_4K_BASE_MASK;
3521 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, pShwPde->idx, iPDDst, &pShwPage);
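    /* The shadow page table is keyed on the start of the region it covers, hence the
       round-down of GCPhys to the SHW_PD_SHIFT boundary in the pool allocation above. */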
3522
3523 if ( rc == VINF_SUCCESS
3524 || rc == VINF_PGM_CACHED_PAGE)
3525 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR(pVM, pShwPage);
3526 else
3527 AssertMsgFailedReturn(("rc=%Rrc\n", rc), VERR_INTERNAL_ERROR);
3528
3529 PdeDst.u &= X86_PDE_AVL_MASK;
3530 PdeDst.u |= pShwPage->Core.Key;
3531 PdeDst.n.u1Present = 1;
3532 PdeDst.n.u1Write = 1;
3533# if PGM_SHW_TYPE == PGM_TYPE_EPT
3534 PdeDst.n.u1Execute = 1;
3535# else
3536 PdeDst.n.u1User = 1;
3537 PdeDst.n.u1Accessed = 1;
3538# endif
3539 ASMAtomicWriteSize(pPdeDst, PdeDst.u);
3540
3541 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, PGM_SYNC_NR_PAGES, 0 /* page not present */);
3542 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3543 return rc;
3544
3545#else
3546 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3547 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPT), a);
3548 return VERR_INTERNAL_ERROR;
3549#endif
3550}
3551
3552
3553
3554/**
3555 * Prefetch a page/set of pages.
3556 *
3557 * Typically used to sync commonly used pages before entering raw mode
3558 * after a CR3 reload.
3559 *
3560 * @returns VBox status code.
3561 * @param pVCpu The VMCPU handle.
3562 * @param GCPtrPage Page to prefetch.
3563 */
3564PGM_BTH_DECL(int, PrefetchPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage)
3565{
3566#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3567 || PGM_GST_TYPE == PGM_TYPE_REAL \
3568 || PGM_GST_TYPE == PGM_TYPE_PROT \
3569 || PGM_GST_TYPE == PGM_TYPE_PAE \
3570 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3571 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3572 && PGM_SHW_TYPE != PGM_TYPE_EPT
3573
3574 /*
3575      * Check that all Guest levels thru the PDE are present, getting the
3576      * PD and PDE in the process.
3577 */
3578 int rc = VINF_SUCCESS;
3579# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3580# if PGM_GST_TYPE == PGM_TYPE_32BIT
3581 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3582 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3583# elif PGM_GST_TYPE == PGM_TYPE_PAE
3584 unsigned iPDSrc;
3585 X86PDPE PdpeSrc;
3586 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3587 if (!pPDSrc)
3588 return VINF_SUCCESS; /* not present */
3589# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3590 unsigned iPDSrc;
3591 PX86PML4E pPml4eSrc;
3592 X86PDPE PdpeSrc;
3593 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3594 if (!pPDSrc)
3595 return VINF_SUCCESS; /* not present */
3596# endif
3597 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3598# else
3599 PGSTPD pPDSrc = NULL;
3600 const unsigned iPDSrc = 0;
3601 GSTPDE PdeSrc;
3602
3603 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3604 PdeSrc.n.u1Present = 1;
3605 PdeSrc.n.u1Write = 1;
3606 PdeSrc.n.u1Accessed = 1;
3607 PdeSrc.n.u1User = 1;
3608# endif
3609
3610 if (PdeSrc.n.u1Present && PdeSrc.n.u1Accessed)
3611 {
3612 PVM pVM = pVCpu->CTX_SUFF(pVM);
3613 pgmLock(pVM);
3614
3615# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3616 const X86PDE PdeDst = pgmShwGet32BitPDE(&pVCpu->pgm.s, GCPtrPage);
3617# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3618 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3619 PX86PDPAE pPDDst;
3620 X86PDEPAE PdeDst;
3621# if PGM_GST_TYPE != PGM_TYPE_PAE
3622 X86PDPE PdpeSrc;
3623
3624 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3625 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3626# endif
3627 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3628 if (rc != VINF_SUCCESS)
3629 {
3630 pgmUnlock(pVM);
3631 AssertRC(rc);
3632 return rc;
3633 }
3634 Assert(pPDDst);
3635 PdeDst = pPDDst->a[iPDDst];
3636
3637# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3638 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3639 PX86PDPAE pPDDst;
3640 X86PDEPAE PdeDst;
3641
3642# if PGM_GST_TYPE == PGM_TYPE_PROT
3643 /* AMD-V nested paging */
3644 X86PML4E Pml4eSrc;
3645 X86PDPE PdpeSrc;
3646 PX86PML4E pPml4eSrc = &Pml4eSrc;
3647
3648 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3649 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3650 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3651# endif
3652
3653 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3654 if (rc != VINF_SUCCESS)
3655 {
3656 pgmUnlock(pVM);
3657 AssertRC(rc);
3658 return rc;
3659 }
3660 Assert(pPDDst);
3661 PdeDst = pPDDst->a[iPDDst];
3662# endif
3663 if (!(PdeDst.u & PGM_PDFLAGS_MAPPING))
3664 {
3665 if (!PdeDst.n.u1Present)
3666 {
3667 /** @todo r=bird: This guy will set the A bit on the PDE,
3668 * probably harmless. */
3669 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3670 }
3671 else
3672 {
3673 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3674 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3675 * makes no sense to prefetch more than one page.
3676 */
3677 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3678 if (RT_SUCCESS(rc))
3679 rc = VINF_SUCCESS;
3680 }
3681 }
3682 pgmUnlock(pVM);
3683 }
3684 return rc;
3685
3686#elif PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3687 return VINF_SUCCESS; /* ignore */
3688#else
3689 AssertCompile(0);
3690#endif
3691}
3692
3693
3694
3695
3696/**
3697 * Syncs a page during a PGMVerifyAccess() call.
3698 *
3699 * @returns VBox status code (informational status codes included).
3700 * @param pVCpu The VMCPU handle.
3701 * @param GCPtrPage The address of the page to sync.
3702 * @param fPage The effective guest page flags.
3703 * @param uErr The trap error code.
3704 * @remarks This will normally never be called on invalid guest page
3705 * translation entries.
3706 */
3707PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPU pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3708{
3709 PVM pVM = pVCpu->CTX_SUFF(pVM);
3710
3711 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3712
3713 Assert(!pVM->pgm.s.fNestedPaging);
3714#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3715 || PGM_GST_TYPE == PGM_TYPE_REAL \
3716 || PGM_GST_TYPE == PGM_TYPE_PROT \
3717 || PGM_GST_TYPE == PGM_TYPE_PAE \
3718 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3719 && PGM_SHW_TYPE != PGM_TYPE_NESTED \
3720 && PGM_SHW_TYPE != PGM_TYPE_EPT
3721
3722# ifndef IN_RING0
3723 if (!(fPage & X86_PTE_US))
3724 {
3725 /*
3726 * Mark this page as safe.
3727 */
3728 /** @todo not correct for pages that contain both code and data!! */
3729 Log(("CSAMMarkPage %RGv; scanned=%d\n", GCPtrPage, true));
3730 CSAMMarkPage(pVM, GCPtrPage, true);
3731 }
3732# endif
3733
3734 /*
3735 * Get guest PD and index.
3736 */
3737 /** @todo Performance: We've done all this a jiffy ago in the
3738 * PGMGstGetPage call. */
3739# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3740# if PGM_GST_TYPE == PGM_TYPE_32BIT
3741 const unsigned iPDSrc = GCPtrPage >> GST_PD_SHIFT;
3742 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3743
3744# elif PGM_GST_TYPE == PGM_TYPE_PAE
3745 unsigned iPDSrc = 0;
3746 X86PDPE PdpeSrc;
3747 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3748 if (RT_UNLIKELY(!pPDSrc))
3749 {
3750 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3751 return VINF_EM_RAW_GUEST_TRAP;
3752 }
3753
3754# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3755 unsigned iPDSrc = 0; /* shut up gcc */
3756 PX86PML4E pPml4eSrc = NULL; /* ditto */
3757 X86PDPE PdpeSrc;
3758 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3759 if (RT_UNLIKELY(!pPDSrc))
3760 {
3761 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3762 return VINF_EM_RAW_GUEST_TRAP;
3763 }
3764# endif
3765
3766# else /* !PGM_WITH_PAGING */
3767 PGSTPD pPDSrc = NULL;
3768 const unsigned iPDSrc = 0;
3769# endif /* !PGM_WITH_PAGING */
3770 int rc = VINF_SUCCESS;
3771
3772 pgmLock(pVM);
3773
3774 /*
3775 * First check if the shadow pd is present.
3776 */
3777# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3778 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(&pVCpu->pgm.s, GCPtrPage);
3779
3780# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3781 PX86PDEPAE pPdeDst;
3782 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3783 PX86PDPAE pPDDst;
3784# if PGM_GST_TYPE != PGM_TYPE_PAE
3785 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3786 X86PDPE PdpeSrc;
3787 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3788# endif
3789 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3790 if (rc != VINF_SUCCESS)
3791 {
3792 pgmUnlock(pVM);
3793 AssertRC(rc);
3794 return rc;
3795 }
3796 Assert(pPDDst);
3797 pPdeDst = &pPDDst->a[iPDDst];
3798
3799# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3800 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3801 PX86PDPAE pPDDst;
3802 PX86PDEPAE pPdeDst;
3803
3804# if PGM_GST_TYPE == PGM_TYPE_PROT
3805 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3806 X86PML4E Pml4eSrc;
3807 X86PDPE PdpeSrc;
3808 PX86PML4E pPml4eSrc = &Pml4eSrc;
3809 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3810 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3811# endif
3812
3813 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3814 if (rc != VINF_SUCCESS)
3815 {
3816 pgmUnlock(pVM);
3817 AssertRC(rc);
3818 return rc;
3819 }
3820 Assert(pPDDst);
3821 pPdeDst = &pPDDst->a[iPDDst];
3822# endif
3823
3824# if defined(IN_RC)
3825 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3826 PGMDynLockHCPage(pVM, (uint8_t *)pPdeDst);
3827# endif
3828
3829 if (!pPdeDst->n.u1Present)
3830 {
3831 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3832 if (rc != VINF_SUCCESS)
3833 {
3834# if defined(IN_RC)
3835 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3836 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3837# endif
3838 pgmUnlock(pVM);
3839 AssertRC(rc);
3840 return rc;
3841 }
3842 }
3843
3844# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3845 /* Check for dirty bit fault */
3846 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3847 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3848 Log(("PGMVerifyAccess: success (dirty)\n"));
3849 else
3850# endif
3851 {
3852# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3853 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3854# else
3855 GSTPDE PdeSrc;
3856 PdeSrc.u = 0; /* faked so we don't have to #ifdef everything */
3857 PdeSrc.n.u1Present = 1;
3858 PdeSrc.n.u1Write = 1;
3859 PdeSrc.n.u1Accessed = 1;
3860 PdeSrc.n.u1User = 1;
3861# endif
3862
3863 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3864 if (uErr & X86_TRAP_PF_US)
3865 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncUser));
3866 else /* supervisor */
3867 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3868
3869 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3870 if (RT_SUCCESS(rc))
3871 {
3872 /* Page was successfully synced */
3873 Log2(("PGMVerifyAccess: success (sync)\n"));
3874 rc = VINF_SUCCESS;
3875 }
3876 else
3877 {
3878 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3879 rc = VINF_EM_RAW_GUEST_TRAP;
3880 }
3881 }
3882# if defined(IN_RC)
3883 /* Make sure the dynamic pPdeDst mapping will not be reused during this function. */
3884 PGMDynUnlockHCPage(pVM, (uint8_t *)pPdeDst);
3885# endif
3886 pgmUnlock(pVM);
3887 return rc;
3888
3889#else /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3890
3891     AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3892 return VERR_INTERNAL_ERROR;
3893#endif /* PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_NESTED */
3894}
3895
3896
3897/**
3898 * Syncs the paging hierarchy starting at CR3.
3899 *
3900 * @returns VBox status code, no specials.
3901 * @param pVCpu The VMCPU handle.
3902 * @param cr0 Guest context CR0 register
3903 * @param cr3 Guest context CR3 register
3904 * @param cr4 Guest context CR4 register
3905 * @param fGlobal Including global page directories or not
3906 */
3907PGM_BTH_DECL(int, SyncCR3)(PVMCPU pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3908{
3909 PVM pVM = pVCpu->CTX_SUFF(pVM);
3910
3911 LogFlow(("SyncCR3 fGlobal=%d\n", !!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
3912
3913#if PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT
3914
3915 pgmLock(pVM);
3916
3917# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3918 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3919 if (pPool->cDirtyPages)
3920 pgmPoolResetDirtyPages(pVM);
3921# endif
3922
3923 /*
3924 * Update page access handlers.
3925      * The virtual handlers are always flushed, while the physical ones are only flushed on demand.
3926      * WARNING: We are incorrectly not doing global flushing on Virtual Handler updates. We'll
3927      * have to look into that later because it will have a bad influence on performance.
3928 * @note SvL: There's no need for that. Just invalidate the virtual range(s).
3929 * bird: Yes, but that won't work for aliases.
3930 */
3931 /** @todo this MUST go away. See #1557. */
3932 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3933 PGM_GST_NAME(HandlerVirtualUpdate)(pVM, cr4);
3934 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncCR3Handlers), h);
3935 pgmUnlock(pVM);
3936#endif /* !NESTED && !EPT */
3937
3938#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
3939 /*
3940 * Nested / EPT - almost no work.
3941 */
3942 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3943 return VINF_SUCCESS;
3944
3945#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3946 /*
3947 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3948 * out the shadow parts when the guest modifies its tables.
3949 */
3950 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3951 return VINF_SUCCESS;
3952
3953#else /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3954
3955# ifndef PGM_WITHOUT_MAPPINGS
3956 /*
3957 * Check for and resolve conflicts with our guest mappings if they
3958 * are enabled and not fixed.
3959 */
3960 if (pgmMapAreMappingsFloating(&pVM->pgm.s))
3961 {
3962 int rc = pgmMapResolveConflicts(pVM);
3963 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
3964 if (rc == VINF_PGM_SYNC_CR3)
3965 {
3966 LogFlow(("SyncCR3: detected conflict -> VINF_PGM_SYNC_CR3\n"));
3967 return VINF_PGM_SYNC_CR3;
3968 }
3969 }
3970# else
3971 Assert(!pgmMapAreMappingsEnabled(&pVM->pgm.s));
3972# endif
3973 return VINF_SUCCESS;
3974#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3975}
3976
3977
3978
3979
3980#ifdef VBOX_STRICT
3981#ifdef IN_RC
3982# undef AssertMsgFailed
3983# define AssertMsgFailed Log
3984#endif
3985#ifdef IN_RING3
3986# include <VBox/dbgf.h>
3987
3988/**
3989 * Dumps a page table hierarchy using only physical addresses and cr4/lm flags.
3990 *
3991 * @returns VBox status code (VINF_SUCCESS).
3992 * @param cr3 The root of the hierarchy.
3993 * @param cr4 The cr4; only PAE and PSE are currently used.
3994 * @param fLongMode Set if long mode, false if not long mode.
3995 * @param cMaxDepth Number of levels to dump.
3996 * @param pHlp Pointer to the output functions.
3997 */
3998RT_C_DECLS_BEGIN
3999VMMR3DECL(int) PGMR3DumpHierarchyHC(PVM pVM, uint32_t cr3, uint32_t cr4, bool fLongMode, unsigned cMaxDepth, PCDBGFINFOHLP pHlp);
4000RT_C_DECLS_END
4001
4002#endif
4003
4004/**
4005 * Checks that the shadow page table is in sync with the guest one.
4006 *
4007 * @returns The number of errors.
4009 * @param pVCpu The VMCPU handle.
4010 * @param cr3 Guest context CR3 register
4011 * @param cr4 Guest context CR4 register
4012 * @param GCPtr Where to start. Defaults to 0.
4013 * @param cb How much to check. Defaults to everything.
4014 */
4015PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPU pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
4016{
4017#if PGM_SHW_TYPE == PGM_TYPE_NESTED || PGM_SHW_TYPE == PGM_TYPE_EPT
4018 return 0;
4019#else
4020 unsigned cErrors = 0;
4021 PVM pVM = pVCpu->CTX_SUFF(pVM);
4022 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4023
4024#if PGM_GST_TYPE == PGM_TYPE_PAE
4025 /** @todo currently broken; crashes below somewhere */
4026 AssertFailed();
4027#endif
4028
4029#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4030 || PGM_GST_TYPE == PGM_TYPE_PAE \
4031 || PGM_GST_TYPE == PGM_TYPE_AMD64
4032
4033 bool fBigPagesSupported = GST_IS_PSE_ACTIVE(pVCpu);
4034 PPGMCPU pPGM = &pVCpu->pgm.s;
4035 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
4036 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
4037# ifndef IN_RING0
4038 RTHCPHYS HCPhys; /* general usage. */
4039# endif
4040 int rc;
4041
4042 /*
4043 * Check that the Guest CR3 and all its mappings are correct.
4044 */
4045 AssertMsgReturn(pPGM->GCPhysCR3 == (cr3 & GST_CR3_PAGE_MASK),
4046 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
4047 false);
4048# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
4049# if PGM_GST_TYPE == PGM_TYPE_32BIT
4050 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
4051# else
4052 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
4053# endif
4054 AssertRCReturn(rc, 1);
4055 HCPhys = NIL_RTHCPHYS;
4056 rc = pgmRamGCPhys2HCPhys(&pVM->pgm.s, cr3 & GST_CR3_PAGE_MASK, &HCPhys);
4057 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
4058# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
4059 pgmGstGet32bitPDPtr(pVCpu);
4060 RTGCPHYS GCPhys;
4061 rc = PGMR3DbgR3Ptr2GCPhys(pVM, pPGM->pGst32BitPdR3, &GCPhys);
4062 AssertRCReturn(rc, 1);
4063 AssertMsgReturn((cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
4064# endif
4065# endif /* !IN_RING0 */
4066
4067 /*
4068 * Get and check the Shadow CR3.
4069 */
4070# if PGM_SHW_TYPE == PGM_TYPE_32BIT
4071 unsigned cPDEs = X86_PG_ENTRIES;
4072 unsigned cIncrement = X86_PG_ENTRIES * PAGE_SIZE;
4073# elif PGM_SHW_TYPE == PGM_TYPE_PAE
4074# if PGM_GST_TYPE == PGM_TYPE_32BIT
4075 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
4076# else
4077 unsigned cPDEs = X86_PG_PAE_ENTRIES;
4078# endif
4079 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
4080# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
4081 unsigned cPDEs = X86_PG_PAE_ENTRIES;
4082 unsigned cIncrement = X86_PG_PAE_ENTRIES * PAGE_SIZE;
4083# endif
4084 if (cb != ~(RTGCPTR)0)
4085 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
4086
4087/** @todo call the other two PGMAssert*() functions. */
4088
4089# if PGM_GST_TYPE == PGM_TYPE_AMD64
4090 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
4091
4092 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
4093 {
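        /* Each PML4 entry covers 512 GB of virtual address space (512 * 512 * 2 MB), which is
           why GCPtr is advanced by that amount whenever an entry is skipped. */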
4094 PPGMPOOLPAGE pShwPdpt = NULL;
4095 PX86PML4E pPml4eSrc;
4096 PX86PML4E pPml4eDst;
4097 RTGCPHYS GCPhysPdptSrc;
4098
4099 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
4100 pPml4eDst = pgmShwGetLongModePML4EPtr(&pVCpu->pgm.s, iPml4);
4101
4102 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
4103 if (!pPml4eDst->n.u1Present)
4104 {
4105 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
4106 continue;
4107 }
4108
4109 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
4110 GCPhysPdptSrc = pPml4eSrc->u & X86_PML4E_PG_MASK_FULL;
4111
4112 if (pPml4eSrc->n.u1Present != pPml4eDst->n.u1Present)
4113 {
4114 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
4115 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
4116 cErrors++;
4117 continue;
4118 }
4119
4120 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
4121 {
4122 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
4123 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
4124 cErrors++;
4125 continue;
4126 }
4127
4128 if ( pPml4eDst->n.u1User != pPml4eSrc->n.u1User
4129 || pPml4eDst->n.u1Write != pPml4eSrc->n.u1Write
4130 || pPml4eDst->n.u1NoExecute != pPml4eSrc->n.u1NoExecute)
4131 {
4132 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
4133 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
4134 cErrors++;
4135 continue;
4136 }
4137# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
4138 {
4139# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
4140
4141# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
4142 /*
4143 * Check the PDPTEs too.
4144 */
4145 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
4146
4147 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
4148 {
4149 unsigned iPDSrc = 0; /* initialized to shut up gcc */
4150 PPGMPOOLPAGE pShwPde = NULL;
4151 PX86PDPE pPdpeDst;
4152 RTGCPHYS GCPhysPdeSrc;
4153# if PGM_GST_TYPE == PGM_TYPE_PAE
4154 X86PDPE PdpeSrc;
4155 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
4156 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(&pVCpu->pgm.s);
4157# else
4158 PX86PML4E pPml4eSrcIgn;
4159 X86PDPE PdpeSrc;
4160 PX86PDPT pPdptDst;
4161 PX86PDPAE pPDDst;
4162 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
4163
4164 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
4165 if (rc != VINF_SUCCESS)
4166 {
4167 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
4168 GCPtr += 512 * _2M;
4169 continue; /* next PDPTE */
4170 }
4171 Assert(pPDDst);
4172# endif
4173 Assert(iPDSrc == 0);
4174
4175 pPdpeDst = &pPdptDst->a[iPdpt];
4176
4177 if (!pPdpeDst->n.u1Present)
4178 {
4179 GCPtr += 512 * _2M;
4180 continue; /* next PDPTE */
4181 }
4182
4183 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
4184 GCPhysPdeSrc = PdpeSrc.u & X86_PDPE_PG_MASK;
4185
4186 if (pPdpeDst->n.u1Present != PdpeSrc.n.u1Present)
4187 {
4188 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
4189 GCPtr += 512 * _2M;
4190 cErrors++;
4191 continue;
4192 }
4193
4194 if (GCPhysPdeSrc != pShwPde->GCPhys)
4195 {
4196# if PGM_GST_TYPE == PGM_TYPE_AMD64
4197 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
4198# else
4199 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
4200# endif
4201 GCPtr += 512 * _2M;
4202 cErrors++;
4203 continue;
4204 }
4205
4206# if PGM_GST_TYPE == PGM_TYPE_AMD64
4207 if ( pPdpeDst->lm.u1User != PdpeSrc.lm.u1User
4208 || pPdpeDst->lm.u1Write != PdpeSrc.lm.u1Write
4209 || pPdpeDst->lm.u1NoExecute != PdpeSrc.lm.u1NoExecute)
4210 {
4211 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
4212 GCPtr += 512 * _2M;
4213 cErrors++;
4214 continue;
4215 }
4216# endif
4217
4218# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
4219 {
4220# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
4221# if PGM_GST_TYPE == PGM_TYPE_32BIT
4222 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
4223# if PGM_SHW_TYPE == PGM_TYPE_32BIT
4224 PCX86PD pPDDst = pgmShwGet32BitPDPtr(&pVCpu->pgm.s);
4225# endif
4226# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
4227 /*
4228 * Iterate the shadow page directory.
4229 */
4230 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
4231 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
4232
4233 for (;
4234 iPDDst < cPDEs;
4235 iPDDst++, GCPtr += cIncrement)
4236 {
4237# if PGM_SHW_TYPE == PGM_TYPE_PAE
4238 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pPGM, GCPtr);
4239# else
4240 const SHWPDE PdeDst = pPDDst->a[iPDDst];
4241# endif
4242 if (PdeDst.u & PGM_PDFLAGS_MAPPING)
4243 {
4244 Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
4245 if ((PdeDst.u & X86_PDE_AVL_MASK) != PGM_PDFLAGS_MAPPING)
4246 {
4247 AssertMsgFailed(("Mapping shall only have PGM_PDFLAGS_MAPPING set! PdeDst.u=%#RX64\n", (uint64_t)PdeDst.u));
4248 cErrors++;
4249 continue;
4250 }
4251 }
4252 else if ( (PdeDst.u & X86_PDE_P)
4253 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
4254 )
4255 {
4256 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
4257 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
4258 if (!pPoolPage)
4259 {
4260 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
4261 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
4262 cErrors++;
4263 continue;
4264 }
4265 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR(pVM, pPoolPage);
4266
4267 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
4268 {
4269 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
4270 GCPtr, (uint64_t)PdeDst.u));
4271 cErrors++;
4272 }
4273
4274 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
4275 {
4276 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
4277 GCPtr, (uint64_t)PdeDst.u));
4278 cErrors++;
4279 }
4280
4281 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
4282 if (!PdeSrc.n.u1Present)
4283 {
4284 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
4285 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
4286 cErrors++;
4287 continue;
4288 }
4289
4290 if ( !PdeSrc.b.u1Size
4291 || !fBigPagesSupported)
4292 {
4293 GCPhysGst = PdeSrc.u & GST_PDE_PG_MASK;
4294# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4295 GCPhysGst |= (iPDDst & 1) * (PAGE_SIZE / 2);
4296# endif
4297 }
4298 else
4299 {
4300# if PGM_GST_TYPE == PGM_TYPE_32BIT
4301 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
4302 {
4303 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
4304 GCPtr, (uint64_t)PdeSrc.u));
4305 cErrors++;
4306 continue;
4307 }
4308# endif
4309 GCPhysGst = GST_GET_PDE_BIG_PG_GCPHYS(pVM, PdeSrc);
4310# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4311 GCPhysGst |= GCPtr & RT_BIT(X86_PAGE_2M_SHIFT);
4312# endif
4313 }
4314
4315 if ( pPoolPage->enmKind
4316 != (!PdeSrc.b.u1Size || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
4317 {
4318 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
4319 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
4320 cErrors++;
4321 }
4322
4323 PPGMPAGE pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4324 if (!pPhysPage)
4325 {
4326 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4327 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4328 cErrors++;
4329 continue;
4330 }
4331
4332 if (GCPhysGst != pPoolPage->GCPhys)
4333 {
4334 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
4335 GCPhysGst, pPoolPage->GCPhys, GCPtr));
4336 cErrors++;
4337 continue;
4338 }
4339
4340 if ( !PdeSrc.b.u1Size
4341 || !fBigPagesSupported)
4342 {
4343 /*
4344 * Page Table.
4345 */
4346 const GSTPT *pPTSrc;
4347 rc = PGM_GCPHYS_2_PTR(pVM, GCPhysGst & ~(RTGCPHYS)(PAGE_SIZE - 1), &pPTSrc);
4348 if (RT_FAILURE(rc))
4349 {
4350 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
4351 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
4352 cErrors++;
4353 continue;
4354 }
4355 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
4356 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
4357 {
4358 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
4359 // (This problem will go away when/if we shadow multiple CR3s.)
4360 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4361 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4362 cErrors++;
4363 continue;
4364 }
4365 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4366 {
4367 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
4368 GCPtr, (uint64_t)PdeDst.u));
4369 cErrors++;
4370 continue;
4371 }
4372
4373 /* iterate the page table. */
4374# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
4375 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
4376 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
4377# else
4378 const unsigned offPTSrc = 0;
4379# endif
4380 for (unsigned iPT = 0, off = 0;
4381 iPT < RT_ELEMENTS(pPTDst->a);
4382 iPT++, off += PAGE_SIZE)
4383 {
4384 const SHWPTE PteDst = pPTDst->a[iPT];
4385
4386 /* skip not-present entries. */
4387 if (!(PteDst.u & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
4388 continue;
4389 Assert(PteDst.n.u1Present);
4390
4391 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
4392 if (!PteSrc.n.u1Present)
4393 {
4394# ifdef IN_RING3
4395 PGMAssertHandlerAndFlagsInSync(pVM);
4396 PGMR3DumpHierarchyGC(pVM, cr3, cr4, (PdeSrc.u & GST_PDE_PG_MASK));
4397# endif
4398 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
4399 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u, pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
4400 (PdeSrc.u & GST_PDE_PG_MASK) + (iPT + offPTSrc)*sizeof(PteSrc)));
4401 cErrors++;
4402 continue;
4403 }
4404
4405 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
4406# if 1 /** @todo sync accessed bit properly... */
4407 fIgnoreFlags |= X86_PTE_A;
4408# endif
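                    /* fIgnoreFlags collects the bits allowed to differ between the guest and shadow
                       PTE (physical address, AVL, caching and, for now, A/D handling); the final
                       flags comparison below masks them out. */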
4409
4410 /* match the physical addresses */
4411 HCPhysShw = PteDst.u & SHW_PTE_PG_MASK;
4412 GCPhysGst = PteSrc.u & GST_PTE_PG_MASK;
4413
4414# ifdef IN_RING3
4415 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4416 if (RT_FAILURE(rc))
4417 {
4418 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4419 {
4420 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4421 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4422 cErrors++;
4423 continue;
4424 }
4425 }
4426 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4427 {
4428 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4429 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4430 cErrors++;
4431 continue;
4432 }
4433# endif
4434
4435 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4436 if (!pPhysPage)
4437 {
4438# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4439 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4440 {
4441 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4442 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4443 cErrors++;
4444 continue;
4445 }
4446# endif
4447 if (PteDst.n.u1Write)
4448 {
4449 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4450 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4451 cErrors++;
4452 }
4453 fIgnoreFlags |= X86_PTE_RW;
4454 }
4455 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4456 {
4457 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4458 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4459 cErrors++;
4460 continue;
4461 }
4462
4463 /* flags */
4464 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4465 {
4466 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4467 {
4468 if (PteDst.n.u1Write)
4469 {
4470 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4471 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4472 cErrors++;
4473 continue;
4474 }
4475 fIgnoreFlags |= X86_PTE_RW;
4476 }
4477 else
4478 {
4479 if ( PteDst.n.u1Present
4480# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4481 && !PGM_PAGE_IS_MMIO(pPhysPage)
4482# endif
4483 )
4484 {
4485 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4486 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4487 cErrors++;
4488 continue;
4489 }
4490 fIgnoreFlags |= X86_PTE_P;
4491 }
4492 }
4493 else
4494 {
4495 if (!PteSrc.n.u1Dirty && PteSrc.n.u1Write)
4496 {
4497 if (PteDst.n.u1Write)
4498 {
4499 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4500 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4501 cErrors++;
4502 continue;
4503 }
4504 if (!(PteDst.u & PGM_PTFLAGS_TRACK_DIRTY))
4505 {
4506 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4507 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4508 cErrors++;
4509 continue;
4510 }
4511 if (PteDst.n.u1Dirty)
4512 {
4513 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4514 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4515 cErrors++;
4516 }
4517# if 0 /** @todo sync access bit properly... */
4518 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4519 {
4520                         AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4521 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4522 cErrors++;
4523 }
4524 fIgnoreFlags |= X86_PTE_RW;
4525# else
4526 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4527# endif
4528 }
4529 else if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4530 {
4531 /* access bit emulation (not implemented). */
4532 if (PteSrc.n.u1Accessed || PteDst.n.u1Present)
4533 {
4534 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4535 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4536 cErrors++;
4537 continue;
4538 }
4539 if (!PteDst.n.u1Accessed)
4540 {
4541                         AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4542 GCPtr + off, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4543 cErrors++;
4544 }
4545 fIgnoreFlags |= X86_PTE_P;
4546 }
4547# ifdef DEBUG_sandervl
4548 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4549# endif
4550 }
4551
4552 if ( (PteSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4553 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags)
4554 )
4555 {
4556 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4557 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4558 fIgnoreFlags, (uint64_t)PteSrc.u, (uint64_t)PteDst.u));
4559 cErrors++;
4560 continue;
4561 }
4562 } /* foreach PTE */
4563 }
4564 else
4565 {
4566 /*
4567 * Big Page.
4568 */
4569 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4570 if (!PdeSrc.b.u1Dirty && PdeSrc.b.u1Write)
4571 {
4572 if (PdeDst.n.u1Write)
4573 {
4574 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4575 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4576 cErrors++;
4577 continue;
4578 }
4579 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4580 {
4581                     AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4582 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4583 cErrors++;
4584 continue;
4585 }
4586# if 0 /** @todo sync access bit properly... */
4587 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4588 {
4589                     AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4590 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4591 cErrors++;
4592 }
4593 fIgnoreFlags |= X86_PTE_RW;
4594# else
4595 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4596# endif
4597 }
4598 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4599 {
4600 /* access bit emulation (not implemented). */
4601 if (PdeSrc.b.u1Accessed || PdeDst.n.u1Present)
4602 {
4603 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4604 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4605 cErrors++;
4606 continue;
4607 }
4608 if (!PdeDst.n.u1Accessed)
4609 {
4610 AssertMsgFailed(("!ACCESSED page at %RGv does not have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4611 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4612 cErrors++;
4613 }
4614 fIgnoreFlags |= X86_PTE_P;
4615 }
4616
4617 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4618 {
4619 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4620 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4621 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4622 cErrors++;
4623 }
4624
4625 /* iterate the page table. */
4626 for (unsigned iPT = 0, off = 0;
4627 iPT < RT_ELEMENTS(pPTDst->a);
4628 iPT++, off += PAGE_SIZE, GCPhysGst += PAGE_SIZE)
4629 {
4630 const SHWPTE PteDst = pPTDst->a[iPT];
4631
4632 if (PteDst.u & PGM_PTFLAGS_TRACK_DIRTY)
4633 {
4634 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4635 GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4636 cErrors++;
4637 }
4638
4639 /* skip not-present entries. */
4640 if (!PteDst.n.u1Present) /** @todo deal with ALL handlers and CSAM !P pages! */
4641 continue;
4642
4643 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4644
4645 /* match the physical addresses */
4646 HCPhysShw = PteDst.u & X86_PTE_PAE_PG_MASK;
4647
4648# ifdef IN_RING3
4649 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4650 if (RT_FAILURE(rc))
4651 {
4652 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4653 {
4654 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4655 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4656 cErrors++;
4657 }
4658 }
4659 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4660 {
4661 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4662 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4663 cErrors++;
4664 continue;
4665 }
4666# endif
4667 pPhysPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysGst);
4668 if (!pPhysPage)
4669 {
4670# ifdef IN_RING3 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4671 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4672 {
4673 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4674 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4675 cErrors++;
4676 continue;
4677 }
4678# endif
4679 if (PteDst.n.u1Write)
4680 {
4681 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4682 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4683 cErrors++;
4684 }
4685 fIgnoreFlags |= X86_PTE_RW;
4686 }
4687 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4688 {
4689 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4690 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4691 cErrors++;
4692 continue;
4693 }
4694
4695 /* flags */
4696 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4697 {
4698 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4699 {
4700 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4701 {
4702 if (PteDst.n.u1Write)
4703 {
4704 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4705 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4706 cErrors++;
4707 continue;
4708 }
4709 fIgnoreFlags |= X86_PTE_RW;
4710 }
4711 }
4712 else
4713 {
4714 if ( PteDst.n.u1Present
4715# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4716 && !PGM_PAGE_IS_MMIO(pPhysPage)
4717# endif
4718 )
4719 {
4720 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4721 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4722 cErrors++;
4723 continue;
4724 }
4725 fIgnoreFlags |= X86_PTE_P;
4726 }
4727 }
4728
4729 if ( (PdeSrc.u & ~fIgnoreFlags) != (PteDst.u & ~fIgnoreFlags)
4730 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (PteDst.u & ~fIgnoreFlags) /* lazy phys handler dereg. */
4731 )
4732 {
4733 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4734 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PteDst.u & ~fIgnoreFlags,
4735 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PteDst.u));
4736 cErrors++;
4737 continue;
4738 }
4739 } /* for each PTE */
4740 }
4741 }
4742 /* not present */
4743
4744 } /* for each PDE */
4745
4746 } /* for each PDPTE */
4747
4748 } /* for each PML4E */
4749
4750# ifdef DEBUG
4751 if (cErrors)
4752 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4753# endif
4754
4755#endif /* GST == 32BIT, PAE or AMD64 */
4756 return cErrors;
4757
4758#endif /* PGM_SHW_TYPE != PGM_TYPE_NESTED && PGM_SHW_TYPE != PGM_TYPE_EPT */
4759}
4760#endif /* VBOX_STRICT */
4761
4762
4763/**
4764 * Sets up the CR3 for shadow paging.
4765 *
4766 * @returns Strict VBox status code.
4767 * @retval VINF_SUCCESS.
4768 *
4769 * @param pVCpu The VMCPU handle.
4770 * @param GCPhysCR3 The physical address in the CR3 register.
4771 */
4772PGM_BTH_DECL(int, MapCR3)(PVMCPU pVCpu, RTGCPHYS GCPhysCR3)
4773{
4774 PVM pVM = pVCpu->CTX_SUFF(pVM);
4775
4776 /* Update guest paging info. */
4777#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4778 || PGM_GST_TYPE == PGM_TYPE_PAE \
4779 || PGM_GST_TYPE == PGM_TYPE_AMD64
4780
4781 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4782
4783 /*
4784 * Map the page CR3 points at.
4785 */
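/*
 * Note: in raw-mode context and with the 2x4GB ring-0 layout no host pointer is
 * obtained here; HCPtrGuestCR3 stays NIL and only the host physical address is
 * used for the PGMMap call below.
 */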
4786 RTHCPTR HCPtrGuestCR3;
4787 RTHCPHYS HCPhysGuestCR3;
4788 pgmLock(pVM);
4789 PPGMPAGE pPageCR3 = pgmPhysGetPage(&pVM->pgm.s, GCPhysCR3);
4790 AssertReturn(pPageCR3, VERR_INTERNAL_ERROR_2);
4791 HCPhysGuestCR3 = PGM_PAGE_GET_HCPHYS(pPageCR3);
4792 /** @todo this needs some reworking wrt. locking. */
4793# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4794 HCPtrGuestCR3 = NIL_RTHCPTR;
4795 int rc = VINF_SUCCESS;
4796# else
4797 int rc = pgmPhysGCPhys2CCPtrInternal(pVM, pPageCR3, GCPhysCR3 & GST_CR3_PAGE_MASK, (void **)&HCPtrGuestCR3); /** @todo r=bird: This GCPhysCR3 masking isn't necessary. */
4798# endif
4799 pgmUnlock(pVM);
4800 if (RT_SUCCESS(rc))
4801 {
4802 rc = PGMMap(pVM, (RTGCPTR)pVM->pgm.s.GCPtrCR3Mapping, HCPhysGuestCR3, PAGE_SIZE, 0);
4803 if (RT_SUCCESS(rc))
4804 {
4805# ifdef IN_RC
4806 PGM_INVL_PG(pVCpu, pVM->pgm.s.GCPtrCR3Mapping);
4807# endif
4808# if PGM_GST_TYPE == PGM_TYPE_32BIT
4809 pVCpu->pgm.s.pGst32BitPdR3 = (R3PTRTYPE(PX86PD))HCPtrGuestCR3;
4810# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4811 pVCpu->pgm.s.pGst32BitPdR0 = (R0PTRTYPE(PX86PD))HCPtrGuestCR3;
4812# endif
4813 pVCpu->pgm.s.pGst32BitPdRC = (RCPTRTYPE(PX86PD))(RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping;
4814
4815# elif PGM_GST_TYPE == PGM_TYPE_PAE
4816 unsigned off = GCPhysCR3 & GST_CR3_PAGE_MASK & PAGE_OFFSET_MASK;
4817 pVCpu->pgm.s.pGstPaePdptR3 = (R3PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4818# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4819 pVCpu->pgm.s.pGstPaePdptR0 = (R0PTRTYPE(PX86PDPT))HCPtrGuestCR3;
4820# endif
4821 pVCpu->pgm.s.pGstPaePdptRC = (RCPTRTYPE(PX86PDPT))((RTRCUINTPTR)pVM->pgm.s.GCPtrCR3Mapping + off);
4822 LogFlow(("Cached mapping %RRv\n", pVCpu->pgm.s.pGstPaePdptRC));
4823
4824 /*
4825 * Map the 4 PDs too.
4826 */
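/*
 * Note: each present PDPTE references its own 4 KB page directory; these are
 * mapped right after the PDPT, i.e. at GCPtrCR3Mapping + (i + 1) * PAGE_SIZE.
 */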
4827 PX86PDPT pGuestPDPT = pgmGstGetPaePDPTPtr(pVCpu);
4828 RTGCPTR GCPtr = pVM->pgm.s.GCPtrCR3Mapping + PAGE_SIZE;
4829 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++, GCPtr += PAGE_SIZE)
4830 {
4831 if (pGuestPDPT->a[i].n.u1Present)
4832 {
4833 RTHCPTR HCPtr;
4834 RTHCPHYS HCPhys;
4835 RTGCPHYS GCPhys = pGuestPDPT->a[i].u & X86_PDPE_PG_MASK;
4836 pgmLock(pVM);
4837 PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
4838 AssertReturn(pPage, VERR_INTERNAL_ERROR_2);
4839 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
4840# if defined(IN_RC) || defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4841 HCPtr = NIL_RTHCPTR;
4842 int rc2 = VINF_SUCCESS;
4843# else
4844 int rc2 = pgmPhysGCPhys2CCPtrInternal(pVM, pPage, GCPhys, (void **)&HCPtr);
4845# endif
4846 pgmUnlock(pVM);
4847 if (RT_SUCCESS(rc2))
4848 {
4849 rc = PGMMap(pVM, GCPtr, HCPhys, PAGE_SIZE, 0);
4850 AssertRCReturn(rc, rc);
4851
4852 pVCpu->pgm.s.apGstPaePDsR3[i] = (R3PTRTYPE(PX86PDPAE))HCPtr;
4853# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4854 pVCpu->pgm.s.apGstPaePDsR0[i] = (R0PTRTYPE(PX86PDPAE))HCPtr;
4855# endif
4856 pVCpu->pgm.s.apGstPaePDsRC[i] = (RCPTRTYPE(PX86PDPAE))(RTRCUINTPTR)GCPtr;
4857 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = GCPhys;
4858# ifdef IN_RC
4859 PGM_INVL_PG(pVCpu, GCPtr);
4860# endif
4861 continue;
4862 }
4863 AssertMsgFailed(("MapCR3: rc2=%Rrc GCPhys=%RGp i=%d\n", rc2, GCPhys, i));
4864 }
4865
4866 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4867# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4868 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4869# endif
4870 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
4871 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4872# ifdef IN_RC
4873 PGM_INVL_PG(pVCpu, GCPtr); /** @todo this shouldn't be necessary? */
4874# endif
4875 }
4876
4877# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4878 pVCpu->pgm.s.pGstAmd64Pml4R3 = (R3PTRTYPE(PX86PML4))HCPtrGuestCR3;
4879# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
4880 pVCpu->pgm.s.pGstAmd64Pml4R0 = (R0PTRTYPE(PX86PML4))HCPtrGuestCR3;
4881# endif
4882# endif
4883 }
4884 else
4885 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4886 }
4887 else
4888 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4889
4890#else /* prot/real stub */
4891 int rc = VINF_SUCCESS;
4892#endif
4893
4894 /* Update shadow paging info for guest modes with paging (32, pae, 64). */
4895# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4896 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4897 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4898 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4899 && PGM_GST_TYPE != PGM_TYPE_PROT))
4900
4901 Assert(!pVM->pgm.s.fNestedPaging);
4902
4903 /*
4904 * Update the shadow root page as well since that's not fixed.
4905 */
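/*
 * Note: the new root is allocated and locked in the pool before the old one is
 * unlocked and freed further down, so neither page can be flushed from the pool
 * while the switch is in progress.
 */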
4906 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4907 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4908 uint32_t iOldShwUserTable = pVCpu->pgm.s.iShwUserTable;
4909 uint32_t iOldShwUser = pVCpu->pgm.s.iShwUser;
4910 PPGMPOOLPAGE pNewShwPageCR3;
4911
4912 pgmLock(pVM);
4913
4914# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4915 if (pPool->cDirtyPages)
4916 pgmPoolResetDirtyPages(pVM);
4917# endif
4918
4919 Assert(!(GCPhysCR3 >> (PAGE_SHIFT + 32)));
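/* The CR3 page index is stored in a 32-bit pool user table entry (iShwUserTable)
   below; the assertion above presumably guards against overflowing it. */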
4920 rc = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, SHW_POOL_ROOT_IDX, GCPhysCR3 >> PAGE_SHIFT, &pNewShwPageCR3, true /* lock page */);
4921 AssertFatalRC(rc);
4922 rc = VINF_SUCCESS;
4923
4924# ifdef IN_RC
4925 /*
4926 * WARNING! We can't deal with jumps to ring 3 in the code below as the
4927 * state will be inconsistent! Flush important things now while
4928 * we still can and then make sure there are no ring-3 calls.
4929 */
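/* Hence the REM flush and VMMRZCallRing3Disable here; ring-3 calls are only
   re-enabled below once the new root is fully installed. */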
4930 REMNotifyHandlerPhysicalFlushIfAlmostFull(pVM, pVCpu);
4931 VMMRZCallRing3Disable(pVCpu);
4932# endif
4933
4934 pVCpu->pgm.s.iShwUser = SHW_POOL_ROOT_IDX;
4935 pVCpu->pgm.s.iShwUserTable = GCPhysCR3 >> PAGE_SHIFT;
4936 pVCpu->pgm.s.CTX_SUFF(pShwPageCR3) = pNewShwPageCR3;
4937# ifdef IN_RING0
4938 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4939 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4940# elif defined(IN_RC)
4941 pVCpu->pgm.s.pShwPageCR3R3 = MMHyperCCToR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4942 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4943# else
4944 pVCpu->pgm.s.pShwPageCR3R0 = MMHyperCCToR0(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4945 pVCpu->pgm.s.pShwPageCR3RC = MMHyperCCToRC(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4946# endif
4947
4948# ifndef PGM_WITHOUT_MAPPINGS
4949 /*
4950 * Apply all hypervisor mappings to the new CR3.
4951 * Note that SyncCR3 will be executed whenever CR3 is changed in a paged guest mode; this
4952 * makes sure we check for conflicts in the new CR3 root.
4953 */
4954# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
4955 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL) || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
4956# endif
4957 rc = pgmMapActivateCR3(pVM, pNewShwPageCR3);
4958 AssertRCReturn(rc, rc);
4959# endif
4960
4961 /* Set the current hypervisor CR3. */
4962 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4963 SELMShadowCR3Changed(pVM, pVCpu);
4964
4965# ifdef IN_RC
4966 /* NOTE: The state is consistent again. */
4967 VMMRZCallRing3Enable(pVCpu);
4968# endif
4969
4970 /* Clean up the old CR3 root. */
4971 if ( pOldShwPageCR3
4972 && pOldShwPageCR3 != pNewShwPageCR3 /** @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4973 {
4974 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4975# ifndef PGM_WITHOUT_MAPPINGS
4976 /* Remove the hypervisor mappings from the shadow page table. */
4977 pgmMapDeactivateCR3(pVM, pOldShwPageCR3);
4978# endif
4979 /* Mark the page as unlocked; allow flushing again. */
4980 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4981
4982 pgmPoolFreeByPage(pPool, pOldShwPageCR3, iOldShwUser, iOldShwUserTable);
4983 }
4984 pgmUnlock(pVM);
4985# endif
4986
4987 return rc;
4988}
4989
4990/**
4991 * Unmaps the shadow CR3.
4992 *
4993 * @returns VBox status code, no specials.
4994 * @param pVCpu The VMCPU handle.
4995 */
4996PGM_BTH_DECL(int, UnmapCR3)(PVMCPU pVCpu)
4997{
4998 LogFlow(("UnmapCR3\n"));
4999
5000 int rc = VINF_SUCCESS;
5001 PVM pVM = pVCpu->CTX_SUFF(pVM);
5002
5003 /*
5004 * Update guest paging info.
5005 */
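/* Note: only the cached guest PD/PDPT/PML4 pointers are dropped here; MapCR3
   re-establishes them for the next CR3 value. */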
5006#if PGM_GST_TYPE == PGM_TYPE_32BIT
5007 pVCpu->pgm.s.pGst32BitPdR3 = 0;
5008# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
5009 pVCpu->pgm.s.pGst32BitPdR0 = 0;
5010# endif
5011 pVCpu->pgm.s.pGst32BitPdRC = 0;
5012
5013#elif PGM_GST_TYPE == PGM_TYPE_PAE
5014 pVCpu->pgm.s.pGstPaePdptR3 = 0;
5015# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
5016 pVCpu->pgm.s.pGstPaePdptR0 = 0;
5017# endif
5018 pVCpu->pgm.s.pGstPaePdptRC = 0;
5019 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
5020 {
5021 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
5022# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
5023 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
5024# endif
5025 pVCpu->pgm.s.apGstPaePDsRC[i] = 0;
5026 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
5027 }
5028
5029#elif PGM_GST_TYPE == PGM_TYPE_AMD64
5030 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
5031# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE
5032 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
5033# endif
5034
5035#else /* prot/real mode stub */
5036 /* nothing to do */
5037#endif
5038
5039#if !defined(IN_RC) /* In RC we rely on MapCR3 to do the shadow part for us at a safe time */
5040 /*
5041 * Update shadow paging info.
5042 */
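/* Note: the current shadow root (if any) is deactivated, unlocked and returned
   to the pool below; MapCR3 allocates a fresh root on the next CR3 load. */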
5043# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
5044 || PGM_SHW_TYPE == PGM_TYPE_PAE \
5045 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
5046
5047# if PGM_GST_TYPE != PGM_TYPE_REAL
5048 Assert(!pVM->pgm.s.fNestedPaging);
5049# endif
5050
5051 pgmLock(pVM);
5052
5053# ifndef PGM_WITHOUT_MAPPINGS
5054 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
5055 /* Remove the hypervisor mappings from the shadow page table. */
5056 pgmMapDeactivateCR3(pVM, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
5057# endif
5058
5059 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
5060 {
5061 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5062
5063 Assert(pVCpu->pgm.s.iShwUser != PGMPOOL_IDX_NESTED_ROOT);
5064
5065# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5066 if (pPool->cDirtyPages)
5067 pgmPoolResetDirtyPages(pVM);
5068# endif
5069
5070 /* Mark the page as unlocked; allow flushing again. */
5071 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
5072
5073 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), pVCpu->pgm.s.iShwUser, pVCpu->pgm.s.iShwUserTable);
5074 pVCpu->pgm.s.pShwPageCR3R3 = 0;
5075 pVCpu->pgm.s.pShwPageCR3R0 = 0;
5076 pVCpu->pgm.s.pShwPageCR3RC = 0;
5077 pVCpu->pgm.s.iShwUser = 0;
5078 pVCpu->pgm.s.iShwUserTable = 0;
5079 }
5080 pgmUnlock(pVM);
5081# endif
5082#endif /* !IN_RC*/
5083
5084 return rc;
5085}