VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0.cpp @ 29287

Last change on this file since 29287 was 29217, checked in by vboxsync, 15 years ago:

Shared paging updates

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id

File size: 16.5 KB
/* $Id: PGMR0.cpp 29217 2010-05-07 14:38:51Z vboxsync $ */
/** @file
 * PGM - Page Manager and Monitor, Ring-0.
 */

/*
 * Copyright (C) 2007 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */

/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM
#include <VBox/pgm.h>
#include "../PGMInternal.h"
#include <VBox/vm.h>
#include "../PGMInline.h"
#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/assert.h>
#include <iprt/mem.h>

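/* PGMR0Bth.h acts as a code template: redefining PGM_BTH_NAME and
   re-including it stamps out one set of "Both" (shadow + guest) mode
   declarations per shadow paging mode used by the nested-paging trap
   handler below (32-bit, PAE, AMD64 and EPT, each paired with a
   protected-mode guest). */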
RT_C_DECLS_BEGIN
#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
#include "PGMR0Bth.h"
#undef PGM_BTH_NAME

RT_C_DECLS_END


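/* Background note: the "handy pages" in pVM->pgm.s.aHandyPages serve as a
   small ring-0 cache of pre-allocated host pages that PGM draws on when
   backing guest RAM, so most page allocations need no ring-3 round trip.
   The worker below refills that cache from GMM, the global memory manager. */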
/**
 * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
 *
 * @returns The following VBox status codes.
 * @retval  VINF_SUCCESS on success. FF cleared.
 * @retval  VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
 *
 * @param   pVM         The VM handle.
 * @param   pVCpu       The VMCPU handle.
 *
 * @remarks Must be called from within the PGM critical section. The caller
 *          must clear the new pages.
 */
VMMR0DECL(int) PGMR0PhysAllocateHandyPages(PVM pVM, PVMCPU pVCpu)
{
    Assert(PDMCritSectIsOwnerEx(&pVM->pgm.s.CritSect, pVCpu->idCpu));

    /*
     * Check for error injection.
     */
    if (RT_UNLIKELY(pVM->pgm.s.fErrInjHandyPages))
        return VERR_NO_MEMORY;

    /*
     * Try allocate a full set of handy pages.
     */
    uint32_t iFirst = pVM->pgm.s.cHandyPages;
    AssertReturn(iFirst <= RT_ELEMENTS(pVM->pgm.s.aHandyPages), VERR_INTERNAL_ERROR);
    uint32_t cPages = RT_ELEMENTS(pVM->pgm.s.aHandyPages) - iFirst;
    if (!cPages)
        return VINF_SUCCESS;
    int rc = GMMR0AllocateHandyPages(pVM, pVCpu->idCpu, cPages, cPages, &pVM->pgm.s.aHandyPages[iFirst]);
    if (RT_SUCCESS(rc))
    {
        for (uint32_t i = 0; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
        {
            Assert(pVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
            Assert(pVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
            Assert(pVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
            Assert(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
            Assert(!(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
        }

        pVM->pgm.s.cHandyPages = RT_ELEMENTS(pVM->pgm.s.aHandyPages);
    }
    else if (rc != VERR_GMM_SEED_ME)
    {
        if (    (   rc == VERR_GMM_HIT_GLOBAL_LIMIT
                 || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
            &&  iFirst < PGM_HANDY_PAGES_MIN)
        {

#ifdef VBOX_STRICT
            /* We're ASSUMING that GMM has updated all the entries before failing us. */
            uint32_t i;
            for (i = iFirst; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
            {
                Assert(pVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
                Assert(pVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                Assert(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
            }
#endif

            /*
             * Reduce the number of pages until we hit the minimum limit.
             */
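            /* Each retry requests a quarter of the previous amount, clamped
               so iFirst + cPages never drops below PGM_HANDY_PAGES_MIN; we
               give up once even the minimum request fails. */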
            do
            {
                cPages >>= 2;
                if (cPages + iFirst < PGM_HANDY_PAGES_MIN)
                    cPages = PGM_HANDY_PAGES_MIN - iFirst;
                rc = GMMR0AllocateHandyPages(pVM, pVCpu->idCpu, cPages, cPages, &pVM->pgm.s.aHandyPages[iFirst]);
            } while (   (   rc == VERR_GMM_HIT_GLOBAL_LIMIT
                         || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
                     && cPages + iFirst > PGM_HANDY_PAGES_MIN);
            if (RT_SUCCESS(rc))
            {
#ifdef VBOX_STRICT
                i = iFirst + cPages;
                while (i-- > 0)
                {
                    Assert(pVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
                    Assert(pVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
                    Assert(pVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                    Assert(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
                    Assert(!(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
                }

                for (i = cPages + iFirst; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
                {
                    Assert(pVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
                    Assert(pVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
                    Assert(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
                }
#endif

                pVM->pgm.s.cHandyPages = iFirst + cPages;
            }
        }

        if (RT_FAILURE(rc) && rc != VERR_GMM_SEED_ME)
        {
            LogRel(("PGMR0PhysAllocateHandyPages: rc=%Rrc iFirst=%d cPages=%d\n", rc, iFirst, cPages));
            VM_FF_SET(pVM, VM_FF_PGM_NO_MEMORY);
        }
    }


    LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
    return rc;
}

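/*
 * Illustrative sketch only, not part of the original file: a ring-0 caller
 * that finds the handy page set running low could refill it roughly like
 * this, honouring the remark above that the PGM critical section must be
 * held (the helper name is hypothetical; pgmLock/pgmUnlock are the lock
 * helpers this file itself uses):
 */
#if 0
static int pgmR0ExampleRefillHandyPages(PVM pVM, PVMCPU pVCpu)
{
    pgmLock(pVM);                                       /* enter the PGM critical section */
    int rc = PGMR0PhysAllocateHandyPages(pVM, pVCpu);   /* refill aHandyPages from GMM */
    pgmUnlock(pVM);
    return rc;                                          /* see the status codes documented above */
}
#endif
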
/**
 * Worker function for PGMR3PhysAllocateLargeHandyPage.
 *
 * @returns The following VBox status codes.
 * @retval  VINF_SUCCESS on success.
 * @retval  VINF_EM_NO_MEMORY if we're out of memory.
 *
 * @param   pVM         The VM handle.
 * @param   pVCpu       The VMCPU handle.
 *
 * @remarks Must be called from within the PGM critical section. The caller
 *          must clear the new pages.
 */
VMMR0DECL(int) PGMR0PhysAllocateLargeHandyPage(PVM pVM, PVMCPU pVCpu)
{
    Assert(PDMCritSectIsOwnerEx(&pVM->pgm.s.CritSect, pVCpu->idCpu));

    Assert(!pVM->pgm.s.cLargeHandyPages);
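    /* Request a single 2 MB (_2M) large page from GMM; on success its page
       id and host physical address are parked in aLargeHandyPage[0] for the
       caller to consume. */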
    int rc = GMMR0AllocateLargePage(pVM, pVCpu->idCpu, _2M, &pVM->pgm.s.aLargeHandyPage[0].idPage, &pVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys);
    if (RT_SUCCESS(rc))
        pVM->pgm.s.cLargeHandyPages = 1;

    return rc;
}

/**
 * #PF Handler for nested paging.
 *
 * @returns VBox status code (appropriate for trap handling and GC return).
 * @param   pVM                 VM Handle.
 * @param   pVCpu               VMCPU Handle.
 * @param   enmShwPagingMode    Paging mode for the nested page tables.
 * @param   uErr                The trap error code.
 * @param   pRegFrame           Trap register frame.
 * @param   pvFault             The fault address.
 */
VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PVM pVM, PVMCPU pVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPHYS pvFault)
{
    int rc;

    LogFlow(("PGMTrap0eHandler: uErr=%RGx pvFault=%RGp eip=%RGv\n", uErr, pvFault, (RTGCPTR)pRegFrame->rip));
    STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0e, a);
    STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = NULL; } );

    /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
    AssertMsg(   enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE      || enmShwPagingMode == PGMMODE_PAE_NX
              || enmShwPagingMode == PGMMODE_AMD64  || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
              ("enmShwPagingMode=%d\n", enmShwPagingMode));

#ifdef VBOX_WITH_STATISTICS
    /*
     * Error code stats.
     */
    if (uErr & X86_TRAP_PF_US)
    {
        if (!(uErr & X86_TRAP_PF_P))
        {
            if (uErr & X86_TRAP_PF_RW)
                STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSNotPresentWrite);
            else
                STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSNotPresentRead);
        }
        else if (uErr & X86_TRAP_PF_RW)
            STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSWrite);
        else if (uErr & X86_TRAP_PF_RSVD)
            STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSReserved);
        else if (uErr & X86_TRAP_PF_ID)
            STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSNXE);
        else
            STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSRead);
    }
    else
    {   /* Supervisor */
        if (!(uErr & X86_TRAP_PF_P))
        {
            if (uErr & X86_TRAP_PF_RW)
                STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eSVNotPresentWrite);
            else
                STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eSVNotPresentRead);
        }
        else if (uErr & X86_TRAP_PF_RW)
            STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eSVWrite);
        else if (uErr & X86_TRAP_PF_ID)
            STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eSNXE);
        else if (uErr & X86_TRAP_PF_RSVD)
            STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eSVReserved);
    }
#endif

    /*
     * Call the worker.
     *
     * We pretend the guest is in protected mode without paging, so we can use existing code to build the
     * nested page tables.
     */
    bool fLockTaken = false;
    switch (enmShwPagingMode)
    {
        case PGMMODE_32_BIT:
            rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, pvFault, &fLockTaken);
            break;
        case PGMMODE_PAE:
        case PGMMODE_PAE_NX:
            rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, pvFault, &fLockTaken);
            break;
        case PGMMODE_AMD64:
        case PGMMODE_AMD64_NX:
            rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, pvFault, &fLockTaken);
            break;
        case PGMMODE_EPT:
            rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, pvFault, &fLockTaken);
            break;
        default:
            AssertFailed();
            rc = VERR_INVALID_PARAMETER;
            break;
    }
    if (fLockTaken)
    {
        Assert(PGMIsLockOwner(pVM));
        pgmUnlock(pVM);
    }
    if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
        rc = VINF_SUCCESS;
    else
    /* Note: hack alert for a difficult-to-reproduce problem. */
    if (   rc == VERR_PAGE_NOT_PRESENT                 /* SMP only; disassembly might fail. */
        || rc == VERR_PAGE_TABLE_NOT_PRESENT           /* seen with UNI & SMP */
        || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT   /* seen with SMP */
        || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT)     /* precaution */
    {
        Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, pvFault, uErr, pRegFrame->rip));
        /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about single VCPU VMs though. */
        rc = VINF_SUCCESS;
    }

    STAM_STATS({ if (!pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution))
                    pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2Misc; });
    STAM_PROFILE_STOP_EX(&pVCpu->pgm.s.StatRZTrap0e, pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution), a);
    return rc;
}
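
/*
 * Illustrative sketch only, not part of the original file: the hardware
 * virtualisation exit paths are the expected callers of the handler above.
 * An EPT violation handler might dispatch to it roughly like this (the
 * function and variable names here are hypothetical; CPUMCTX2CORE is the
 * usual CPUM macro for viewing a guest context as a register frame):
 */
#if 0
static int hmR0ExampleEptViolation(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx,
                                   RTGCUINT uErrorCode, RTGCPHYS GCPhysFault)
{
    /* Nested paging is active, so hand the guest-physical fault straight to PGM. */
    return PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, uErrorCode,
                                          CPUMCTX2CORE(pCtx), GCPhysFault);
}
#endif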

#ifdef VBOX_WITH_PAGE_SHARING
/**
 * Check a registered module for shared page changes.
 *
 * @returns The following VBox status codes.
 *
 * @param   pVM         The VM handle.
 * @param   pVCpu       The VMCPU handle.
 * @param   pReq        Module request packet.
 */
VMMR0DECL(int) PGMR0SharedModuleCheck(PVM pVM, PVMCPU pVCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
{
    int rc = VINF_SUCCESS;
    PGMMSHAREDPAGEDESC paPageDesc = NULL;
    uint32_t cbPreviousRegion = 0;
    bool fFlushTLBs = false;

    /*
     * Validate input.
     */
    AssertPtrReturn(pReq, VERR_INVALID_POINTER);
    AssertMsgReturn(pReq->Hdr.cbReq >= sizeof(*pReq) && pReq->Hdr.cbReq == RT_UOFFSETOF(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);

    pgmLock(pVM);

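    /* Everything below runs under the PGM lock: walk each region of the
       module, collect page descriptors, and let GMM decide which private
       pages can be replaced with shared copies. */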
    /* Check every region of the shared module. */
    for (unsigned i = 0; i < pReq->cRegions; i++)
    {
        Assert((pReq->aRegions[i].cbRegion & 0xfff) == 0);
        Assert((pReq->aRegions[i].GCRegionAddr & 0xfff) == 0);

        RTGCPTR  GCRegion = pReq->aRegions[i].GCRegionAddr;
        unsigned cbRegion = pReq->aRegions[i].cbRegion & ~0xfff;
        unsigned idxPage  = 0;
        bool     fValidChanges = false;

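        /* Grow-only reuse of the page descriptor scratch array: reallocate
           only when the current region is larger than any seen so far. */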
        if (cbPreviousRegion < cbRegion)
        {
            if (paPageDesc)
                RTMemFree(paPageDesc);

            paPageDesc = (PGMMSHAREDPAGEDESC)RTMemAlloc((cbRegion >> PAGE_SHIFT) * sizeof(*paPageDesc));
            if (!paPageDesc)
            {
                AssertFailed();
                rc = VERR_NO_MEMORY;
                goto end;
            }
            cbPreviousRegion = cbRegion;
        }

        while (cbRegion)
        {
            RTGCPHYS GCPhys;
            uint64_t fFlags;

            rc = PGMGstGetPage(pVCpu, GCRegion, &GCPhys, &fFlags);
            if (    rc == VINF_SUCCESS
                &&  !(fFlags & X86_PTE_RW)) /* important as we make assumptions about this below! */
            {
                PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhys);
                if (    pPage
                    &&  !PGM_PAGE_IS_SHARED(pPage))
                {
                    fValidChanges = true;
                    paPageDesc[idxPage].uHCPhysPageId = PGM_PAGE_GET_PAGEID(pPage);
                    paPageDesc[idxPage].HCPhys        = PGM_PAGE_GET_HCPHYS(pPage);
                    paPageDesc[idxPage].GCPhys        = GCPhys;
                }
                else
                    paPageDesc[idxPage].uHCPhysPageId = NIL_GMM_PAGEID;
            }
            else
                paPageDesc[idxPage].uHCPhysPageId = NIL_GMM_PAGEID;

            idxPage++;
            GCRegion += PAGE_SIZE;
            cbRegion -= PAGE_SIZE;
        }

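        /* Phase two: hand the collected descriptors to GMM, which compares
           the pages against the registered module and swaps in shared copies
           where they match; then update PGM's tracking for each replaced
           page. */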
        if (fValidChanges)
        {
            rc = GMMR0SharedModuleCheckRange(pVM, pVCpu->idCpu, pReq, i, idxPage, paPageDesc);
            AssertRC(rc);
            if (RT_FAILURE(rc))
                break;

            for (unsigned i = 0; i < idxPage; i++)
            {
                /* Any change for this page? */
                if (paPageDesc[i].uHCPhysPageId != NIL_GMM_PAGEID)
                {
                    /** @todo maybe cache these to prevent the nth lookup. */
                    PPGMPAGE pPage = pgmPhysGetPage(&pVM->pgm.s, paPageDesc[i].GCPhys);
                    if (!pPage)
                    {
                        /* Should never happen. */
                        AssertFailed();
                        rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
                        goto end;
                    }
                    Assert(!PGM_PAGE_IS_SHARED(pPage));

                    if (paPageDesc[i].HCPhys != PGM_PAGE_GET_HCPHYS(pPage))
                    {
                        bool fFlush = false;

                        /* Page was replaced by an existing shared version of it; clear all references first. */
                        rc = pgmPoolTrackUpdateGCPhys(pVM, paPageDesc[i].GCPhys, pPage, true /* clear the entries */, &fFlush);
                        if (RT_FAILURE(rc))
                        {
                            AssertRC(rc);
                            goto end;
                        }
                        Assert(rc == VINF_SUCCESS || (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3) && (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)));
                        if (rc == VINF_SUCCESS)
                            fFlushTLBs |= fFlush;

                        /* Update the physical address and page id now. */
                        PGM_PAGE_SET_HCPHYS(pPage, paPageDesc[i].HCPhys);
                        PGM_PAGE_SET_PAGEID(pPage, paPageDesc[i].uHCPhysPageId);

                        /* Invalidate page map TLB entry for this page too. */
                        PGMPhysInvalidatePageMapTLBEntry(pVM, paPageDesc[i].GCPhys);
                    }
                    /* else nothing changed (== this page is now a shared page), so no need to flush anything. */

                    PGM_PAGE_SET_STATE(pPage, PGM_PAGE_STATE_SHARED);
                }
            }
        }
    }

end:
    pgmUnlock(pVM);
    if (fFlushTLBs)
        PGM_INVL_ALL_VCPU_TLBS(pVM);

    if (paPageDesc)
        RTMemFree(paPageDesc);

    return rc;
}
#endif