VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp@ 14362

Last change on this file since 14362 was 14362, checked in by vboxsync, 16 years ago

#1865: ring-0 mapping cache, code in progress.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 18.2 KB
1/* $Id: PGMR0DynMap.cpp 14362 2008-11-19 17:04:25Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#include <VBox/pgm.h>
26#include "../PGMInternal.h"
27#include <VBox/vm.h>
28#include <VBox/err.h>
29#include <iprt/asm.h>
30#include <iprt/assert.h>
31#include <iprt/cpuset.h>
32#include <iprt/spinlock.h>
33
34
35/*******************************************************************************
36* Structures and Typedefs *
37*******************************************************************************/
38/**
39 * Ring-0 dynamic mapping cache segment.
40 *
41 * The dynamic mapping cache can be extended with additional segments if the
42 * load is found to be too high. This is done the next time a VM is created, under
43 * the protection of the init mutex. The arrays are reallocated and the new
44 * segment is added to the end of them. Nothing is rehashed, of course, as the
45 * indexes / addresses must remain unchanged. (A standalone sketch follows the typedefs below.)
46 *
47 * This structure is only modified while owning the init mutex or during module
48 * init / term.
49 */
50typedef struct PGMR0DYNMAPSEG
51{
52 /** Pointer to the next segment. */
53 struct PGMR0DYNMAPSEG *pNext;
54 /** The memory object for the virtual address range that we're abusing. */
55 RTR0MEMOBJ hMemObj;
56 /** The memory object for the page tables. */
57 RTR0MEMOBJ hMemObjPT;
58 /** The start page in the cache. (I.e. index into the arrays.) */
59 uint32_t iPage;
60 /** The number of pages this segment contributes. */
61 uint32_t cPages;
62} PGMR0DYNMAPSEG;
63/** Pointer to a ring-0 dynamic mapping cache segment. */
64typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
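A minimal standalone sketch (not part of the file) of what the comment above describes: the page array is grown, the new segment records where its pages start, and nothing is rehashed because existing indexes stay valid. Plain realloc/calloc and simplified stand-in types are assumed here in place of the IPRT memory objects and the init mutex.

/* Standalone sketch: appending a segment without rehashing. */
#include <stdint.h>
#include <stdlib.h>

typedef struct SKETCHSEG
{
    struct SKETCHSEG   *pNext;
    uint32_t            iPage;      /* first page index contributed by this segment */
    uint32_t            cPages;     /* number of pages contributed */
} SKETCHSEG;

typedef struct SKETCHCACHE
{
    void              **paPages;    /* stands in for the PGMR0DYNMAPENTRY array */
    uint32_t            cPages;
    SKETCHSEG          *pSegHead;
} SKETCHCACHE;

static int sketchAddSeg(SKETCHCACHE *pCache, uint32_t cNewPages)
{
    /* Grow the array; existing indexes stay valid, so nothing needs rehashing. */
    void **paNew = (void **)realloc(pCache->paPages,
                                    (size_t)(pCache->cPages + cNewPages) * sizeof(void *));
    if (!paNew)
        return -1;
    pCache->paPages = paNew;

    SKETCHSEG *pSeg = (SKETCHSEG *)calloc(1, sizeof(*pSeg));
    if (!pSeg)
        return -1;
    pSeg->iPage      = pCache->cPages;   /* the segment starts where the old array ended */
    pSeg->cPages     = cNewPages;
    pSeg->pNext      = pCache->pSegHead;
    pCache->pSegHead = pSeg;

    pCache->cPages  += cNewPages;
    return 0;
}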
65
66
67/**
68 * Ring-0 dynamic mapping cache entry.
69 *
70 * This structure tracks a single page mapping in the ring-0 dynamic mapping cache.
71 */
72typedef struct PGMR0DYNMAPENTRY
73{
74 /** The physical address of the currently mapped page.
75 * This is duplicated for three reasons: cache locality, the cache policy of the
76 * PT mappings, and sanity checks. */
77 RTHCPHYS HCPhys;
78 /** Pointer to the page. */
79 void *pvPage;
80 /** The number of references. */
81 int32_t volatile cRefs;
82 /** PTE pointer union. */
83 union PGMR0DYNMAPENTRY_PPTE
84 {
85 /** PTE pointer, 32-bit legacy version. */
86 PX86PTE pLegacy;
87 /** PTE pointer, PAE version. */
88 PX86PTEPAE pPae;
89 } uPte;
90 /** CPUs that haven't invalidated this entry after its last update. (See the sketch after the typedefs.) */
91 RTCPUSET PendingSet;
92} PGMR0DYNMAPENTRY;
93/** Pointer to a ring-0 dynamic mapping cache entry. */
94typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
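A standalone sketch (not part of the file) of the PendingSet idea used by this entry: after an entry is repointed at a new physical page, every CPU is marked as still needing a TLB invalidation, and each CPU clears its own bit and flushes the mapping the first time it uses the entry. A plain 64-bit mask stands in for RTCPUSET.

#include <stdint.h>

typedef struct SKETCHENTRY
{
    uint64_t fPendingCpus;  /* stands in for RTCPUSET */
    void    *pvPage;
} SKETCHENTRY;

static void sketchEntryRemapped(SKETCHENTRY *pEntry)
{
    pEntry->fPendingCpus = UINT64_MAX;          /* every CPU must invalidate */
}

static void sketchEntryUsedOnCpu(SKETCHENTRY *pEntry, unsigned iCpu)
{
    uint64_t const fCpu = UINT64_C(1) << iCpu;
    if (pEntry->fPendingCpus & fCpu)
    {
        pEntry->fPendingCpus &= ~fCpu;          /* this CPU is now up to date */
        /* the real code issues invlpg here via ASMInvalidatePage(pEntry->pvPage) */
    }
}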
95
96
97/**
98 * Ring-0 dynamic mapping cache.
99 *
100 * This is initialized during VMMR0 module init but no segments are allocated at
101 * that time. Segments will be added when the first VM is started and removed
102 * again when the last VM shuts down, thus avoiding memory consumption while dormant.
103 * At module termination, the remaining bits will be freed up.
104 */
105typedef struct PGMR0DYNMAP
106{
107 /** The usual magic number / eye catcher. */
108 uint32_t u32Magic;
109 /** Spinlock serializing the normal operation of the cache. */
110 RTSPINLOCK hSpinlock;
111 /** Array for tracking and managing the pages. */
112 PPGMR0DYNMAPENTRY paPages;
113 /** The cache size given as a number of pages. */
114 uint32_t cPages;
115 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
116 bool fLegacyMode;
117 /** The current load. */
118 uint32_t cLoad;
119 /** The max load.
120 * This is maintained to trigger the addition of more mapping space. */
121 uint32_t cMaxLoad;
122 /** Initialization / termination lock. */
123 RTSEMFASTMUTEX hInitLock;
124 /** The number of users (protected by hInitLock). */
125 uint32_t cUsers;
126 /** Array containing a copy of the original page tables.
127 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
128 void *pvSavedPTs;
129} PGMR0DYNMAP;
130/** Pointer to the ring-0 dynamic mapping cache */
131typedef PGMR0DYNMAP *PPGMR0DYNMAP;
132
133
134/*******************************************************************************
135* Global Variables *
136*******************************************************************************/
137/** Pointer to the ring-0 dynamic mapping cache. */
138static PPGMR0DYNMAP g_pPGMR0DynMap;
139
140
141
142
143/**
144 * Initializes the ring-0 dynamic mapping cache.
145 *
146 * @returns VBox status code.
147 */
148VMMR0DECL(int) PGMR0DynMapInit(void)
149{
150 return VINF_SUCCESS;
151}
152
153
154/**
155 * Terminates the ring-0 dynamic mapping cache.
156 */
157VMMR0DECL(void) PGMR0DynMapTerm(void)
158{
159}
160
161
162/**
163 * Initializes the dynamic mapping cache for a new VM.
164 *
165 * @returns VBox status code.
166 * @param pVM Pointer to the shared VM structure.
167 */
168VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
169{
170 NOREF(pVM);
171 return VINF_SUCCESS;
172}
173
174
175/**
176 * Terminates the dynamic mapping cache usage for a VM.
177 *
178 * @param pVM Pointer to the shared VM structure.
179 */
180VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
181{
182 NOREF(pVM);
183}
184
185
186/**
187 * Release references to a page, caller owns the spin lock.
188 *
189 * @param pThis The dynamic mapping cache instance.
190 * @param iPage The page.
191 * @param cRefs The number of references to release.
192 */
193DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs)
194{
195 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs);
196 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
197 if (!cRefs)
198 pThis->cLoad--;
199}
200
201
202/**
203 * Release references to a page, caller does not own the spin lock.
204 *
205 * @param pThis The dynamic mapping cache instance.
206 * @param iPage The page.
207 * @param cRefs The number of references to release.
208 */
209static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs)
210{
211 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
212 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
213 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
214 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
215}
216
217
218/**
219 * pgmR0DynMapPage worker that deals with the tedious bits.
220 *
221 * @returns The page index on success, UINT32_MAX on failure.
222 * @param pThis The dynamic mapping cache instance.
223 * @param HCPhys The address of the page to be mapped.
224 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
225 */
226static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage)
227{
228 /*
229 * Check if any of the first 5 pages are unreferenced since the caller
230 * has already made sure they aren't matching.
231 */
232 uint32_t const cPages = pThis->cPages;
233 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
234 uint32_t iFreePage;
235 if (!paPages[iPage].cRefs)
236 iFreePage = iPage;
237 else if (!paPages[(iPage + 1) % cPages].cRefs)
238 iFreePage = (iPage + 1) % cPages;
239 else if (!paPages[(iPage + 2) % cPages].cRefs)
240 iFreePage = (iPage + 2) % cPages;
241 else if (!paPages[(iPage + 3) % cPages].cRefs)
242 iFreePage = (iPage + 3) % cPages;
243 else if (!paPages[(iPage + 4) % cPages].cRefs)
244 iFreePage = (iPage + 4) % cPages;
245 else
246 {
247 /*
248 * Search for an unused or matching entry.
249 */
250 iFreePage = (iPage + 5) % pThis->cPages;
251 for (;;)
252 {
253 if (paPages[iFreePage].HCPhys == HCPhys)
254 return iFreePage;
255 if (!paPages[iFreePage].cRefs)
256 break;
257
258 /* advance */
259 iFreePage = (iFreePage + 1) % cPages;
260 if (RT_UNLIKELY(iFreePage == iPage))
261 return UINT32_MAX;
262 }
263 }
264
265 /*
266 * Setup the new entry.
267 */
268 paPages[iFreePage].HCPhys = HCPhys;
269 RTCpuSetFill(&paPages[iFreePage].PendingSet);
270 if (pThis->fLegacyMode)
271 paPages[iFreePage].uPte.pLegacy->u = (paPages[iFreePage].uPte.pLegacy->u & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
272 | X86_PTE_P | X86_PTE_A | X86_PTE_D
273 | (HCPhys & X86_PTE_PG_MASK);
274 else
275 paPages[iFreePage].uPte.pPae->u = (paPages[iFreePage].uPte.pPae->u & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
276 | X86_PTE_P | X86_PTE_A | X86_PTE_D
277 | (HCPhys & X86_PTE_PAE_PG_MASK);
278 return iFreePage;
279}
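A standalone sketch (not part of the file) of how the PAE PTE value above is composed: the caching and global attributes of the original mapping are preserved, present/accessed/dirty are forced, and the new page frame is inserted. The bit constants below are the standard x86 ones, redefined locally so the snippet compiles on its own.

#include <stdint.h>

#define MY_PTE_P            UINT64_C(0x0000000000000001)
#define MY_PTE_PWT          UINT64_C(0x0000000000000008)
#define MY_PTE_PCD          UINT64_C(0x0000000000000010)
#define MY_PTE_A            UINT64_C(0x0000000000000020)
#define MY_PTE_D            UINT64_C(0x0000000000000040)
#define MY_PTE_PAT          UINT64_C(0x0000000000000080)
#define MY_PTE_G            UINT64_C(0x0000000000000100)
#define MY_PTE_PAE_PG_MASK  UINT64_C(0x000ffffffffff000)

static uint64_t sketchComposePaePte(uint64_t uOldPte, uint64_t HCPhys)
{
    /* Keep the caching / global attributes of the original mapping,
       force present/accessed/dirty, and point at the new page frame. */
    return (uOldPte & (MY_PTE_G | MY_PTE_PAT | MY_PTE_PCD | MY_PTE_PWT))
         | MY_PTE_P | MY_PTE_A | MY_PTE_D
         | (HCPhys & MY_PTE_PAE_PG_MASK);
}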
280
281
282/**
283 * Maps a page into the pool.
284 *
285 * @returns Pointer to the mapping.
286 * @param pThis The dynamic mapping cache instance.
287 * @param HCPhys The address of the page to be mapped.
288 * @param piPage Where to store the page index.
289 */
290DECLINLINE(void *) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t *piPage)
291{
292 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
293 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
294 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
295
296 /*
297 * Find an entry, if possible a matching one. The HCPhys address is hashed
298 * down to a page index, collisions are handled by linear searching. Optimize
299 * for a hit in the first 5 pages.
300 *
301 * Do the cheap hits here and defer the tedious searching and inserting
302 * to a helper function. (A standalone sketch of the scheme follows this function.)
303 */
304 uint32_t const cPages = pThis->cPages;
305 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
306 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
307 if (paPages[iPage].HCPhys != HCPhys)
308 {
309 uint32_t iPage2 = (iPage + 1) % cPages;
310 if (paPages[iPage2].HCPhys != HCPhys)
311 {
312 iPage2 = (iPage + 2) % cPages;
313 if (paPages[iPage2].HCPhys != HCPhys)
314 {
315 iPage2 = (iPage + 3) % cPages;
316 if (paPages[iPage2].HCPhys != HCPhys)
317 {
318 iPage2 = (iPage + 4) % cPages;
319 if (paPages[iPage2].HCPhys != HCPhys)
320 {
321 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage);
322 if (RT_UNLIKELY(iPage == UINT32_MAX))
323 {
324 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
325 return NULL;
326 }
327 }
328 else
329 iPage = iPage2;
330 }
331 else
332 iPage = iPage2;
333 }
334 else
335 iPage = iPage2;
336 }
337 else
338 iPage = iPage2;
339 }
340
341 /*
342 * Reference it, update statistics and get the return address.
343 */
344 if (ASMAtomicIncS32(&paPages[iPage].cRefs) == 1)
345 {
346 pThis->cLoad++;
347 if (pThis->cLoad > pThis->cMaxLoad)
348 pThis->cMaxLoad = pThis->cLoad;
349 Assert(pThis->cLoad <= pThis->cPages);
350 }
351 void *pvPage = paPages[iPage].pvPage;
352
353 /*
354 * Invalidate the entry?
355 */
356 RTCPUID idRealCpu = RTMpCpuId();
357 bool fInvalidateIt = RTCpuSetIsMember(&paPages[iPage].PendingSet, idRealCpu);
358 if (fInvalidateIt)
359 RTCpuSetDel(&paPages[iPage].PendingSet, idRealCpu);
360
361 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
362
363 /*
364 * Do the actual invalidation outside the spinlock.
365 */
366 ASMInvalidatePage(pvPage);
367
368 *piPage = iPage;
369 return pvPage;
370}
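A standalone sketch (not part of the file) of the hashing scheme described above: the page frame number is reduced to a slot index with a simple modulo, and collisions fall back to a linear scan that wraps around the table. It assumes a non-empty table and a 4 KB page size.

#include <stdint.h>

static uint32_t sketchHashHCPhys(uint64_t HCPhys, uint32_t cPages)
{
    return (uint32_t)((HCPhys >> 12) % cPages);   /* PAGE_SHIFT == 12 on x86 */
}

/* Returns the slot holding HCPhys, or UINT32_MAX if it isn't in the table. */
static uint32_t sketchFindSlot(uint64_t const *paHCPhys, uint32_t cPages, uint64_t HCPhys)
{
    uint32_t const iStart = sketchHashHCPhys(HCPhys, cPages);
    uint32_t       iSlot  = iStart;
    do
    {
        if (paHCPhys[iSlot] == HCPhys)
            return iSlot;
        iSlot = (iSlot + 1) % cPages;             /* linear probing */
    } while (iSlot != iStart);
    return UINT32_MAX;                            /* scanned the whole table */
}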
371
372
373/**
374 * Signals the start of a new set of mappings.
375 *
376 * Mostly for strictness. PGMDynMapHCPage won't work unless this
377 * API is called.
378 *
379 * @param pVCpu The shared data for the current virtual CPU.
380 */
381VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu)
382{
383 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
384 pVCpu->pgm.s.AutoSet.cEntries = 0;
385}
386
387
388/**
389 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associated
390 * APIs since the PGMDynMapStartAutoSet call.
391 *
392 * @param pVCpu The shared data for the current virtual CPU.
393 */
394VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu)
395{
396 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
397
398 /* close the set */
399 uint32_t i = pVCpu->pgm.s.AutoSet.cEntries;
400 AssertMsg(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries), ("%u\n", i));
401 pVCpu->pgm.s.AutoSet.cEntries = PGMMAPSET_CLOSED;
402
403 /* release any pages we're referencing. */
404 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries)))
405 {
406 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
407 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
408 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
409
410 while (i-- > 0)
411 {
412 uint32_t iPage = pSet->aEntries[i].iPage;
413 Assert(iPage < pThis->cPages);
414 int32_t cRefs = pSet->aEntries[i].cRefs;
415 Assert(cRefs > 0);
416 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
417 }
418
419 Assert(pThis->cLoad <= pThis->cPages);
420 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
421 }
422}
423
424
425/**
426 * Migrates the automatic mapping set of the current vCPU if necessary.
427 *
428 * This is called when re-entering the hardware assisted execution mode after a
429 * nip down to ring-3. We run the risk that the CPU might have changed, and we
430 * will therefore make sure all the cache entries currently in the auto set will
431 * be valid on the new CPU. If the CPU didn't change, nothing will happen as all
432 * the entries will have been flagged as invalidated.
433 *
434 * @param pVCpu The shared data for the current virtual CPU.
435 * @thread EMT
436 */
437VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu)
438{
439 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
440 uint32_t i = pVCpu->pgm.s.AutoSet.cEntries;
441 AssertMsg(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries), ("%u\n", i));
442 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries)))
443 {
444 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
445 RTCPUID idRealCpu = RTMpCpuId();
446
447 while (i-- > 0)
448 {
449 Assert(pSet->aEntries[i].cRefs > 0);
450 uint32_t iPage = pSet->aEntries[i].iPage;
451 Assert(iPage < pThis->cPages);
452 if (RTCpuSetIsMember(&pThis->paPages[iPage].PendingSet, idRealCpu))
453 {
454 RTCpuSetDel(&pThis->paPages[iPage].PendingSet, idRealCpu);
455 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
456 }
457 }
458 }
459}
460
461
462/**
463 * As a final resort for a full auto set, try to merge duplicate entries. (A short example follows the function.)
464 *
465 * @param pSet The set.
466 */
467static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
468{
469 for (uint32_t i = 0; i < pSet->cEntries; i++)
470 {
471 uint16_t const iPage = pSet->aEntries[i].iPage;
472 uint32_t j = i + 1;
473 while (j < pSet->cEntries)
474 {
475 if (pSet->aEntries[j].iPage != iPage)
476 j++;
477 else
478 {
479 /* merge j with i removing j. */
480 pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs;
481 pSet->cEntries--;
482 if (j < pSet->cEntries)
483 {
484 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
485 pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX;
486 pSet->aEntries[pSet->cEntries].cRefs = 0;
487 }
488 else
489 {
490 pSet->aEntries[j].iPage = UINT16_MAX;
491 pSet->aEntries[j].cRefs = 0;
492 }
493 }
494 }
495 }
496}
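For example, a full set holding the entries {iPage=7, cRefs=2}, {iPage=3, cRefs=1}, {iPage=7, cRefs=1} would be collapsed by the function above to {iPage=7, cRefs=3}, {iPage=3, cRefs=1}, freeing one slot for the caller.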
497
498
499/* documented elsewhere - a bit of a mess. */
500VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv)
501{
502 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
503
504 /*
505 * Map it.
506 */
507 uint32_t iPage;
508 void *pvPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, &iPage);
509 if (RT_UNLIKELY(!pvPage))
510 {
511 static uint32_t s_cBitched = 0;
512 if (++s_cBitched < 10)
513 LogRel(("PGMDynMapHCPage: cLoad=%u/%u cPages=%u\n",
514 g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages));
515 return VERR_PGM_DYNMAP_FAILED;
516 }
517
518 /*
519 * Add the page to the auto reference set.
520 * If it's less than half full, don't bother looking for duplicates.
521 */
522 PVMCPU pVCpu = VMMGetCpu(pVM);
523 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
524 if (pSet->cEntries < RT_ELEMENTS(pSet->aEntries) / 2)
525 {
526 pSet->aEntries[pSet->cEntries].cRefs = 1;
527 pSet->aEntries[pSet->cEntries++].iPage = iPage; /* grow the set */
528 }
529 else
530 {
531 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
532 int32_t i = pSet->cEntries;
533 while (i-- > 0)
534 if (pSet->aEntries[i].iPage == iPage)
535 {
536 pSet->aEntries[i].cRefs++;
537 break;
538 }
539 if (i < 0)
540 {
541 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
542 pgmDynMapOptimizeAutoSet(pSet);
543 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
544 {
545 pSet->aEntries[pSet->cEntries].cRefs = 1;
546 pSet->aEntries[pSet->cEntries++].iPage = iPage; /* grow the set */
547 }
548 else
549 {
550 /* We're screwed. */
551 pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1);
552
553 static uint32_t s_cBitched = 0;
554 if (++s_cBitched < 10)
555 LogRel(("PGMDynMapHCPage: set is full!\n"));
556 return VERR_PGM_DYNMAP_FULL_SET;
557 }
558 }
559 }
560
561 return VINF_SUCCESS;
562}
563
564
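A hedged usage sketch (not part of the file) of the auto-set API as the comments above describe it: the set must be opened with PGMDynMapStartAutoSet before PGMDynMapHCPage will work, and all references are dropped again by PGMDynMapReleaseAutoSet. The helper name and the surrounding error handling are illustrative only.

static int sketchUseGuestPageOnce(PVM pVM, PVMCPU pVCpu, RTHCPHYS HCPhys)
{
    PGMDynMapStartAutoSet(pVCpu);               /* open the auto reference set */

    void *pv;
    int rc = PGMDynMapHCPage(pVM, HCPhys, &pv); /* HCPhys must be page aligned */
    if (RT_SUCCESS(rc))
    {
        /* ... read or write the page through pv here ... */
    }

    PGMDynMapReleaseAutoSet(pVCpu);             /* drop every reference taken above */
    return rc;
}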