VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp@15144

Last change on this file since 15144 was 14880, checked in by vboxsync, 16 years ago

PGMR0DynMap: nit picking.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 70.1 KB
1/* $Id: PGMR0DynMap.cpp 14880 2008-12-01 17:51:53Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PGM
26#include <VBox/pgm.h>
27#include "../PGMInternal.h"
28#include <VBox/vm.h>
29#include <VBox/sup.h>
30#include <VBox/err.h>
31#include <iprt/asm.h>
32#include <iprt/alloc.h>
33#include <iprt/assert.h>
34#include <iprt/cpuset.h>
35#include <iprt/memobj.h>
36#include <iprt/mp.h>
37#include <iprt/semaphore.h>
38#include <iprt/spinlock.h>
39#include <iprt/string.h>
40
41
42/*******************************************************************************
43* Defined Constants And Macros *
44*******************************************************************************/
45/** The max size of the mapping cache (in pages). */
46#define PGMR0DYNMAP_MAX_PAGES ((16*_1M) >> PAGE_SHIFT)
47/** The small segment size that is used when an out-of-memory condition
48 * prevents the allocation of one single big segment. */
49#define PGMR0DYNMAP_SMALL_SEG_PAGES 128
50/** The number of pages we reserve per CPU. */
51#define PGMR0DYNMAP_PAGES_PER_CPU 256
52/** The minimum number of pages we reserve per CPU.
53 * This must be equal to or larger than the autoset size. */
54#define PGMR0DYNMAP_PAGES_PER_CPU_MIN 32
55/** The number of guard pages.
56 * @remarks Never do tuning of the hashing or whatnot with a strict build! */
57#if defined(VBOX_STRICT)
58# define PGMR0DYNMAP_GUARD_PAGES 1
59#else
60# define PGMR0DYNMAP_GUARD_PAGES 0
61#endif
62/** The dummy physical address of guard pages. */
63#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS UINT32_C(0x7777feed)
64/** The dummy reference count of guard pages. (Must be non-zero.) */
65#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT INT32_C(0x7777feed)
66#if 0
67/** Define this to just clear the present bit on guard pages.
68 * The alternative is to replace the entire PTE with a bad not-present
69 * PTE. Either way, XNU will screw us. :-/ */
70#define PGMR0DYNMAP_GUARD_NP
71#endif
72/** The dummy PTE value for a guard page, 32-bit legacy version. */
73#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE X86_PTE_PG_MASK
74/** The dummy PTE value for a guard page, PAE version. */
75#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE UINT64_MAX /*X86_PTE_PAE_PG_MASK*/
76/** Calculates the overload threshold. Currently set at 50%. */
77#define PGMR0DYNMAP_CALC_OVERLOAD(cPages) ((cPages) / 2)
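/* Illustrative example (not part of the original source): with a 1024 page
   cache the overload threshold is 512 pages, i.e. expanding the cache is
   considered once more than half of its entries are in use. */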
78
79#if 0
80/* Assertions cause panics if preemption is disabled; this can be used to work around that. */
81//#define RTSpinlockAcquire(a,b) do {} while (0)
82//#define RTSpinlockRelease(a,b) do {} while (0)
83#endif
84
85
86/*******************************************************************************
87* Structures and Typedefs *
88*******************************************************************************/
89/**
90 * Ring-0 dynamic mapping cache segment.
91 *
92 * The dynamic mapping cache can be extended with additional segments if the
93 * load is found to be too high. This is done the next time a VM is created,
94 * under the protection of the init mutex. The arrays are reallocated and the
95 * new segment is added to the end of them. Nothing is rehashed of course, as the
96 * indexes / addresses must remain unchanged.
97 *
98 * This structure is only modified while owning the init mutex or during module
99 * init / term.
100 */
101typedef struct PGMR0DYNMAPSEG
102{
103 /** Pointer to the next segment. */
104 struct PGMR0DYNMAPSEG *pNext;
105 /** The memory object for the virtual address range that we're abusing. */
106 RTR0MEMOBJ hMemObj;
107 /** The start page in the cache. (I.e. index into the arrays.) */
108 uint16_t iPage;
109 /** The number of pages this segment contributes. */
110 uint16_t cPages;
111 /** The number of page tables. */
112 uint16_t cPTs;
113 /** The memory objects for the page tables. */
114 RTR0MEMOBJ ahMemObjPTs[1];
115} PGMR0DYNMAPSEG;
116/** Pointer to a ring-0 dynamic mapping cache segment. */
117typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
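/* Note (added for illustration): a segment is allocated with a variable number
   of page table memory object handles at its end, along the lines of
   RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs])) as done by
   pgmR0DynMapAddSeg further down. */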
118
119
120/**
121 * Ring-0 dynamic mapping cache entry.
122 *
123 * This structure tracks
124 */
125typedef struct PGMR0DYNMAPENTRY
126{
127 /** The physical address of the currently mapped page.
128 * This is duplicated for three reasons: cache locality, the cache policy of
129 * the PT mappings, and sanity checks. */
130 RTHCPHYS HCPhys;
131 /** Pointer to the page. */
132 void *pvPage;
133 /** The number of references. */
134 int32_t volatile cRefs;
135 /** PTE pointer union. */
136 union PGMR0DYNMAPENTRY_PPTE
137 {
138 /** PTE pointer, 32-bit legacy version. */
139 PX86PTE pLegacy;
140 /** PTE pointer, PAE version. */
141 PX86PTEPAE pPae;
142 /** PTE pointer, the void version. */
143 void *pv;
144 } uPte;
145 /** CPUs that haven't invalidated this entry after its last update. */
146 RTCPUSET PendingSet;
147} PGMR0DYNMAPENTRY;
148/** Pointer to a ring-0 dynamic mapping cache entry. */
149typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
150
151
152/**
153 * Ring-0 dynamic mapping cache.
154 *
155 * This is initialized during VMMR0 module init but no segments are allocated at
156 * that time. Segments will be added when the first VM is started and removed
157 * again when the last VM shuts down, thus avoiding memory consumption while dormant.
158 * At module termination, the remaining bits will be freed up.
159 */
160typedef struct PGMR0DYNMAP
161{
162 /** The usual magic number / eye catcher (PGMR0DYNMAP_MAGIC). */
163 uint32_t u32Magic;
164 /** Spinlock serializing the normal operation of the cache. */
165 RTSPINLOCK hSpinlock;
166 /** Array for tracking and managing the pages. */
167 PPGMR0DYNMAPENTRY paPages;
168 /** The cache size given as a number of pages. */
169 uint32_t cPages;
170 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
171 bool fLegacyMode;
172 /** The current load.
173 * This does not include guard pages. */
174 uint32_t cLoad;
175 /** The max load ever.
176 * This is maintained in order to trigger the addition of more mapping space. */
177 uint32_t cMaxLoad;
178 /** Initialization / termination lock. */
179 RTSEMFASTMUTEX hInitLock;
180 /** The number of guard pages. */
181 uint32_t cGuardPages;
182 /** The number of users (protected by hInitLock). */
183 uint32_t cUsers;
184 /** Array containing a copy of the original page tables.
185 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
186 void *pvSavedPTEs;
187 /** List of segments. */
188 PPGMR0DYNMAPSEG pSegHead;
189 /** The paging mode. */
190 SUPPAGINGMODE enmPgMode;
191} PGMR0DYNMAP;
192/** Pointer to the ring-0 dynamic mapping cache */
193typedef PGMR0DYNMAP *PPGMR0DYNMAP;
194
195/** PGMR0DYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
196#define PGMR0DYNMAP_MAGIC 0x19640201
197
198
199/**
200 * Paging level data.
201 */
202typedef struct PGMR0DYNMAPPGLVL
203{
204 uint32_t cLevels; /**< The number of levels. */
205 struct
206 {
207 RTHCPHYS HCPhys; /**< The address of the page for the current level,
208 * i.e. what hMemObj/hMapObj is currently mapping. */
209 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
210 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
211 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
212 uint32_t fPtrShift; /**< The pointer shift count. */
213 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
214 uint64_t fAndMask; /**< And mask to check entry flags. */
215 uint64_t fResMask; /**< The result from applying fAndMask. */
216 union
217 {
218 void *pv; /**< hMapObj address. */
219 PX86PGUINT paLegacy; /**< Legacy table view. */
220 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
221 } u;
222 } a[4];
223} PGMR0DYNMAPPGLVL;
224/** Pointer to paging level data. */
225typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
226
227
228/*******************************************************************************
229* Global Variables *
230*******************************************************************************/
231/** Pointer to the ring-0 dynamic mapping cache. */
232static PPGMR0DYNMAP g_pPGMR0DynMap;
233
234
235/*******************************************************************************
236* Internal Functions *
237*******************************************************************************/
238static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs);
239static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis);
240static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis);
241static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis);
242#ifdef DEBUG
243static int pgmR0DynMapTest(PVM pVM);
244#endif
245
246
247/**
248 * Initializes the ring-0 dynamic mapping cache.
249 *
250 * @returns VBox status code.
251 */
252VMMR0DECL(int) PGMR0DynMapInit(void)
253{
254 Assert(!g_pPGMR0DynMap);
255
256 /*
257 * Create and initialize the cache instance.
258 */
259 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)RTMemAllocZ(sizeof(*pThis));
260 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
261 int rc = VINF_SUCCESS;
262 pThis->enmPgMode = SUPR0GetPagingMode();
263 switch (pThis->enmPgMode)
264 {
265 case SUPPAGINGMODE_32_BIT:
266 case SUPPAGINGMODE_32_BIT_GLOBAL:
267 pThis->fLegacyMode = true;
268 break;
269 case SUPPAGINGMODE_PAE:
270 case SUPPAGINGMODE_PAE_GLOBAL:
271 case SUPPAGINGMODE_PAE_NX:
272 case SUPPAGINGMODE_PAE_GLOBAL_NX:
273 case SUPPAGINGMODE_AMD64:
274 case SUPPAGINGMODE_AMD64_GLOBAL:
275 case SUPPAGINGMODE_AMD64_NX:
276 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
277 pThis->fLegacyMode = false;
278 break;
279 default:
280 rc = VERR_INTERNAL_ERROR;
281 break;
282 }
283 if (RT_SUCCESS(rc))
284 {
285 rc = RTSemFastMutexCreate(&pThis->hInitLock);
286 if (RT_SUCCESS(rc))
287 {
288 rc = RTSpinlockCreate(&pThis->hSpinlock);
289 if (RT_SUCCESS(rc))
290 {
291 pThis->u32Magic = PGMR0DYNMAP_MAGIC;
292 g_pPGMR0DynMap = pThis;
293 return VINF_SUCCESS;
294 }
295 RTSemFastMutexDestroy(pThis->hInitLock);
296 }
297 }
298 RTMemFree(pThis);
299 return rc;
300}
301
302
303/**
304 * Terminates the ring-0 dynamic mapping cache.
305 */
306VMMR0DECL(void) PGMR0DynMapTerm(void)
307{
308 /*
309 * Destroy the cache.
310 *
311 * There are not supposed to be any races here; the loader should
312 * make sure about that. So, don't bother locking anything.
313 *
314 * The VM objects should all be destroyed by now, so there are no
315 * dangling users or anything like that to clean up. This routine
316 * is just a mirror image of PGMR0DynMapInit.
317 */
318 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
319 if (pThis)
320 {
321 AssertPtr(pThis);
322 g_pPGMR0DynMap = NULL;
323
324 /* This should *never* happen, but in case it does try not to leak memory. */
325 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages,
326 ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n",
327 pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages));
328 if (pThis->paPages)
329 pgmR0DynMapTearDown(pThis);
330
331 /* Free the associated resources. */
332 RTSemFastMutexDestroy(pThis->hInitLock);
333 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
334 RTSpinlockDestroy(pThis->hSpinlock);
335 pThis->hSpinlock = NIL_RTSPINLOCK;
336 pThis->u32Magic = UINT32_MAX;
337 RTMemFree(pThis);
338 }
339}
340
341
342/**
343 * Initializes the dynamic mapping cache for a new VM.
344 *
345 * @returns VBox status code.
346 * @param pVM Pointer to the shared VM structure.
347 */
348VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
349{
350 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
351
352 /*
353 * Initialize the auto sets.
354 */
355 VMCPUID idCpu = pVM->cCPUs;
356 AssertReturn(idCpu > 0 && idCpu <= VMCPU_MAX_CPU_COUNT, VERR_INTERNAL_ERROR);
357 while (idCpu-- > 0)
358 {
359 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
360 uint32_t j = RT_ELEMENTS(pSet->aEntries);
361 while (j-- > 0)
362 {
363 pSet->aEntries[j].iPage = UINT16_MAX;
364 pSet->aEntries[j].cRefs = 0;
365 }
366 pSet->cEntries = PGMMAPSET_CLOSED;
367 }
368
369 /*
370 * Do we need the cache? Skip the last bit if we don't.
371 */
372 if (!VMMIsHwVirtExtForced(pVM))
373 return VINF_SUCCESS;
374
375 /*
376 * Reference and if necessary setup or expand the cache.
377 */
378 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
379 AssertPtrReturn(pThis, VERR_INTERNAL_ERROR);
380 int rc = RTSemFastMutexRequest(pThis->hInitLock);
381 AssertLogRelRCReturn(rc, rc);
382
383 pThis->cUsers++;
384 if (pThis->cUsers == 1)
385 {
386 rc = pgmR0DynMapSetup(pThis);
387#ifdef DEBUG
388 if (RT_SUCCESS(rc))
389 {
390 rc = pgmR0DynMapTest(pVM);
391 if (RT_FAILURE(rc))
392 pgmR0DynMapTearDown(pThis);
393 }
394#endif
395 }
396 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages))
397 rc = pgmR0DynMapExpand(pThis);
398 if (RT_SUCCESS(rc))
399 pVM->pgm.s.pvR0DynMapUsed = pThis;
400 else
401 pThis->cUsers--;
402
403 RTSemFastMutexRelease(pThis->hInitLock);
404 return rc;
405}
406
407
408/**
409 * Terminates the dynamic mapping cache usage for a VM.
410 *
411 * @param pVM Pointer to the shared VM structure.
412 */
413VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
414{
415 /*
416 * Return immediately if we're not using the cache.
417 */
418 if (!pVM->pgm.s.pvR0DynMapUsed)
419 return;
420
421 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
422 AssertPtrReturnVoid(pThis);
423
424 int rc = RTSemFastMutexRequest(pThis->hInitLock);
425 AssertLogRelRCReturnVoid(rc);
426
427 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
428 {
429 pVM->pgm.s.pvR0DynMapUsed = NULL;
430
431#ifdef VBOX_STRICT
432 PGMR0DynMapAssertIntegrity();
433#endif
434
435 /*
436 * Clean up and check the auto sets.
437 */
438 VMCPUID idCpu = pVM->cCPUs;
439 while (idCpu-- > 0)
440 {
441 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
442 uint32_t j = pSet->cEntries;
443 if (j <= RT_ELEMENTS(pSet->aEntries))
444 {
445 /*
446 * The set is open, close it.
447 */
448 while (j-- > 0)
449 {
450 int32_t cRefs = pSet->aEntries[j].cRefs;
451 uint32_t iPage = pSet->aEntries[j].iPage;
452 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
453 if (iPage < pThis->cPages && cRefs > 0)
454 pgmR0DynMapReleasePage(pThis, iPage, cRefs);
455 else
456 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
457
458 pSet->aEntries[j].iPage = UINT16_MAX;
459 pSet->aEntries[j].cRefs = 0;
460 }
461 pSet->cEntries = PGMMAPSET_CLOSED;
462 }
463 else
464 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
465
466 j = RT_ELEMENTS(pSet->aEntries);
467 while (j-- > 0)
468 {
469 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
470 Assert(!pSet->aEntries[j].cRefs);
471 }
472 }
473
474 /*
475 * Release our reference to the mapping cache.
476 */
477 Assert(pThis->cUsers > 0);
478 pThis->cUsers--;
479 if (!pThis->cUsers)
480 pgmR0DynMapTearDown(pThis);
481 }
482 else
483 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
484
485 RTSemFastMutexRelease(pThis->hInitLock);
486}
487
488
489/**
490 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
491 *
492 * @param idCpu The current CPU.
493 * @param pvUser1 The dynamic mapping cache instance.
494 * @param pvUser2 Unused, NULL.
495 */
496static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
497{
498 Assert(!pvUser2);
499 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)pvUser1;
500 Assert(pThis == g_pPGMR0DynMap);
501 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
502 uint32_t iPage = pThis->cPages;
503 while (iPage-- > 0)
504 ASMInvalidatePage(paPages[iPage].pvPage);
505}
506
507
508/**
509 * Shoot down the TLBs for every single cache entry on all CPUs.
510 *
511 * @returns IPRT status code (RTMpOnAll).
512 * @param pThis The dynamic mapping cache instance.
513 */
514static int pgmR0DynMapTlbShootDown(PPGMR0DYNMAP pThis)
515{
516 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
517 AssertRC(rc);
518 if (RT_FAILURE(rc))
519 {
520 uint32_t iPage = pThis->cPages;
521 while (iPage-- > 0)
522 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
523 }
524 return rc;
525}
526
527
528/**
529 * Calculate the new cache size based on cMaxLoad statistics.
530 *
531 * @returns Number of pages.
532 * @param pThis The dynamic mapping cache instance.
533 * @param pcMinPages Where to return the minimal size in pages.
534 */
535static uint32_t pgmR0DynMapCalcNewSize(PPGMR0DYNMAP pThis, uint32_t *pcMinPages)
536{
537 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
538
539 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU(_MIN). */
540 RTCPUID cCpus = RTMpGetCount();
541 AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
542 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
543 uint32_t cMinPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU_MIN;
544
545 /* adjust against cMaxLoad. */
546 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
547 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
548 pThis->cMaxLoad = 0;
549
550 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
551 cPages += PGMR0DYNMAP_PAGES_PER_CPU;
552
553 if (pThis->cMaxLoad > cMinPages)
554 cMinPages = pThis->cMaxLoad;
555
556 /* adjust against max and current size. */
557 if (cPages < pThis->cPages)
558 cPages = pThis->cPages;
559 cPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
560 if (cPages > PGMR0DYNMAP_MAX_PAGES)
561 cPages = PGMR0DYNMAP_MAX_PAGES;
562
563 if (cMinPages < pThis->cPages)
564 cMinPages = pThis->cPages;
565 cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
566 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
567 cMinPages = PGMR0DYNMAP_MAX_PAGES;
568
569 Assert(cMinPages);
570 *pcMinPages = cMinPages;
571 return cPages;
572}
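/* Sizing example (illustrative only, not part of the original source): on a
   host with 4 CPUs and no recorded load, cPages = 4 * 256 = 1024 and
   cMinPages = 4 * 32 = 128.  A strict build then doubles both values to make
   room for the interleaved guard pages (PGMR0DYNMAP_GUARD_PAGES + 1 == 2),
   and finally both are clamped to PGMR0DYNMAP_MAX_PAGES (16 MB / 4 KB = 4096
   pages). */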
573
574
575/**
576 * Initializes the paging level data.
577 *
578 * @param pThis The dynamic mapping cache instance.
579 * @param pPgLvl The paging level data.
580 */
581void pgmR0DynMapPagingArrayInit(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
582{
583 RTCCUINTREG cr4 = ASMGetCR4();
584 switch (pThis->enmPgMode)
585 {
586 case SUPPAGINGMODE_32_BIT:
587 case SUPPAGINGMODE_32_BIT_GLOBAL:
588 pPgLvl->cLevels = 2;
589 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
590 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
591 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
592 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
593 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
594
595 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
596 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
597 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
598 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
599 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
600 break;
601
602 case SUPPAGINGMODE_PAE:
603 case SUPPAGINGMODE_PAE_GLOBAL:
604 case SUPPAGINGMODE_PAE_NX:
605 case SUPPAGINGMODE_PAE_GLOBAL_NX:
606 pPgLvl->cLevels = 3;
607 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
608 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
609 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
610 pPgLvl->a[0].fAndMask = X86_PDPE_P;
611 pPgLvl->a[0].fResMask = X86_PDPE_P;
612
613 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
614 pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK;
615 pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT;
616 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
617 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
618
619 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
620 pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK;
621 pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT;
622 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
623 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
624 break;
625
626 case SUPPAGINGMODE_AMD64:
627 case SUPPAGINGMODE_AMD64_GLOBAL:
628 case SUPPAGINGMODE_AMD64_NX:
629 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
630 pPgLvl->cLevels = 4;
631 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
632 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
633 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
634 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
635 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
636
637 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
638 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
639 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
640 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
641 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
642
643 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
644 pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT;
645 pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK;
646 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
647 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
648
649 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
650 pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT;
651 pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK;
652 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
653 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
654 break;
655
656 default:
657 AssertFailed();
658 pPgLvl->cLevels = 0;
659 break;
660 }
661
662 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
663 {
664 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
665 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
666 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
667 pPgLvl->a[i].u.pv = NULL;
668 }
669}
670
671
672/**
673 * Maps a PTE.
674 *
675 * This will update the segment structure when new PTs are mapped.
676 *
677 * It also assumes that we (for paranoid reasons) wish to establish a mapping
678 * chain from CR3 down to the PT that corresponds entirely to the processor
679 * we're currently running on, and goes about this by running with interrupts
680 * disabled and restarting from CR3 for every change.
681 *
682 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
683 * to re-enable interrupts.
684 * @param pThis The dynamic mapping cache instance.
685 * @param pPgLvl The paging level structure.
686 * @param pvPage The page.
687 * @param pSeg The segment.
688 * @param cMaxPTs The max number of PTs expected in the segment.
689 * @param ppvPTE Where to store the PTE address.
690 */
691static int pgmR0DynMapPagingArrayMapPte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
692 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
693{
694 Assert(!(ASMGetFlags() & X86_EFL_IF));
695 void *pvEntry = NULL;
696 X86PGPAEUINT uEntry = ASMGetCR3();
697 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
698 {
699 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
700 if (pPgLvl->a[i].HCPhys != HCPhys)
701 {
702 /*
703 * Need to remap this level.
704 * The final level, the PT, will not be freed since that is what it's all about.
705 */
706 ASMIntEnable();
707 if (i + 1 == pPgLvl->cLevels)
708 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_INTERNAL_ERROR);
709 else
710 {
711 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
712 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
713 }
714
715 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE);
716 if (RT_SUCCESS(rc))
717 {
718 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
719 (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
720 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
721 if (RT_SUCCESS(rc))
722 {
723 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
724 AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
725 pPgLvl->a[i].HCPhys = HCPhys;
726 if (i + 1 == pPgLvl->cLevels)
727 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
728 ASMIntDisable();
729 return VINF_TRY_AGAIN;
730 }
731
732 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
733 }
734 else
735 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
736 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
737 return rc;
738 }
739
740 /*
741 * The next level.
742 */
743 uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
744 if (pThis->fLegacyMode)
745 {
746 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
747 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
748 }
749 else
750 {
751 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
752 uEntry = pPgLvl->a[i].u.paPae[iEntry];
753 }
754
755 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
756 {
757 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n"
758 "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n",
759 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask,
760 pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode));
761 return VERR_INTERNAL_ERROR;
762 }
763 /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/
764 }
765
766 /* made it thru without needing to remap anything. */
767 *ppvPTE = pvEntry;
768 return VINF_SUCCESS;
769}
770
771
772/**
773 * Sets up a guard page.
774 *
775 * @param pThis The dynamic mapping cache instance.
776 * @param pPage The page.
777 */
778DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMR0DYNMAP pThis, PPGMR0DYNMAPENTRY pPage)
779{
780 memset(pPage->pvPage, 0xfd, PAGE_SIZE);
781 pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT;
782 pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
783#ifdef PGMR0DYNMAP_GUARD_NP
784 ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P);
785#else
786 if (pThis->fLegacyMode)
787 ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE);
788 else
789 ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE);
790#endif
791 pThis->cGuardPages++;
792}
793
794
795/**
796 * Adds a new segment of the specified size.
797 *
798 * @returns VBox status code.
799 * @param pThis The dynamic mapping cache instance.
800 * @param cPages The size of the new segment, given as a page count.
801 */
802static int pgmR0DynMapAddSeg(PPGMR0DYNMAP pThis, uint32_t cPages)
803{
804 int rc2;
805 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
806
807 /*
808 * Do the array reallocations first.
809 * (The pages array has to be replaced behind the spinlock of course.)
810 */
811 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
812 if (!pvSavedPTEs)
813 return VERR_NO_MEMORY;
814 pThis->pvSavedPTEs = pvSavedPTEs;
815
816 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
817 if (!pvPages)
818 {
819 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
820 if (pvSavedPTEs)
821 pThis->pvSavedPTEs = pvSavedPTEs;
822 return VERR_NO_MEMORY;
823 }
824
825 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
826 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
827
828 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
829 void *pvToFree = pThis->paPages;
830 pThis->paPages = (PPGMR0DYNMAPENTRY)pvPages;
831
832 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
833 RTMemFree(pvToFree);
834
835 /*
836 * Allocate the segment structure and pages of memory, then touch all the pages (paranoia).
837 */
838 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
839 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
840 if (!pSeg)
841 return VERR_NO_MEMORY;
842 pSeg->pNext = NULL;
843 pSeg->cPages = cPages;
844 pSeg->iPage = pThis->cPages;
845 pSeg->cPTs = 0;
846 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
847 if (RT_SUCCESS(rc))
848 {
849 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
850 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
851 memset(pbPage, 0xfe, cPages << PAGE_SHIFT);
852
853 /*
854 * Walk thru the pages and set them up with a mapping of their PTE and everything.
855 */
856 ASMIntDisable();
857 PGMR0DYNMAPPGLVL PgLvl;
858 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
859 uint32_t const iEndPage = pSeg->iPage + cPages;
860 for (uint32_t iPage = pSeg->iPage;
861 iPage < iEndPage;
862 iPage++, pbPage += PAGE_SIZE)
863 {
864 /* Initialize the page data. */
865 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
866 pThis->paPages[iPage].pvPage = pbPage;
867 pThis->paPages[iPage].cRefs = 0;
868 pThis->paPages[iPage].uPte.pPae = 0;
869 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
870
871 /* Map its page table, retry until we've got a clean run (paranoia). */
872 do
873 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
874 &pThis->paPages[iPage].uPte.pv);
875 while (rc == VINF_TRY_AGAIN);
876 if (RT_FAILURE(rc))
877 break;
878
879 /* Save the PTE. */
880 if (pThis->fLegacyMode)
881 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
882 else
883 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
884
885#ifdef VBOX_STRICT
886 /* Check that we've got the right entry. */
887 RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage);
888 RTHCPHYS HCPhysPte = pThis->fLegacyMode
889 ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK
890 : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK;
891 if (HCPhysPage != HCPhysPte)
892 {
893 LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n",
894 iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv));
895 rc = VERR_INTERNAL_ERROR;
896 break;
897 }
898#endif
899 } /* for each page */
900 ASMIntEnable();
901
902 /* cleanup non-PT mappings */
903 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
904 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
905
906 if (RT_SUCCESS(rc))
907 {
908#if PGMR0DYNMAP_GUARD_PAGES > 0
909 /*
910 * Setup guard pages.
911 * (Note: TLBs will be shot down later on.)
912 */
913 uint32_t iPage = pSeg->iPage;
914 while (iPage < iEndPage)
915 {
916 for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++)
917 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
918 iPage++; /* the guarded page */
919 }
920
921 /* Make sure the very last page is a guard page too. */
922 iPage = iEndPage - 1;
923 if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT)
924 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
925#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */
926
927 /*
928 * Commit it by adding the segment to the list and updating the page count.
929 */
930 pSeg->pNext = pThis->pSegHead;
931 pThis->pSegHead = pSeg;
932 pThis->cPages += cPages;
933 return VINF_SUCCESS;
934 }
935
936 /*
937 * Bail out.
938 */
939 while (pSeg->cPTs-- > 0)
940 {
941 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
942 AssertRC(rc2);
943 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
944 }
945
946 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
947 AssertRC(rc2);
948 pSeg->hMemObj = NIL_RTR0MEMOBJ;
949 }
950 RTMemFree(pSeg);
951
952 /* Don't bother resizing the arrays, but free them if we're the only user. */
953 if (!pThis->cPages)
954 {
955 RTMemFree(pThis->paPages);
956 pThis->paPages = NULL;
957 RTMemFree(pThis->pvSavedPTEs);
958 pThis->pvSavedPTEs = NULL;
959 }
960 return rc;
961}
962
963
964/**
965 * Called by PGMR0DynMapInitVM under the init lock.
966 *
967 * @returns VBox status code.
968 * @param pThis The dynamic mapping cache instance.
969 */
970static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis)
971{
972 /*
973 * Calc the size and add a segment of that size.
974 */
975 uint32_t cMinPages;
976 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
977 AssertReturn(cPages, VERR_INTERNAL_ERROR);
978 int rc = pgmR0DynMapAddSeg(pThis, cPages);
979 if (rc == VERR_NO_MEMORY)
980 {
981 /*
982 * Try adding smaller segments.
983 */
984 do
985 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
986 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
987 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
988 rc = VINF_SUCCESS;
989 if (rc == VERR_NO_MEMORY)
990 {
991 if (pThis->cPages)
992 pgmR0DynMapTearDown(pThis);
993 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
994 }
995 }
996 Assert(ASMGetFlags() & X86_EFL_IF);
997
998#if PGMR0DYNMAP_GUARD_PAGES > 0
999 /* paranoia */
1000 if (RT_SUCCESS(rc))
1001 pgmR0DynMapTlbShootDown(pThis);
1002#endif
1003 return rc;
1004}
1005
1006
1007/**
1008 * Called by PGMR0DynMapInitVM under the init lock.
1009 *
1010 * @returns VBox status code.
1011 * @param pThis The dynamic mapping cache instance.
1012 */
1013static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis)
1014{
1015 /*
1016 * Calc the new target size and add a segment of the appropriate size.
1017 */
1018 uint32_t cMinPages;
1019 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1020 AssertReturn(cPages, VERR_INTERNAL_ERROR);
1021 if (pThis->cPages >= cPages)
1022 return VINF_SUCCESS;
1023
1024 uint32_t cAdd = cPages - pThis->cPages;
1025 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
1026 if (rc == VERR_NO_MEMORY)
1027 {
1028 /*
1029 * Try adding smaller segments.
1030 */
1031 do
1032 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1033 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1034 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1035 rc = VINF_SUCCESS;
1036 if (rc == VERR_NO_MEMORY)
1037 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
1038 }
1039 Assert(ASMGetFlags() & X86_EFL_IF);
1040
1041#if PGMR0DYNMAP_GUARD_PAGES > 0
1042 /* paranoia */
1043 if (RT_SUCCESS(rc))
1044 pgmR0DynMapTlbShootDown(pThis);
1045#endif
1046 return rc;
1047}
1048
1049
1050/**
1051 * Called by PGMR0DynMapTermVM under the init lock.
1052 *
1053 * @remarks Returns nothing; failures to free memory objects are only asserted.
1054 * @param pThis The dynamic mapping cache instance.
1055 */
1056static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis)
1057{
1058 /*
1059 * Restore the original page table entries
1060 */
1061 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1062 uint32_t iPage = pThis->cPages;
1063 if (pThis->fLegacyMode)
1064 {
1065 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
1066 while (iPage-- > 0)
1067 {
1068 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
1069 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1070 X86PGUINT uNew = paSavedPTEs[iPage];
1071 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
1072 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1073 Assert(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage]);
1074 }
1075 }
1076 else
1077 {
1078 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
1079 while (iPage-- > 0)
1080 {
1081 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
1082 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1083 X86PGPAEUINT uNew = paSavedPTEs[iPage];
1084 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
1085 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1086 Assert(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage]);
1087 }
1088 }
1089
1090 /*
1091 * Shoot down the TLBs on all CPUs before freeing them.
1092 */
1093 pgmR0DynMapTlbShootDown(pThis);
1094
1095 /*
1096 * Free the segments.
1097 */
1098 while (pThis->pSegHead)
1099 {
1100 int rc;
1101 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
1102 pThis->pSegHead = pSeg->pNext;
1103
1104 uint32_t iPT = pSeg->cPTs;
1105 while (iPT-- > 0)
1106 {
1107 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
1108 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
1109 }
1110 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
1111 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1112 pSeg->pNext = NULL;
1113 pSeg->iPage = UINT16_MAX;
1114 pSeg->cPages = 0;
1115 pSeg->cPTs = 0;
1116 RTMemFree(pSeg);
1117 }
1118
1119 /*
1120 * Free the arrays and restore the initial state.
1121 * The cLoadMax value is left behind for the next setup.
1122 */
1123 RTMemFree(pThis->paPages);
1124 pThis->paPages = NULL;
1125 RTMemFree(pThis->pvSavedPTEs);
1126 pThis->pvSavedPTEs = NULL;
1127 pThis->cPages = 0;
1128 pThis->cLoad = 0;
1129 pThis->cGuardPages = 0;
1130}
1131
1132
1133/**
1134 * Release references to a page, caller owns the spin lock.
1135 *
1136 * @param pThis The dynamic mapping cache instance.
1137 * @param iPage The page.
1138 * @param cRefs The number of references to release.
1139 */
1140DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs)
1141{
1142 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs;
1143 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
1144 if (!cRefs)
1145 pThis->cLoad--;
1146}
1147
1148
1149/**
1150 * Release references to a page, caller does not own the spin lock.
1151 *
1152 * @param pThis The dynamic mapping cache instance.
1153 * @param iPage The page.
1154 * @param cRefs The number of references to release.
1155 */
1156static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1157{
1158 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1159 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1160 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1161 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1162}
1163
1164
1165/**
1166 * pgmR0DynMapPage worker that deals with the tedious bits.
1167 *
1168 * @returns The page index on success, UINT32_MAX on failure.
1169 * @param pThis The dynamic mapping cache instance.
1170 * @param HCPhys The address of the page to be mapped.
1171 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1172 * @param pVM The shared VM structure, for statistics only.
1173 */
1174static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage, PVM pVM)
1175{
1176 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageSlow);
1177
1178 /*
1179 * Check if any of the first 3 pages are unreferenced since the caller
1180 * already has made sure they aren't matching.
1181 */
1182#ifdef VBOX_WITH_STATISTICS
1183 bool fLooped = false;
1184#endif
1185 uint32_t const cPages = pThis->cPages;
1186 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1187 uint32_t iFreePage;
1188 if (!paPages[iPage].cRefs)
1189 iFreePage = iPage;
1190 else if (!paPages[(iPage + 1) % cPages].cRefs)
1191 iFreePage = (iPage + 1) % cPages;
1192 else if (!paPages[(iPage + 2) % cPages].cRefs)
1193 iFreePage = (iPage + 2) % cPages;
1194 else
1195 {
1196 /*
1197 * Search for an unused or matching entry.
1198 */
1199 iFreePage = (iPage + 3) % cPages;
1200 for (;;)
1201 {
1202 if (paPages[iFreePage].HCPhys == HCPhys)
1203 {
1204 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageSlowLoopHits);
1205 return iFreePage;
1206 }
1207 if (!paPages[iFreePage].cRefs)
1208 break;
1209
1210 /* advance */
1211 iFreePage = (iFreePage + 1) % cPages;
1212 if (RT_UNLIKELY(iFreePage == iPage))
1213 return UINT32_MAX;
1214 }
1215 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageSlowLoopMisses);
1216#ifdef VBOX_WITH_STATISTICS
1217 fLooped = true;
1218#endif
1219 }
1220 Assert(iFreePage < cPages);
1221
1222#if 0 //def VBOX_WITH_STATISTICS
1223 /* Check for lost hits. */
1224 if (!fLooped)
1225 for (uint32_t iPage2 = (iPage + 3) % cPages; iPage2 != iPage; iPage2 = (iPage2 + 1) % cPages)
1226 if (paPages[iPage2].HCPhys == HCPhys)
1227 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageSlowLostHits);
1228#endif
1229
1230 /*
1231 * Setup the new entry.
1232 */
1233 /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/
1234 paPages[iFreePage].HCPhys = HCPhys;
1235 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1236 if (pThis->fLegacyMode)
1237 {
1238 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1239 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1240 X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1241 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1242 | (HCPhys & X86_PTE_PG_MASK);
1243 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1244 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1245 Assert(paPages[iFreePage].uPte.pLegacy->u == uNew);
1246 }
1247 else
1248 {
1249 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1250 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1251 X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1252 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1253 | (HCPhys & X86_PTE_PAE_PG_MASK);
1254 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1255 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1256 Assert(paPages[iFreePage].uPte.pPae->u == uNew);
1257 /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/
1258 }
1259 return iFreePage;
1260}
1261
1262
1263/**
1264 * Maps a page into the pool.
1265 *
1266 * @returns Page index on success, UINT32_MAX on failure.
1267 * @param pThis The dynamic mapping cache instance.
1268 * @param HCPhys The address of the page to be mapped.
1269 * @param pVM The shared VM structure, for statistics only.
1270 * @param ppvPage Where to store the page address.
1271 */
1272DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, PVM pVM, void **ppvPage)
1273{
1274 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1275 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1276 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1277 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPage);
1278
1279 /*
1280 * Find an entry, if possible a matching one. The HCPhys address is hashed
1281 * down to a page index, collisions are handled by linear searching.
1282 * Optimized for a hit in the first 3 pages.
1283 *
1284 * Do the cheap hits here and defer the tedious searching and inserting
1285 * to a helper function.
1286 */
1287 uint32_t const cPages = pThis->cPages;
1288 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1289 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1290 if (RT_LIKELY(paPages[iPage].HCPhys == HCPhys))
1291 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageHit0);
1292 else
1293 {
1294 uint32_t iPage2 = (iPage + 1) % cPages;
1295 if (RT_LIKELY(paPages[iPage2].HCPhys == HCPhys))
1296 {
1297 iPage = iPage2;
1298 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageHit1);
1299 }
1300 else
1301 {
1302 iPage2 = (iPage + 2) % cPages;
1303 if (paPages[iPage2].HCPhys == HCPhys)
1304 {
1305 iPage = iPage2;
1306 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageHit2);
1307 }
1308 else
1309 {
1310 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage, pVM);
1311 if (RT_UNLIKELY(iPage == UINT32_MAX))
1312 {
1313 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1314 return iPage;
1315 }
1316 }
1317 }
1318 }
1319
1320 /*
1321 * Reference it, update statistics and get the return address.
1322 */
1323 int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs);
1324 if (cRefs == 1)
1325 {
1326 pThis->cLoad++;
1327 if (pThis->cLoad > pThis->cMaxLoad)
1328 pThis->cMaxLoad = pThis->cLoad;
1329 AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages));
1330 }
1331 else if (RT_UNLIKELY(cRefs <= 0))
1332 {
1333 ASMAtomicDecS32(&paPages[iPage].cRefs);
1334 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1335 AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%p HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX);
1336 }
1337 void *pvPage = paPages[iPage].pvPage;
1338
1339 /*
1340 * Invalidate the entry?
1341 */
1342 RTCPUID idRealCpu = RTMpCpuId();
1343 bool fInvalidateIt = RTCpuSetIsMember(&paPages[iPage].PendingSet, idRealCpu);
1344 if (RT_UNLIKELY(fInvalidateIt))
1345 RTCpuSetDel(&paPages[iPage].PendingSet, idRealCpu);
1346
1347 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1348
1349 /*
1350 * Do the actual invalidation outside the spinlock.
1351 */
1352 if (RT_UNLIKELY(fInvalidateIt))
1353 {
1354 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageInvlPg);
1355 ASMInvalidatePage(pvPage);
1356 }
1357
1358 *ppvPage = pvPage;
1359 return iPage;
1360}
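/* Hashing example (illustrative only, not part of the original source): with
   a 1024 page cache, HCPhys 0x7654f000 hashes to
   iPage = (0x7654f000 >> PAGE_SHIFT) % 1024 = 0x14f.  If that entry and the
   two following it hold other addresses, pgmR0DynMapPageSlow takes over and
   probes linearly from iPage + 3, wrapping around until it finds a matching
   or unreferenced entry. */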
1361
1362
1363/**
1364 * Asserts the integrity of the pool.
1365 *
1366 * @returns VBox status code.
1367 */
1368VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void)
1369{
1370 /*
1371 * Basic pool stuff that doesn't require any lock, just assumes we're a user.
1372 */
1373 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1374 if (!pThis)
1375 return VINF_SUCCESS;
1376 AssertPtrReturn(pThis, VERR_INVALID_POINTER);
1377 AssertReturn(pThis->u32Magic == PGMR0DYNMAP_MAGIC, VERR_INVALID_MAGIC);
1378 if (!pThis->cUsers)
1379 return VERR_INVALID_PARAMETER;
1380
1381
1382 int rc = VINF_SUCCESS;
1383 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1384 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1385
1386#define CHECK_RET(expr, a) \
1387 do { \
1388 if (RT_UNLIKELY(!(expr))) \
1389 { \
1390 RTSpinlockRelease(pThis->hSpinlock, &Tmp); \
1391 AssertMsg1(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \
1392 AssertMsg2 a; \
1393 return VERR_INTERNAL_ERROR; \
1394 } \
1395 } while (0)
1396
1397 /*
1398 * Check that the PTEs are correct.
1399 */
1400 uint32_t cGuard = 0;
1401 uint32_t cLoad = 0;
1402 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1403 uint32_t iPage = pThis->cPages;
1404 if (pThis->fLegacyMode)
1405 {
1406 PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1407 while (iPage-- > 0)
1408 {
1409 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1410 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1411 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1412 {
1413#ifdef PGMR0DYNMAP_GUARD_NP
1414 CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P),
1415 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1416#else
1417 CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE,
1418 ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u));
1419#endif
1420 cGuard++;
1421 }
1422 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1423 {
1424 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1425 X86PGUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1426 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1427 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1428 CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte,
1429 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1430 if (paPages[iPage].cRefs)
1431 cLoad++;
1432 }
1433 else
1434 CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage],
1435 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1436 }
1437 }
1438 else
1439 {
1440 PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1441 while (iPage-- > 0)
1442 {
1443 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1444 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1445 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1446 {
1447#ifdef PGMR0DYNMAP_GUARD_NP
1448 CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P),
1449 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1450#else
1451 CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE,
1452 ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u));
1453#endif
1454 cGuard++;
1455 }
1456 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1457 {
1458 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1459 X86PGPAEUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1460 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1461 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1462 CHECK_RET(paPages[iPage].uPte.pPae->u == uPte,
1463 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1464 if (paPages[iPage].cRefs)
1465 cLoad++;
1466 }
1467 else
1468 CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage],
1469 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1470 }
1471 }
1472
1473 CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad));
1474 CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages));
1475
1476#undef CHECK_RET
1477 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1478 return VINF_SUCCESS;
1479}
1480
1481
1482/**
1483 * Signals the start of a new set of mappings.
1484 *
1485 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1486 * API is called.
1487 *
1488 * @param pVCpu The shared data for the current virtual CPU.
1489 */
1490VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu)
1491{
1492 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1493 pVCpu->pgm.s.AutoSet.cEntries = 0;
1494}
1495
1496
1497/**
1498 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associated functions
1499 * since the PGMDynMapStartAutoSet call.
1500 *
1501 * If the set is already closed, nothing will be done.
1502 *
1503 * @param pVCpu The shared data for the current virtual CPU.
1504 */
1505VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu)
1506{
1507 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1508
1509 /*
1510 * Is the set open?
1511 *
1512 * We might be closed before VM execution and not reopened again before
1513 * we leave for ring-3 or something.
1514 */
1515 uint32_t i = pSet->cEntries;
1516 if (i != PGMMAPSET_CLOSED)
1517 {
1518 /*
1519 * Close the set
1520 */
1521 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1522 pSet->cEntries = PGMMAPSET_CLOSED;
1523
1524 /*
1525 * Release any pages it's referencing.
1526 */
1527 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1528 {
1529 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1530 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1531 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1532
1533 while (i-- > 0)
1534 {
1535 uint32_t iPage = pSet->aEntries[i].iPage;
1536 Assert(iPage < pThis->cPages);
1537 int32_t cRefs = pSet->aEntries[i].cRefs;
1538 Assert(cRefs > 0);
1539 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1540
1541 pSet->aEntries[i].iPage = UINT16_MAX;
1542 pSet->aEntries[i].cRefs = 0;
1543 }
1544
1545 Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages);
1546 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1547 }
1548 }
1549}
1550
1551
1552/**
1553 * Migrates the automatic mapping set of the current vCPU if it's active and
1554 * necessary.
1555 *
1556 * This is called when re-entering the hardware assisted execution mode after a
1557 * trip down to ring-3. We run the risk that the CPU might have changed, so we
1558 * make sure that all the cache entries currently in the auto set are valid on
1559 * the new CPU. If the CPU didn't change, nothing will happen since all of the
1560 * entries will already have been invalidated on it.
1561 *
1562 * @param pVCpu The shared data for the current virtual CPU.
1563 * @thread EMT
1564 */
1565VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu)
1566{
1567 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1568 uint32_t i = pSet->cEntries;
1569 if (i != PGMMAPSET_CLOSED)
1570 {
1571 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1572 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1573 {
1574 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1575 RTCPUID idRealCpu = RTMpCpuId();
1576
1577 while (i-- > 0)
1578 {
1579 Assert(pSet->aEntries[i].cRefs > 0);
1580 uint32_t iPage = pSet->aEntries[i].iPage;
1581 Assert(iPage < pThis->cPages);
1582 if (RTCpuSetIsMember(&pThis->paPages[iPage].PendingSet, idRealCpu))
1583 {
1584 RTCpuSetDel(&pThis->paPages[iPage].PendingSet, idRealCpu);
1585 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
1586 STAM_COUNTER_INC(&pVCpu->pVMR0->pgm.s.StatR0DynMapMigrateInvlPg);
1587 }
1588 }
1589 }
1590 }
1591}
1592
1593
1594/**
1595 * As a final resort for a full auto set, try to merge duplicate entries.
1596 *
1597 * @param pSet The set.
1598 */
1599static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1600{
1601 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1602 {
1603 uint16_t const iPage = pSet->aEntries[i].iPage;
1604 uint32_t j = i + 1;
1605 while (j < pSet->cEntries)
1606 {
1607 if (pSet->aEntries[j].iPage != iPage)
1608 j++;
1609 else if ((uint32_t)pSet->aEntries[i].cRefs + (uint32_t)pSet->aEntries[j].cRefs < UINT16_MAX)
1610 {
1611 /* merge j into i removing j. */
1612 pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs;
1613 pSet->cEntries--;
1614 if (j < pSet->cEntries)
1615 {
1616 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1617 pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX;
1618 pSet->aEntries[pSet->cEntries].cRefs = 0;
1619 }
1620 else
1621 {
1622 pSet->aEntries[j].iPage = UINT16_MAX;
1623 pSet->aEntries[j].cRefs = 0;
1624 }
1625 }
1626 else
1627 {
1628 /* migrate the max number of refs from j into i and quit the inner loop. */
1629 uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs;
1630 Assert(pSet->aEntries[j].cRefs > cMigrate);
1631 pSet->aEntries[j].cRefs -= cMigrate;
1632 pSet->aEntries[i].cRefs = UINT16_MAX - 1;
1633 break;
1634 }
1635 }
1636 }
1637}
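/* Merge example (illustrative only): if a full set holds two entries for
   iPage 7 with cRefs 3 and 5, they are folded into a single entry with
   cRefs 8, freeing one slot for the caller that triggered the optimization. */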
1638
1639
1640/* documented elsewhere - a bit of a mess.
1641 This is a VERY hot path. */
1642VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv)
1643{
1644 /*
1645 * Validate state.
1646 */
1647 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapHCPage);
1648 AssertPtr(ppv);
1649 *ppv = NULL;
1650 AssertMsgReturn(pVM->pgm.s.pvR0DynMapUsed == g_pPGMR0DynMap,
1651 ("%p != %p\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap),
1652 VERR_ACCESS_DENIED);
1653 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1654 PVMCPU pVCpu = VMMGetCpu(pVM);
1655 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1656 AssertPtrReturn(pVCpu, VERR_INTERNAL_ERROR);
1657 AssertMsgReturn(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries),
1658 ("%#x (%u)\n", pSet->cEntries, pSet->cEntries), VERR_WRONG_ORDER);
1659
1660 /*
1661 * Map it.
1662 */
1663 uint32_t const iPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, pVM, ppv);
1664 if (RT_UNLIKELY(iPage == UINT32_MAX))
1665 {
1666 static uint32_t s_cBitched = 0;
1667 if (++s_cBitched < 10)
1668 LogRel(("PGMDynMapHCPage: cLoad=%u/%u cPages=%u cGuardPages=%u\n",
1669 g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages, g_pPGMR0DynMap->cGuardPages));
1670 return VERR_PGM_DYNMAP_FAILED;
1671 }
1672
1673 /*
1674 * Add the page to the auto reference set.
1675 *
1676 * The typical usage pattern means that the same pages will be mapped
1677 * several times in the same set. We can catch most of these
1678 * remappings by looking a few pages back into the set. (The searching
1679 * and set optimizing path will hardly ever be used when doing this.)
1680 */
1681 AssertCompile(RT_ELEMENTS(pSet->aEntries) >= 8);
1682 int32_t i = pSet->cEntries;
1683 if (i-- < 5)
1684 {
1685 pSet->aEntries[pSet->cEntries].cRefs = 1;
1686 pSet->aEntries[pSet->cEntries].iPage = iPage;
1687 pSet->cEntries++;
1688 }
1689 /* Any of the last 5 pages? */
1690 else if ( pSet->aEntries[i - 0].iPage == iPage
1691 && pSet->aEntries[i - 0].cRefs < UINT16_MAX - 1)
1692 pSet->aEntries[i - 0].cRefs++;
1693 else if ( pSet->aEntries[i - 1].iPage == iPage
1694 && pSet->aEntries[i - 1].cRefs < UINT16_MAX - 1)
1695 pSet->aEntries[i - 1].cRefs++;
1696 else if ( pSet->aEntries[i - 2].iPage == iPage
1697 && pSet->aEntries[i - 2].cRefs < UINT16_MAX - 1)
1698 pSet->aEntries[i - 2].cRefs++;
1699 else if ( pSet->aEntries[i - 3].iPage == iPage
1700 && pSet->aEntries[i - 3].cRefs < UINT16_MAX - 1)
1701 pSet->aEntries[i - 3].cRefs++;
1702 else if ( pSet->aEntries[i - 4].iPage == iPage
1703 && pSet->aEntries[i - 4].cRefs < UINT16_MAX - 1)
1704 pSet->aEntries[i - 4].cRefs++;
1705 /* Don't bother searching unless we're above a 75% load. */
1706 else if (RT_LIKELY(i <= (int32_t)RT_ELEMENTS(pSet->aEntries) / 4 * 3))
1707 {
1708 pSet->aEntries[pSet->cEntries].cRefs = 1;
1709 pSet->aEntries[pSet->cEntries].iPage = iPage;
1710 pSet->cEntries++;
1711 }
1712 else
1713 {
1714 /* Search the rest of the set. */
1715 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
1716 i -= 4;
1717 while (i-- > 0)
1718 if ( pSet->aEntries[i].iPage == iPage
1719 && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
1720 {
1721 pSet->aEntries[i].cRefs++;
1722 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapHCPageSetSearchHits);
1723 break;
1724 }
1725 if (i < 0)
1726 {
1727 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapHCPageSetSearchMisses);
1728 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
1729 {
1730 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapHCPageSetOptimize);
1731 pgmDynMapOptimizeAutoSet(pSet);
1732 }
1733 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
1734 {
1735 pSet->aEntries[pSet->cEntries].cRefs = 1;
1736 pSet->aEntries[pSet->cEntries].iPage = iPage;
1737 pSet->cEntries++;
1738 }
1739 else
1740 {
1741 /* We're screwed. */
1742 pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1);
1743
1744 static uint32_t s_cBitched = 0;
1745 if (++s_cBitched < 10)
1746 LogRel(("PGMDynMapHCPage: set is full!\n"));
1747 *ppv = NULL;
1748 return VERR_PGM_DYNMAP_FULL_SET;
1749 }
1750 }
1751 }
1752
1753 return VINF_SUCCESS;
1754}
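
/*
 * A minimal usage sketch, modelled on the debug tests below; the helper name
 * is hypothetical and the #if 0 guard keeps it out of the build.  The point
 * is the calling pattern: PGMDynMapHCPage expects interrupts to be disabled
 * and an open auto set, and the mapping stays valid until that set is
 * released (or migrated) again.
 */
#if 0 /* illustrative sketch only */
static int pgmR0DynMapSketchUsage(PVM pVM, PVMCPU pVCpu, RTHCPHYS HCPhysPage)
{
    /* HCPhysPage must be page aligned. */
    ASMIntDisable();
    PGMDynMapStartAutoSet(pVCpu);

    void *pvPage = NULL;
    int rc = PGMDynMapHCPage(pVM, HCPhysPage, &pvPage);
    if (RT_SUCCESS(rc))
    {
        /* ... access the page thru pvPage while the set is still open ... */
    }

    PGMDynMapReleaseAutoSet(pVCpu);
    ASMIntEnable();
    return rc;
}
#endif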
1755
1756
1757#ifdef DEBUG
1758/** For pgmR0DynMapTest3PerCpu. */
1759typedef struct PGMR0DYNMAPTEST
1760{
1761 uint32_t u32Expect;
1762 uint32_t *pu32;
1763 uint32_t volatile cFailures;
1764} PGMR0DYNMAPTEST;
1765typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST;
1766
1767/**
1768 * Checks that the content of the page is the same on all CPUs, i.e. that there
1769 * are no CPU specific PTs or similar nasty stuff involved.
1770 *
1771 * @param idCpu The current CPU.
1772 * @param pvUser1 Pointer to a PGMR0DYNMAPTEST structure.
1773 * @param pvUser2 Unused, ignored.
1774 */
1775static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1776{
1777 PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1;
1778 ASMInvalidatePage(pTest->pu32);
1779 if (*pTest->pu32 != pTest->u32Expect)
1780 ASMAtomicIncU32(&pTest->cFailures);
1781 NOREF(pvUser2); NOREF(idCpu);
1782}
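
/* The worker above is handed to RTMpOnAll() by the multi-CPU test below: each
   CPU invalidates its TLB entry for the mapping, rereads the page and bumps
   cFailures atomically if the content differs from u32Expect. */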
1783
1784
1785/**
1786 * Performs some basic tests in debug builds.
1787 */
1788static int pgmR0DynMapTest(PVM pVM)
1789{
1790 LogRel(("pgmR0DynMapTest: ****** START ******\n"));
1791 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1792 PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet;
1793 uint32_t i;
1794
1795 /*
1796 * Assert internal integrity first.
1797 */
1798 LogRel(("Test #0\n"));
1799 int rc = PGMR0DynMapAssertIntegrity();
1800 if (RT_FAILURE(rc))
1801 return rc;
1802
1803 void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed;
1804 pVM->pgm.s.pvR0DynMapUsed = pThis;
1805
1806 /*
1807 * Simple test, map CR3 twice and check that we're getting the
1808 * same mapping address back.
1809 */
1810 LogRel(("Test #1\n"));
1811 ASMIntDisable();
1812 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
1813
1814 uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK;
1815 void *pv = (void *)(intptr_t)-1;
1816 void *pv2 = (void *)(intptr_t)-2;
1817 rc = PGMDynMapHCPage(pVM, cr3, &pv);
1818 int rc2 = PGMDynMapHCPage(pVM, cr3, &pv2);
1819 ASMIntEnable();
1820 if ( RT_SUCCESS(rc2)
1821 && RT_SUCCESS(rc)
1822 && pv == pv2)
1823 {
1824 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cGuardPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1825 rc = PGMR0DynMapAssertIntegrity();
1826
1827 /*
1828 * Check that the simple set overflow code works by filling it
1829 * with more CR3 mappings.
1830 */
1831 LogRel(("Test #2\n"));
1832 ASMIntDisable();
1833 for (i = 0 ; i < UINT16_MAX*2 - 1 && RT_SUCCESS(rc) && pv2 == pv; i++)
1834 {
1835 pv2 = (void *)(intptr_t)-4;
1836 rc = PGMDynMapHCPage(pVM, cr3, &pv2);
1837 }
1838 ASMIntEnable();
1839 if (RT_FAILURE(rc) || pv != pv2)
1840 {
1841 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
1842 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1843 }
1844 else if (pSet->cEntries != 5)
1845 {
1846 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, 5));
1847 rc = VERR_INTERNAL_ERROR;
1848 }
1849 else if ( pSet->aEntries[4].cRefs != UINT16_MAX - 1
1850 || pSet->aEntries[3].cRefs != UINT16_MAX - 1
1851 || pSet->aEntries[2].cRefs != 1
1852 || pSet->aEntries[1].cRefs != 1
1853 || pSet->aEntries[0].cRefs != 1)
1854 {
1855 LogRel(("failed(%d): bad set dist: ", __LINE__));
1856 for (i = 0; i < pSet->cEntries; i++)
1857 LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs));
1858 LogRel(("\n"));
1859 rc = VERR_INTERNAL_ERROR;
1860 }
1861 if (RT_SUCCESS(rc))
1862 rc = PGMR0DynMapAssertIntegrity();
1863 if (RT_SUCCESS(rc))
1864 {
1865 /*
1866 * Trigger a set optimization run (exactly).
1867 */
1868 LogRel(("Test #3\n"));
1869 ASMIntDisable();
1870 pv2 = NULL;
1871 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) - 5 && RT_SUCCESS(rc) && pv2 != pv; i++)
1872 {
1873 pv2 = (void *)(intptr_t)(-5 - i);
1874 rc = PGMDynMapHCPage(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2);
1875 }
1876 ASMIntEnable();
1877 if (RT_FAILURE(rc) || pv == pv2)
1878 {
1879 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
1880 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1881 }
1882 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries))
1883 {
1884 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1885 rc = VERR_INTERNAL_ERROR;
1886 }
1887 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cGuardPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1888 if (RT_SUCCESS(rc))
1889 rc = PGMR0DynMapAssertIntegrity();
1890 if (RT_SUCCESS(rc))
1891 {
1892 /*
1893 * Trigger an overflow error.
1894 */
1895 LogRel(("Test #4\n"));
1896 ASMIntDisable();
1897 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) + 2; i++)
1898 {
1899 rc = PGMDynMapHCPage(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2);
1900 if (RT_SUCCESS(rc))
1901 rc = PGMR0DynMapAssertIntegrity();
1902 if (RT_FAILURE(rc))
1903 break;
1904 }
1905 ASMIntEnable();
1906 if (rc == VERR_PGM_DYNMAP_FULL_SET)
1907 {
1908 /* flush the set. */
1909 LogRel(("Test #5\n"));
1910 ASMIntDisable();
1911 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
1912 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
1913 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
1914 ASMIntEnable();
1915
1916 rc = PGMR0DynMapAssertIntegrity();
1917 }
1918 else
1919 {
1920 LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__,
1921 rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i));
1922 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1923 }
1924 }
1925 }
1926 }
1927 else
1928 {
1929 LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2));
1930 if (RT_SUCCESS(rc))
1931 rc = rc2;
1932 }
1933
1934 /*
1935 * Check that everyone sees the same stuff.
1936 */
1937 if (RT_SUCCESS(rc))
1938 {
1939 LogRel(("Test #6\n"));
1940 ASMIntDisable();
1941 RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0);
1942 rc = PGMDynMapHCPage(pVM, HCPhysPT, &pv);
1943 if (RT_SUCCESS(rc))
1944 {
1945 PGMR0DYNMAPTEST Test;
1946 uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u;
1947 Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK));
1948 Test.u32Expect = *pu32Real;
1949 ASMAtomicWriteU32(&Test.cFailures, 0);
1950 ASMIntEnable();
1951
1952 rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL);
1953 if (RT_FAILURE(rc))
1954 LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc));
1955 else if (Test.cFailures)
1956 {
1957 LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__,
1958 Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
1959 rc = VERR_INTERNAL_ERROR;
1960 }
1961 else
1962 LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n",
1963 pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
1964 }
1965 else
1966 {
1967 ASMIntEnable();
1968 LogRel(("failed(%d): rc=%Rrc\n", __LINE__, rc));
1969 }
1970 }
1971
1972 /*
1973 * Clean up.
1974 */
1975 LogRel(("Cleanup.\n"));
1976 ASMIntDisable();
1977 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
1978 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
1979 ASMIntEnable();
1980
1981 if (RT_SUCCESS(rc))
1982 rc = PGMR0DynMapAssertIntegrity();
1983 else
1984 PGMR0DynMapAssertIntegrity();
1985
1986 LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc,
1987 pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cGuardPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1988 pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved;
1989 LogRel(("pgmR0DynMapTest: ****** END ******\n"));
1990 return rc;
1991}
1992#endif /* DEBUG */
1993