source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp@ 15416

Last change on this file since 15416 was 15411, checked in by vboxsync, 16 years ago

VMM: Working around set overflows caused by the page pool.

1/* $Id: PGMR0DynMap.cpp 15411 2008-12-13 03:30:58Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PGM
26#include <VBox/pgm.h>
27#include "../PGMInternal.h"
28#include <VBox/vm.h>
29#include <VBox/sup.h>
30#include <VBox/err.h>
31#include <iprt/asm.h>
32#include <iprt/alloc.h>
33#include <iprt/assert.h>
34#include <iprt/cpuset.h>
35#include <iprt/memobj.h>
36#include <iprt/mp.h>
37#include <iprt/semaphore.h>
38#include <iprt/spinlock.h>
39#include <iprt/string.h>
40
41
42/*******************************************************************************
43* Defined Constants And Macros *
44*******************************************************************************/
45/** The max size of the mapping cache (in pages). */
46#define PGMR0DYNMAP_MAX_PAGES ((16*_1M) >> PAGE_SHIFT)
47/** The small segment size to fall back to when an out-of-memory condition is
48 * hit while allocating a single big segment. */
49#define PGMR0DYNMAP_SMALL_SEG_PAGES 128
50/** The number of pages we reserve per CPU. */
51#define PGMR0DYNMAP_PAGES_PER_CPU 256
52/** The minimum number of pages we reserve per CPU.
53 * This must be equal to or larger than the autoset size. */
54#define PGMR0DYNMAP_PAGES_PER_CPU_MIN 32
55/** The number of guard pages.
56 * @remarks Never do tuning of the hashing or whatnot with a strict build! */
57#if defined(VBOX_STRICT)
58# define PGMR0DYNMAP_GUARD_PAGES 1
59#else
60# define PGMR0DYNMAP_GUARD_PAGES 0
61#endif
62/** The dummy physical address of guard pages. */
63#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS UINT32_C(0x7777feed)
64/** The dummy reference count of guard pages. (Must be non-zero.) */
65#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT INT32_C(0x7777feed)
66#if 0
67/** Define this to just clear the present bit on guard pages.
68 * The alternative is to replace the entire PTE with a bad not-present
69 * PTE. Either way, XNU will screw us. :-/ */
70#define PGMR0DYNMAP_GUARD_NP
71#endif
72/** The dummy PTE value for a page. */
73#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE X86_PTE_PG_MASK
74/** The dummy PTE value for a page. */
75#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE UINT64_MAX /*X86_PTE_PAE_PG_MASK*/
76/** Calculates the overload threshold. Currently set at 50%. */
77#define PGMR0DYNMAP_CALC_OVERLOAD(cPages) ((cPages) / 2)
78
79#if 0
80/* Assertions cause panics if preemption is disabled; this can be used to work around that. */
81//#define RTSpinlockAcquire(a,b) do {} while (0)
82//#define RTSpinlockRelease(a,b) do {} while (0)
83#endif
84
85
86/*******************************************************************************
87* Structures and Typedefs *
88*******************************************************************************/
89/**
90 * Ring-0 dynamic mapping cache segment.
91 *
92 * The dynamic mapping cache can be extended with additional segments if the
93 * load is found to be too high. This is done the next time a VM is created, under
94 * the protection of the init mutex. The arrays are reallocated and the new
95 * segment is added to the end of them. Nothing is rehashed of course, as the
96 * indexes / addresses must remain unchanged.
97 *
98 * This structure is only modified while owning the init mutex or during module
99 * init / term.
100 */
101typedef struct PGMR0DYNMAPSEG
102{
103 /** Pointer to the next segment. */
104 struct PGMR0DYNMAPSEG *pNext;
105 /** The memory object for the virtual address range that we're abusing. */
106 RTR0MEMOBJ hMemObj;
107 /** The start page in the cache. (I.e. index into the arrays.) */
108 uint16_t iPage;
109 /** The number of pages this segment contributes. */
110 uint16_t cPages;
111 /** The number of page tables. */
112 uint16_t cPTs;
113 /** The memory objects for the page tables. */
114 RTR0MEMOBJ ahMemObjPTs[1];
115} PGMR0DYNMAPSEG;
116/** Pointer to a ring-0 dynamic mapping cache segment. */
117typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
118
119
120/**
121 * Ring-0 dynamic mapping cache entry.
122 *
123 * This structure tracks a single page in the mapping cache.
124 */
125typedef struct PGMR0DYNMAPENTRY
126{
127 /** The physical address of the currently mapped page.
128 * This is duplicated for three reasons: cache locality, the cache policy of the PT
129 * mappings and sanity checks. */
130 RTHCPHYS HCPhys;
131 /** Pointer to the page. */
132 void *pvPage;
133 /** The number of references. */
134 int32_t volatile cRefs;
135 /** PTE pointer union. */
136 union PGMR0DYNMAPENTRY_PPTE
137 {
138 /** PTE pointer, 32-bit legacy version. */
139 PX86PTE pLegacy;
140 /** PTE pointer, PAE version. */
141 PX86PTEPAE pPae;
142 /** PTE pointer, the void version. */
143 void *pv;
144 } uPte;
145 /** CPUs that haven't invalidated this entry after its last update. */
146 RTCPUSET PendingSet;
147} PGMR0DYNMAPENTRY;
148/** Pointer to a ring-0 dynamic mapping cache entry. */
149typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
150
151
152/**
153 * Ring-0 dynamic mapping cache.
154 *
155 * This is initialized during VMMR0 module init but no segments are allocated at
156 * that time. Segments will be added when the first VM is started and removed
157 * again when the last VM shuts down, thus avoiding memory consumption while dormant.
158 * At module termination, the remaining bits will be freed up.
159 */
160typedef struct PGMR0DYNMAP
161{
162 /** The usual magic number / eye catcher (PGMR0DYNMAP_MAGIC). */
163 uint32_t u32Magic;
164 /** Spinlock serializing the normal operation of the cache. */
165 RTSPINLOCK hSpinlock;
166 /** Array for tracking and managing the pages. */
167 PPGMR0DYNMAPENTRY paPages;
168 /** The cache size given as a number of pages. */
169 uint32_t cPages;
170 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
171 bool fLegacyMode;
172 /** The current load.
173 * This does not include guard pages. */
174 uint32_t cLoad;
175 /** The max load ever.
176 * This is maintained to trigger the adding of more mapping space. */
177 uint32_t cMaxLoad;
178 /** Initialization / termination lock. */
179 RTSEMFASTMUTEX hInitLock;
180 /** The number of guard pages. */
181 uint32_t cGuardPages;
182 /** The number of users (protected by hInitLock). */
183 uint32_t cUsers;
184 /** Array containing a copy of the original page table entries.
185 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
186 void *pvSavedPTEs;
187 /** List of segments. */
188 PPGMR0DYNMAPSEG pSegHead;
189 /** The paging mode. */
190 SUPPAGINGMODE enmPgMode;
191} PGMR0DYNMAP;
192/** Pointer to the ring-0 dynamic mapping cache */
193typedef PGMR0DYNMAP *PPGMR0DYNMAP;
194
195/** PGMR0DYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
196#define PGMR0DYNMAP_MAGIC 0x19640201
197
198
199/**
200 * Paging level data.
201 */
202typedef struct PGMR0DYNMAPPGLVL
203{
204 uint32_t cLevels; /**< The number of levels. */
205 struct
206 {
207 RTHCPHYS HCPhys; /**< The address of the page for the current level,
208 * i.e. what hMemObj/hMapObj is currently mapping. */
209 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
210 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
211 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
212 uint32_t fPtrShift; /**< The pointer shift count. */
213 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
214 uint64_t fAndMask; /**< And mask to check entry flags. */
215 uint64_t fResMask; /**< The result from applying fAndMask. */
216 union
217 {
218 void *pv; /**< hMapObj address. */
219 PX86PGUINT paLegacy; /**< Legacy table view. */
220 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
221 } u;
222 } a[4];
223} PGMR0DYNMAPPGLVL;
224/** Pointer to paging level data. */
225typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
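
/*
 * Illustrative sketch: how the fPtrShift / fPtrMask pair of a level is used to
 * pull a table index out of a virtual address, mirroring the expression in
 * pgmR0DynMapPagingArrayMapPte below.  For the PAE page table level
 * (fPtrShift = 12, fPtrMask = 0x1ff) an address of 0x12345000 yields index 0x145.
 * The function name is made up for the example and is not part of the cache.
 */
#if 0 /* example only, never compiled */
static uint32_t pgmR0DynMapExamplePtIndex(PPGMR0DYNMAPPGLVL pPgLvl, uint32_t iLevel, void *pv)
{
    return (uint32_t)(((uint64_t)(uintptr_t)pv >> pPgLvl->a[iLevel].fPtrShift) & pPgLvl->a[iLevel].fPtrMask);
}
#endif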
226
227
228/*******************************************************************************
229* Global Variables *
230*******************************************************************************/
231/** Pointer to the ring-0 dynamic mapping cache. */
232static PPGMR0DYNMAP g_pPGMR0DynMap;
233
234
235/*******************************************************************************
236* Internal Functions *
237*******************************************************************************/
238static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs);
239static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis);
240static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis);
241static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis);
242#ifdef DEBUG
243static int pgmR0DynMapTest(PVM pVM);
244#endif
245
246
247/**
248 * Initializes the ring-0 dynamic mapping cache.
249 *
250 * @returns VBox status code.
251 */
252VMMR0DECL(int) PGMR0DynMapInit(void)
253{
254 Assert(!g_pPGMR0DynMap);
255
256 /*
257 * Create and initialize the cache instance.
258 */
259 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)RTMemAllocZ(sizeof(*pThis));
260 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
261 int rc = VINF_SUCCESS;
262 pThis->enmPgMode = SUPR0GetPagingMode();
263 switch (pThis->enmPgMode)
264 {
265 case SUPPAGINGMODE_32_BIT:
266 case SUPPAGINGMODE_32_BIT_GLOBAL:
267 pThis->fLegacyMode = false;
268 break;
269 case SUPPAGINGMODE_PAE:
270 case SUPPAGINGMODE_PAE_GLOBAL:
271 case SUPPAGINGMODE_PAE_NX:
272 case SUPPAGINGMODE_PAE_GLOBAL_NX:
273 case SUPPAGINGMODE_AMD64:
274 case SUPPAGINGMODE_AMD64_GLOBAL:
275 case SUPPAGINGMODE_AMD64_NX:
276 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
277 pThis->fLegacyMode = false;
278 break;
279 default:
280 rc = VERR_INTERNAL_ERROR;
281 break;
282 }
283 if (RT_SUCCESS(rc))
284 {
285 rc = RTSemFastMutexCreate(&pThis->hInitLock);
286 if (RT_SUCCESS(rc))
287 {
288 rc = RTSpinlockCreate(&pThis->hSpinlock);
289 if (RT_SUCCESS(rc))
290 {
291 pThis->u32Magic = PGMR0DYNMAP_MAGIC;
292 g_pPGMR0DynMap = pThis;
293 return VINF_SUCCESS;
294 }
295 RTSemFastMutexDestroy(pThis->hInitLock);
296 }
297 }
298 RTMemFree(pThis);
299 return rc;
300}
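
/*
 * Minimal usage sketch (the caller names are made up; the real caller is
 * presumably the VMMR0 module init/term code): PGMR0DynMapInit only creates
 * the cache instance, segments are added later by PGMR0DynMapInitVM, and
 * everything is undone by PGMR0DynMapTerm on module unload.
 */
#if 0 /* example only, never compiled */
static int exampleModuleInit(void)
{
    int rc = PGMR0DynMapInit();
    if (RT_FAILURE(rc))
        return rc;
    /* ... remaining module init ... */
    return VINF_SUCCESS;
}

static void exampleModuleTerm(void)
{
    PGMR0DynMapTerm();
}
#endif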
301
302
303/**
304 * Terminates the ring-0 dynamic mapping cache.
305 */
306VMMR0DECL(void) PGMR0DynMapTerm(void)
307{
308 /*
309 * Destroy the cache.
310 *
311 * There are not supposed to be any races here; the loader should
312 * make sure of that. So, don't bother locking anything.
313 *
314 * The VM objects should all be destroyed by now, so there are no
315 * dangling users or anything like that to clean up. This routine
316 * is just a mirror image of PGMR0DynMapInit.
317 */
318 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
319 if (pThis)
320 {
321 AssertPtr(pThis);
322 g_pPGMR0DynMap = NULL;
323
324 /* This should *never* happen, but in case it does try not to leak memory. */
325 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages,
326 ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n",
327 pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages));
328 if (pThis->paPages)
329 pgmR0DynMapTearDown(pThis);
330
331 /* Free the associated resources. */
332 RTSemFastMutexDestroy(pThis->hInitLock);
333 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
334 RTSpinlockDestroy(pThis->hSpinlock);
335 pThis->hSpinlock = NIL_RTSPINLOCK;
336 pThis->u32Magic = UINT32_MAX;
337 RTMemFree(pThis);
338 }
339}
340
341
342/**
343 * Initializes the dynamic mapping cache for a new VM.
344 *
345 * @returns VBox status code.
346 * @param pVM Pointer to the shared VM structure.
347 */
348VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
349{
350 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
351
352 /*
353 * Initialize the auto sets.
354 */
355 VMCPUID idCpu = pVM->cCPUs;
356 AssertReturn(idCpu > 0 && idCpu <= VMCPU_MAX_CPU_COUNT, VERR_INTERNAL_ERROR);
357 while (idCpu-- > 0)
358 {
359 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
360 uint32_t j = RT_ELEMENTS(pSet->aEntries);
361 while (j-- > 0)
362 {
363 pSet->aEntries[j].iPage = UINT16_MAX;
364 pSet->aEntries[j].cRefs = 0;
365 pSet->aEntries[j].pvPage = NULL;
366 pSet->aEntries[j].HCPhys = NIL_RTHCPHYS;
367 }
368 pSet->cEntries = PGMMAPSET_CLOSED;
369 pSet->iSubset = UINT32_MAX;
370 pSet->iCpu = -1;
371 memset(&pSet->aiHashTable[0], 0xff, sizeof(pSet->aiHashTable));
372 }
373
374 /*
375 * Do we need the cache? Skip the last bit if we don't.
376 */
377 if (!VMMIsHwVirtExtForced(pVM))
378 return VINF_SUCCESS;
379
380 /*
381 * Reference and if necessary setup or expand the cache.
382 */
383 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
384 AssertPtrReturn(pThis, VERR_INTERNAL_ERROR);
385 int rc = RTSemFastMutexRequest(pThis->hInitLock);
386 AssertLogRelRCReturn(rc, rc);
387
388 pThis->cUsers++;
389 if (pThis->cUsers == 1)
390 {
391 rc = pgmR0DynMapSetup(pThis);
392#ifdef DEBUG
393 if (RT_SUCCESS(rc))
394 {
395 rc = pgmR0DynMapTest(pVM);
396 if (RT_FAILURE(rc))
397 pgmR0DynMapTearDown(pThis);
398 }
399#endif
400 }
401 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages))
402 rc = pgmR0DynMapExpand(pThis);
403 if (RT_SUCCESS(rc))
404 pVM->pgm.s.pvR0DynMapUsed = pThis;
405 else
406 pThis->cUsers--;
407
408 RTSemFastMutexRelease(pThis->hInitLock);
409 return rc;
410}
411
412
413/**
414 * Terminates the dynamic mapping cache usage for a VM.
415 *
416 * @param pVM Pointer to the shared VM structure.
417 */
418VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
419{
420 /*
421 * Return immediately if we're not using the cache.
422 */
423 if (!pVM->pgm.s.pvR0DynMapUsed)
424 return;
425
426 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
427 AssertPtrReturnVoid(pThis);
428
429 int rc = RTSemFastMutexRequest(pThis->hInitLock);
430 AssertLogRelRCReturnVoid(rc);
431
432 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
433 {
434 pVM->pgm.s.pvR0DynMapUsed = NULL;
435
436#ifdef VBOX_STRICT
437 PGMR0DynMapAssertIntegrity();
438#endif
439
440 /*
441 * Clean up and check the auto sets.
442 */
443 VMCPUID idCpu = pVM->cCPUs;
444 while (idCpu-- > 0)
445 {
446 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
447 uint32_t j = pSet->cEntries;
448 if (j <= RT_ELEMENTS(pSet->aEntries))
449 {
450 /*
451 * The set is open, close it.
452 */
453 while (j-- > 0)
454 {
455 int32_t cRefs = pSet->aEntries[j].cRefs;
456 uint32_t iPage = pSet->aEntries[j].iPage;
457 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
458 if (iPage < pThis->cPages && cRefs > 0)
459 pgmR0DynMapReleasePage(pThis, iPage, cRefs);
460 else
461 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
462
463 pSet->aEntries[j].iPage = UINT16_MAX;
464 pSet->aEntries[j].cRefs = 0;
465 pSet->aEntries[j].pvPage = NULL;
466 pSet->aEntries[j].HCPhys = NIL_RTHCPHYS;
467 }
468 pSet->cEntries = PGMMAPSET_CLOSED;
469 pSet->iSubset = UINT32_MAX;
470 pSet->iCpu = -1;
471 }
472 else
473 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
474
475 j = RT_ELEMENTS(pSet->aEntries);
476 while (j-- > 0)
477 {
478 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
479 Assert(!pSet->aEntries[j].cRefs);
480 }
481 }
482
483 /*
484 * Release our reference to the mapping cache.
485 */
486 Assert(pThis->cUsers > 0);
487 pThis->cUsers--;
488 if (!pThis->cUsers)
489 pgmR0DynMapTearDown(pThis);
490 }
491 else
492 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
493
494 RTSemFastMutexRelease(pThis->hInitLock);
495}
496
497
498/**
499 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
500 *
501 * @param idCpu The current CPU.
502 * @param pvUser1 The dynamic mapping cache instance.
503 * @param pvUser2 Unused, NULL.
504 */
505static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
506{
507 Assert(!pvUser2);
508 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)pvUser1;
509 Assert(pThis == g_pPGMR0DynMap);
510 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
511 uint32_t iPage = pThis->cPages;
512 while (iPage-- > 0)
513 ASMInvalidatePage(paPages[iPage].pvPage);
514}
515
516
517/**
518 * Shoot down the TLBs for every single cache entry on all CPUs.
519 *
520 * @returns IPRT status code (RTMpOnAll).
521 * @param pThis The dynamic mapping cache instance.
522 */
523static int pgmR0DynMapTlbShootDown(PPGMR0DYNMAP pThis)
524{
525 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
526 AssertRC(rc);
527 if (RT_FAILURE(rc))
528 {
529 uint32_t iPage = pThis->cPages;
530 while (iPage-- > 0)
531 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
532 }
533 return rc;
534}
535
536
537/**
538 * Calculates the new cache size based on the cMaxLoad statistics.
539 *
540 * @returns Number of pages.
541 * @param pThis The dynamic mapping cache instance.
542 * @param pcMinPages The minimal size in pages.
543 */
544static uint32_t pgmR0DynMapCalcNewSize(PPGMR0DYNMAP pThis, uint32_t *pcMinPages)
545{
546 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
547
548 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU(_MIN). */
549 RTCPUID cCpus = RTMpGetCount();
550 AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
551 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
552 uint32_t cMinPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU_MIN;
553
554 /* adjust against cMaxLoad. */
555 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
556 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
557 pThis->cMaxLoad = 0;
558
559 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
560 cPages += PGMR0DYNMAP_PAGES_PER_CPU;
561
562 if (pThis->cMaxLoad > cMinPages)
563 cMinPages = pThis->cMaxLoad;
564
565 /* adjust against max and current size. */
566 if (cPages < pThis->cPages)
567 cPages = pThis->cPages;
568 cPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
569 if (cPages > PGMR0DYNMAP_MAX_PAGES)
570 cPages = PGMR0DYNMAP_MAX_PAGES;
571
572 if (cMinPages < pThis->cPages)
573 cMinPages = pThis->cPages;
574 cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
575 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
576 cMinPages = PGMR0DYNMAP_MAX_PAGES;
577
578 Assert(cMinPages);
579 *pcMinPages = cMinPages;
580 return cPages;
581}
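
/*
 * Worked example of the calculation above: on a 4 CPU host with no previous
 * load statistics this yields cPages = 4 * 256 = 1024 and cMinPages = 4 * 32 = 128.
 * In a strict build the guard page factor (PGMR0DYNMAP_GUARD_PAGES + 1 = 2)
 * doubles both to 2048 and 256, still below the PGMR0DYNMAP_MAX_PAGES limit
 * of 4096 pages (16 MB / 4 KB).
 */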
582
583
584/**
585 * Initializes the paging level data.
586 *
587 * @param pThis The dynamic mapping cache instance.
588 * @param pPgLvl The paging level data.
589 */
590void pgmR0DynMapPagingArrayInit(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
591{
592 RTCCUINTREG cr4 = ASMGetCR4();
593 switch (pThis->enmPgMode)
594 {
595 case SUPPAGINGMODE_32_BIT:
596 case SUPPAGINGMODE_32_BIT_GLOBAL:
597 pPgLvl->cLevels = 2;
598 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
599 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
600 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
601 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
602 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
603
604 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
605 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
606 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
607 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
608 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
609 break;
610
611 case SUPPAGINGMODE_PAE:
612 case SUPPAGINGMODE_PAE_GLOBAL:
613 case SUPPAGINGMODE_PAE_NX:
614 case SUPPAGINGMODE_PAE_GLOBAL_NX:
615 pPgLvl->cLevels = 3;
616 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
617 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
618 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
619 pPgLvl->a[0].fAndMask = X86_PDPE_P;
620 pPgLvl->a[0].fResMask = X86_PDPE_P;
621
622 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
623 pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK;
624 pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT;
625 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
626 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
627
628 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
629 pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK;
630 pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT;
631 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
632 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
633 break;
634
635 case SUPPAGINGMODE_AMD64:
636 case SUPPAGINGMODE_AMD64_GLOBAL:
637 case SUPPAGINGMODE_AMD64_NX:
638 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
639 pPgLvl->cLevels = 4;
640 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
641 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
642 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
643 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
644 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
645
646 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
647 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
648 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
649 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
650 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
651
652 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
653 pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT;
654 pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK;
655 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
656 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
657
658 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
659 pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT;
660 pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK;
661 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
662 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
663 break;
664
665 default:
666 AssertFailed();
667 pPgLvl->cLevels = 0;
668 break;
669 }
670
671 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
672 {
673 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
674 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
675 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
676 pPgLvl->a[i].u.pv = NULL;
677 }
678}
679
680
681/**
682 * Maps a PTE.
683 *
684 * This will update the segment structure when new PTs are mapped.
685 *
686 * It also assumes that we (for paranoid reasons) wish to establish a mapping
687 * chain from CR3 to the PT that corresponds entirely to the processor we're
688 * currently running on, and goes about this by running with interrupts disabled
689 * and restarting from CR3 for every change.
690 *
691 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
692 * to re-enable interrupts.
693 * @param pThis The dynamic mapping cache instance.
694 * @param pPgLvl The paging level structure.
695 * @param pvPage The page.
696 * @param pSeg The segment.
697 * @param cMaxPTs The max number of PTs expected in the segment.
698 * @param ppvPTE Where to store the PTE address.
699 */
700static int pgmR0DynMapPagingArrayMapPte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
701 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
702{
703 Assert(!(ASMGetFlags() & X86_EFL_IF));
704 void *pvEntry = NULL;
705 X86PGPAEUINT uEntry = ASMGetCR3();
706 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
707 {
708 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
709 if (pPgLvl->a[i].HCPhys != HCPhys)
710 {
711 /*
712 * Need to remap this level.
713 * The final level, the PT, will not be freed since that is what it's all about.
714 */
715 ASMIntEnable();
716 if (i + 1 == pPgLvl->cLevels)
717 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_INTERNAL_ERROR);
718 else
719 {
720 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
721 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
722 }
723
724 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE);
725 if (RT_SUCCESS(rc))
726 {
727 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
728 (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
729 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
730 if (RT_SUCCESS(rc))
731 {
732 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
733 AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
734 pPgLvl->a[i].HCPhys = HCPhys;
735 if (i + 1 == pPgLvl->cLevels)
736 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
737 ASMIntDisable();
738 return VINF_TRY_AGAIN;
739 }
740
741 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
742 }
743 else
744 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
745 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
746 return rc;
747 }
748
749 /*
750 * The next level.
751 */
752 uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
753 if (pThis->fLegacyMode)
754 {
755 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
756 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
757 }
758 else
759 {
760 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
761 uEntry = pPgLvl->a[i].u.paPae[iEntry];
762 }
763
764 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
765 {
766 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n"
767 "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n",
768 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask,
769 pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode));
770 return VERR_INTERNAL_ERROR;
771 }
772 /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/
773 }
774
775 /* made it thru without needing to remap anything. */
776 *ppvPTE = pvEntry;
777 return VINF_SUCCESS;
778}
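
/*
 * Illustrative caller sketch for the function above (the real caller is
 * pgmR0DynMapAddSeg below; the function name here is made up): interrupts are
 * kept disabled and the call retried for as long as it returns VINF_TRY_AGAIN,
 * since every remap re-enables interrupts and restarts the CR3 walk.
 */
#if 0 /* example only, never compiled */
static int pgmR0DynMapExampleMapOnePte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs,
                                       void *pbPage, void **ppvPte)
{
    PGMR0DYNMAPPGLVL PgLvl;
    int rc;

    ASMIntDisable();
    pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
    do
        rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs, ppvPte);
    while (rc == VINF_TRY_AGAIN);
    ASMIntEnable();

    /* Unmap the intermediate levels again; the final PT mapping is the one that is kept. */
    for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
        RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
    return rc;
}
#endif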
779
780
781/**
782 * Sets up a guard page.
783 *
784 * @param pThis The dynamic mapping cache instance.
785 * @param pPage The page.
786 */
787DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMR0DYNMAP pThis, PPGMR0DYNMAPENTRY pPage)
788{
789 memset(pPage->pvPage, 0xfd, PAGE_SIZE);
790 pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT;
791 pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
792#ifdef PGMR0DYNMAP_GUARD_NP
793 ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P);
794#else
795 if (pThis->fLegacyMode)
796 ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE);
797 else
798 ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE);
799#endif
800 pThis->cGuardPages++;
801}
802
803
804/**
805 * Adds a new segment of the specified size.
806 *
807 * @returns VBox status code.
808 * @param pThis The dynamic mapping cache instance.
809 * @param cPages The size of the new segment, given as a page count.
810 */
811static int pgmR0DynMapAddSeg(PPGMR0DYNMAP pThis, uint32_t cPages)
812{
813 int rc2;
814 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
815
816 /*
817 * Do the array reallocations first.
818 * (The pages array has to be replaced behind the spinlock of course.)
819 */
820 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
821 if (!pvSavedPTEs)
822 return VERR_NO_MEMORY;
823 pThis->pvSavedPTEs = pvSavedPTEs;
824
825 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
826 if (!pvPages)
827 {
828 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
829 if (pvSavedPTEs)
830 pThis->pvSavedPTEs = pvSavedPTEs;
831 return VERR_NO_MEMORY;
832 }
833
834 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
835 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
836
837 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
838 void *pvToFree = pThis->paPages;
839 pThis->paPages = (PPGMR0DYNMAPENTRY)pvPages;
840
841 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
842 RTMemFree(pvToFree);
843
844 /*
845 * Allocate the segment structure and pages of memory, then touch all the pages (paranoia).
846 */
847 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
848 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
849 if (!pSeg)
850 return VERR_NO_MEMORY;
851 pSeg->pNext = NULL;
852 pSeg->cPages = cPages;
853 pSeg->iPage = pThis->cPages;
854 pSeg->cPTs = 0;
855 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
856 if (RT_SUCCESS(rc))
857 {
858 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
859 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
860 memset(pbPage, 0xfe, cPages << PAGE_SHIFT);
861
862 /*
863 * Walk thru the pages and set them up with a mapping of their PTE and everything.
864 */
865 ASMIntDisable();
866 PGMR0DYNMAPPGLVL PgLvl;
867 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
868 uint32_t const iEndPage = pSeg->iPage + cPages;
869 for (uint32_t iPage = pSeg->iPage;
870 iPage < iEndPage;
871 iPage++, pbPage += PAGE_SIZE)
872 {
873 /* Initialize the page data. */
874 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
875 pThis->paPages[iPage].pvPage = pbPage;
876 pThis->paPages[iPage].cRefs = 0;
877 pThis->paPages[iPage].uPte.pPae = 0;
878 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
879
880 /* Map its page table, retry until we've got a clean run (paranoia). */
881 do
882 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
883 &pThis->paPages[iPage].uPte.pv);
884 while (rc == VINF_TRY_AGAIN);
885 if (RT_FAILURE(rc))
886 break;
887
888 /* Save the PTE. */
889 if (pThis->fLegacyMode)
890 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
891 else
892 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
893
894#ifdef VBOX_STRICT
895 /* Check that we've got the right entry. */
896 RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage);
897 RTHCPHYS HCPhysPte = pThis->fLegacyMode
898 ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK
899 : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK;
900 if (HCPhysPage != HCPhysPte)
901 {
902 LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n",
903 iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv));
904 rc = VERR_INTERNAL_ERROR;
905 break;
906 }
907#endif
908 } /* for each page */
909 ASMIntEnable();
910
911 /* cleanup non-PT mappings */
912 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
913 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
914
915 if (RT_SUCCESS(rc))
916 {
917#if PGMR0DYNMAP_GUARD_PAGES > 0
918 /*
919 * Setup guard pages.
920 * (Note: TLBs will be shot down later on.)
921 */
922 uint32_t iPage = pSeg->iPage;
923 while (iPage < iEndPage)
924 {
925 for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++)
926 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
927 iPage++; /* the guarded page */
928 }
929
930 /* Make sure the very last page is a guard page too. */
931 iPage = iEndPage - 1;
932 if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT)
933 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
934#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */
935
936 /*
937 * Commit it by adding the segment to the list and updating the page count.
938 */
939 pSeg->pNext = pThis->pSegHead;
940 pThis->pSegHead = pSeg;
941 pThis->cPages += cPages;
942 return VINF_SUCCESS;
943 }
944
945 /*
946 * Bail out.
947 */
948 while (pSeg->cPTs-- > 0)
949 {
950 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
951 AssertRC(rc2);
952 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
953 }
954
955 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
956 AssertRC(rc2);
957 pSeg->hMemObj = NIL_RTR0MEMOBJ;
958 }
959 RTMemFree(pSeg);
960
961 /* Don't bother resizing the arrays, but free them if we're the only user. */
962 if (!pThis->cPages)
963 {
964 RTMemFree(pThis->paPages);
965 pThis->paPages = NULL;
966 RTMemFree(pThis->pvSavedPTEs);
967 pThis->pvSavedPTEs = NULL;
968 }
969 return rc;
970}
971
972
973/**
974 * Called by PGMR0DynMapInitVM under the init lock.
975 *
976 * @returns VBox status code.
977 * @param pThis The dynamic mapping cache instance.
978 */
979static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis)
980{
981 /*
982 * Calc the size and add a segment of that size.
983 */
984 uint32_t cMinPages;
985 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
986 AssertReturn(cPages, VERR_INTERNAL_ERROR);
987 int rc = pgmR0DynMapAddSeg(pThis, cPages);
988 if (rc == VERR_NO_MEMORY)
989 {
990 /*
991 * Try adding smaller segments.
992 */
993 do
994 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
995 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
996 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
997 rc = VINF_SUCCESS;
998 if (rc == VERR_NO_MEMORY)
999 {
1000 if (pThis->cPages)
1001 pgmR0DynMapTearDown(pThis);
1002 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
1003 }
1004 }
1005 Assert(ASMGetFlags() & X86_EFL_IF);
1006
1007#if PGMR0DYNMAP_GUARD_PAGES > 0
1008 /* paranoia */
1009 if (RT_SUCCESS(rc))
1010 pgmR0DynMapTlbShootDown(pThis);
1011#endif
1012 return rc;
1013}
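
/*
 * Worked example of the fallback above: if the single big segment (say 1024
 * pages) cannot be allocated, PGMR0DYNMAP_SMALL_SEG_PAGES (128) page segments
 * are added one at a time.  Ending up with only 512 pages still counts as
 * success as long as that is at least cMinPages; otherwise the cache is torn
 * down again and VERR_PGM_DYNMAP_SETUP_ERROR is returned.
 */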
1014
1015
1016/**
1017 * Called by PGMR0DynMapInitVM under the init lock.
1018 *
1019 * @returns VBox status code.
1020 * @param pThis The dynamic mapping cache instance.
1021 */
1022static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis)
1023{
1024 /*
1025 * Calc the new target size and add a segment of the appropriate size.
1026 */
1027 uint32_t cMinPages;
1028 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1029 AssertReturn(cPages, VERR_INTERNAL_ERROR);
1030 if (pThis->cPages >= cPages)
1031 return VINF_SUCCESS;
1032
1033 uint32_t cAdd = cPages - pThis->cPages;
1034 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
1035 if (rc == VERR_NO_MEMORY)
1036 {
1037 /*
1038 * Try adding smaller segments.
1039 */
1040 do
1041 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1042 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1043 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1044 rc = VINF_SUCCESS;
1045 if (rc == VERR_NO_MEMORY)
1046 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
1047 }
1048 Assert(ASMGetFlags() & X86_EFL_IF);
1049
1050#if PGMR0DYNMAP_GUARD_PAGES > 0
1051 /* paranoia */
1052 if (RT_SUCCESS(rc))
1053 pgmR0DynMapTlbShootDown(pThis);
1054#endif
1055 return rc;
1056}
1057
1058
1059/**
1060 * Called by PGMR0DynMapTermVM under the init lock.
1061 *
1062 * @returns VBox status code.
1063 * @param pThis The dynamic mapping cache instance.
1064 */
1065static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis)
1066{
1067 /*
1068 * Restore the original page table entries
1069 */
1070 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1071 uint32_t iPage = pThis->cPages;
1072 if (pThis->fLegacyMode)
1073 {
1074 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
1075 while (iPage-- > 0)
1076 {
1077 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
1078 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1079 X86PGUINT uNew = paSavedPTEs[iPage];
1080 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
1081 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1082 Assert(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage]);
1083 }
1084 }
1085 else
1086 {
1087 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
1088 while (iPage-- > 0)
1089 {
1090 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
1091 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1092 X86PGPAEUINT uNew = paSavedPTEs[iPage];
1093 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
1094 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1095 Assert(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage]);
1096 }
1097 }
1098
1099 /*
1100 * Shoot down the TLBs on all CPUs before freeing them.
1101 */
1102 pgmR0DynMapTlbShootDown(pThis);
1103
1104 /*
1105 * Free the segments.
1106 */
1107 while (pThis->pSegHead)
1108 {
1109 int rc;
1110 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
1111 pThis->pSegHead = pSeg->pNext;
1112
1113 uint32_t iPT = pSeg->cPTs;
1114 while (iPT-- > 0)
1115 {
1116 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
1117 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
1118 }
1119 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
1120 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1121 pSeg->pNext = NULL;
1122 pSeg->iPage = UINT16_MAX;
1123 pSeg->cPages = 0;
1124 pSeg->cPTs = 0;
1125 RTMemFree(pSeg);
1126 }
1127
1128 /*
1129 * Free the arrays and restore the initial state.
1130 * The cMaxLoad value is left behind for the next setup.
1131 */
1132 RTMemFree(pThis->paPages);
1133 pThis->paPages = NULL;
1134 RTMemFree(pThis->pvSavedPTEs);
1135 pThis->pvSavedPTEs = NULL;
1136 pThis->cPages = 0;
1137 pThis->cLoad = 0;
1138 pThis->cGuardPages = 0;
1139}
1140
1141
1142/**
1143 * Release references to a page, caller owns the spin lock.
1144 *
1145 * @param pThis The dynamic mapping cache instance.
1146 * @param iPage The page.
1147 * @param cRefs The number of references to release.
1148 */
1149DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs)
1150{
1151 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs;
1152 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
1153 if (!cRefs)
1154 pThis->cLoad--;
1155}
1156
1157
1158/**
1159 * Release references to a page, caller does not own the spin lock.
1160 *
1161 * @param pThis The dynamic mapping cache instance.
1162 * @param iPage The page.
1163 * @param cRefs The number of references to release.
1164 */
1165static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1166{
1167 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1168 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1169 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1170 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1171}
1172
1173
1174/**
1175 * pgmR0DynMapPage worker that deals with the tedious bits.
1176 *
1177 * @returns The page index on success, UINT32_MAX on failure.
1178 * @param pThis The dynamic mapping cache instance.
1179 * @param HCPhys The address of the page to be mapped.
1180 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1181 * @param pVM The shared VM structure, for statistics only.
1182 */
1183static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage, PVM pVM)
1184{
1185 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageSlow);
1186
1187 /*
1188 * Check if any of the first 3 pages are unreferenced since the caller
1189 * already has made sure they aren't matching.
1190 */
1191#ifdef VBOX_WITH_STATISTICS
1192 bool fLooped = false;
1193#endif
1194 uint32_t const cPages = pThis->cPages;
1195 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1196 uint32_t iFreePage;
1197 if (!paPages[iPage].cRefs)
1198 iFreePage = iPage;
1199 else if (!paPages[(iPage + 1) % cPages].cRefs)
1200 iFreePage = (iPage + 1) % cPages;
1201 else if (!paPages[(iPage + 2) % cPages].cRefs)
1202 iFreePage = (iPage + 2) % cPages;
1203 else
1204 {
1205 /*
1206 * Search for an unused or matching entry.
1207 */
1208 iFreePage = (iPage + 3) % cPages;
1209 for (;;)
1210 {
1211 if (paPages[iFreePage].HCPhys == HCPhys)
1212 {
1213 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageSlowLoopHits);
1214 return iFreePage;
1215 }
1216 if (!paPages[iFreePage].cRefs)
1217 break;
1218
1219 /* advance */
1220 iFreePage = (iFreePage + 1) % cPages;
1221 if (RT_UNLIKELY(iFreePage == iPage))
1222 return UINT32_MAX;
1223 }
1224 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageSlowLoopMisses);
1225#ifdef VBOX_WITH_STATISTICS
1226 fLooped = true;
1227#endif
1228 }
1229 Assert(iFreePage < cPages);
1230
1231#if 0 //def VBOX_WITH_STATISTICS
1232 /* Check for lost hits. */
1233 if (!fLooped)
1234 for (uint32_t iPage2 = (iPage + 3) % cPages; iPage2 != iPage; iPage2 = (iPage2 + 1) % cPages)
1235 if (paPages[iPage2].HCPhys == HCPhys)
1236 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageSlowLostHits);
1237#endif
1238
1239 /*
1240 * Setup the new entry.
1241 */
1242 /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/
1243 paPages[iFreePage].HCPhys = HCPhys;
1244 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1245 if (pThis->fLegacyMode)
1246 {
1247 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1248 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1249 X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1250 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1251 | (HCPhys & X86_PTE_PG_MASK);
1252 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1253 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1254 Assert(paPages[iFreePage].uPte.pLegacy->u == uNew);
1255 }
1256 else
1257 {
1258 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1259 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1260 X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1261 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1262 | (HCPhys & X86_PTE_PAE_PG_MASK);
1263 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1264 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1265 Assert(paPages[iFreePage].uPte.pPae->u == uNew);
1266 /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/
1267 }
1268 return iFreePage;
1269}
1270
1271
1272/**
1273 * Maps a page into the pool.
1274 *
1275 * @returns Page index on success, UINT32_MAX on failure.
1276 * @param pThis The dynamic mapping cache instance.
1277 * @param HCPhys The address of the page to be mapped.
1278 * @param iRealCpu The real cpu set index. (optimization)
1279 * @param pVM The shared VM structure, for statistics only.
1280 * @param ppvPage Where to store the page address.
1281 */
1282DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, int32_t iRealCpu, PVM pVM, void **ppvPage)
1283{
1284 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1285 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1286 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1287 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPage);
1288
1289 /*
1290 * Find an entry, if possible a matching one. The HCPhys address is hashed
1291 * down to a page index, collisions are handled by linear searching.
1292 * Optimized for a hit in the first 3 pages.
1293 *
1294 * Do the cheap hits here and defer the tedious searching and inserting
1295 * to a helper function.
1296 */
1297 uint32_t const cPages = pThis->cPages;
1298 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1299 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1300 if (RT_LIKELY(paPages[iPage].HCPhys == HCPhys))
1301 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageHits0);
1302 else
1303 {
1304 uint32_t iPage2 = (iPage + 1) % cPages;
1305 if (RT_LIKELY(paPages[iPage2].HCPhys == HCPhys))
1306 {
1307 iPage = iPage2;
1308 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageHits1);
1309 }
1310 else
1311 {
1312 iPage2 = (iPage + 2) % cPages;
1313 if (paPages[iPage2].HCPhys == HCPhys)
1314 {
1315 iPage = iPage2;
1316 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageHits2);
1317 }
1318 else
1319 {
1320 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage, pVM);
1321 if (RT_UNLIKELY(iPage == UINT32_MAX))
1322 {
1323 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1324 return iPage;
1325 }
1326 }
1327 }
1328 }
1329
1330 /*
1331 * Reference it, update statistics and get the return address.
1332 */
1333 int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs);
1334 if (cRefs == 1)
1335 {
1336 pThis->cLoad++;
1337 if (pThis->cLoad > pThis->cMaxLoad)
1338 pThis->cMaxLoad = pThis->cLoad;
1339 AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages));
1340 }
1341 else if (RT_UNLIKELY(cRefs <= 0))
1342 {
1343 ASMAtomicDecS32(&paPages[iPage].cRefs);
1344 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1345 AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%p HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX);
1346 }
1347 void *pvPage = paPages[iPage].pvPage;
1348
1349 /*
1350 * Invalidate the entry?
1351 */
1352 bool fInvalidateIt = RTCpuSetIsMemberByIndex(&paPages[iPage].PendingSet, iRealCpu);
1353 if (RT_UNLIKELY(fInvalidateIt))
1354 RTCpuSetDelByIndex(&paPages[iPage].PendingSet, iRealCpu);
1355
1356 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1357
1358 /*
1359 * Do the actual invalidation outside the spinlock.
1360 */
1361 if (RT_UNLIKELY(fInvalidateIt))
1362 {
1363 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageInvlPg);
1364 ASMInvalidatePage(pvPage);
1365 }
1366
1367 *ppvPage = pvPage;
1368 return iPage;
1369}
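
/*
 * Worked example of the hashing above: with cPages = 1024 a page at
 * HCPhys = 0x12345000 hashes to iPage = (0x12345000 >> PAGE_SHIFT) % 1024 = 0x345,
 * so slots 0x345, 0x346 and 0x347 are probed cheaply before pgmR0DynMapPageSlow
 * takes over with its linear search.
 */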
1370
1371
1372/**
1373 * Asserts the integrity of the pool.
1374 *
1375 * @returns VBox status code.
1376 */
1377VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void)
1378{
1379 /*
1380 * Basic pool stuff that doesn't require any lock, just assumes we're a user.
1381 */
1382 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1383 if (!pThis)
1384 return VINF_SUCCESS;
1385 AssertPtrReturn(pThis, VERR_INVALID_POINTER);
1386 AssertReturn(pThis->u32Magic == PGMR0DYNMAP_MAGIC, VERR_INVALID_MAGIC);
1387 if (!pThis->cUsers)
1388 return VERR_INVALID_PARAMETER;
1389
1390
1391 int rc = VINF_SUCCESS;
1392 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1393 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1394
1395#define CHECK_RET(expr, a) \
1396 do { \
1397 if (RT_UNLIKELY(!(expr))) \
1398 { \
1399 RTSpinlockRelease(pThis->hSpinlock, &Tmp); \
1400 AssertMsg1(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \
1401 AssertMsg2 a; \
1402 return VERR_INTERNAL_ERROR; \
1403 } \
1404 } while (0)
1405
1406 /*
1407 * Check that the PTEs are correct.
1408 */
1409 uint32_t cGuard = 0;
1410 uint32_t cLoad = 0;
1411 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1412 uint32_t iPage = pThis->cPages;
1413 if (pThis->fLegacyMode)
1414 {
1415 PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1416 while (iPage-- > 0)
1417 {
1418 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1419 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1420 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1421 {
1422#ifdef PGMR0DYNMAP_GUARD_NP
1423 CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P),
1424 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1425#else
1426 CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE,
1427 ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u));
1428#endif
1429 cGuard++;
1430 }
1431 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1432 {
1433 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1434 X86PGUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1435 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1436 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1437 CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte,
1438 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1439 if (paPages[iPage].cRefs)
1440 cLoad++;
1441 }
1442 else
1443 CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage],
1444 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1445 }
1446 }
1447 else
1448 {
1449 PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1450 while (iPage-- > 0)
1451 {
1452 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1453 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1454 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1455 {
1456#ifdef PGMR0DYNMAP_GUARD_NP
1457 CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P),
1458 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1459#else
1460 CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE,
1461 ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u));
1462#endif
1463 cGuard++;
1464 }
1465 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1466 {
1467 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1468 X86PGPAEUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1469 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1470 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1471 CHECK_RET(paPages[iPage].uPte.pPae->u == uPte,
1472 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1473 if (paPages[iPage].cRefs)
1474 cLoad++;
1475 }
1476 else
1477 CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage],
1478 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1479 }
1480 }
1481
1482 CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad));
1483 CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages));
1484
1485#undef CHECK_RET
1486 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1487 return VINF_SUCCESS;
1488}
1489
1490
1491/**
1492 * Signals the start of a new set of mappings.
1493 *
1494 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1495 * API is called.
1496 *
1497 * @param pVCpu The shared data for the current virtual CPU.
1498 */
1499VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu)
1500{
1501 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1502 Assert(pVCpu->pgm.s.AutoSet.iSubset == UINT32_MAX);
1503 pVCpu->pgm.s.AutoSet.cEntries = 0;
1504 pVCpu->pgm.s.AutoSet.iCpu = RTMpCpuIdToSetIndex(RTMpCpuId());
1505}
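
/*
 * Minimal usage sketch (the function and variable names local to the example
 * are made up): mappings are only valid between a start/release pair on the
 * auto set of the current virtual CPU.
 */
#if 0 /* example only, never compiled */
static void pgmR0DynMapExampleAccess(PVM pVM, PVMCPU pVCpu, RTHCPHYS HCPhys)
{
    void *pv;

    PGMDynMapStartAutoSet(pVCpu);
    int rc = pgmR0DynMapHCPageCommon(pVM, &pVCpu->pgm.s.AutoSet, HCPhys, &pv);
    if (RT_SUCCESS(rc))
    {
        /* ... access the page through pv; it stays valid until the set is released ... */
    }
    PGMDynMapReleaseAutoSet(pVCpu); /* releases every reference taken since the start call */
}
#endif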
1506
1507
1508/**
1509 * Worker that performs the actual flushing of the set.
1510 *
1511 * @param pSet The set to flush.
1512 * @param cEntries The number of entries.
1513 */
1514DECLINLINE(void) pgmDynMapFlushAutoSetWorker(PPGMMAPSET pSet, uint32_t cEntries)
1515{
1516 /*
1517 * Release any pages it's referencing.
1518 */
1519 if ( cEntries != 0
1520 && RT_LIKELY(cEntries <= RT_ELEMENTS(pSet->aEntries)))
1521 {
1522 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1523 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1524 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1525
1526 uint32_t i = cEntries;
1527 while (i-- > 0)
1528 {
1529 uint32_t iPage = pSet->aEntries[i].iPage;
1530 Assert(iPage < pThis->cPages);
1531 int32_t cRefs = pSet->aEntries[i].cRefs;
1532 Assert(cRefs > 0);
1533 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1534
1535 pSet->aEntries[i].iPage = UINT16_MAX;
1536 pSet->aEntries[i].cRefs = 0;
1537 }
1538
1539 Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages);
1540 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1541 }
1542}
1543
1544
1545/**
1546 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associated
1547 * functions since the PGMDynMapStartAutoSet call.
1548 *
1549 * @param pVCpu The shared data for the current virtual CPU.
1550 */
1551VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu)
1552{
1553 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1554
1555 /*
1556 * Close and flush the set.
1557 */
1558 uint32_t cEntries = pSet->cEntries;
1559 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1560 AssertMsg(cEntries <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", cEntries, cEntries));
1561 pSet->cEntries = PGMMAPSET_CLOSED;
1562 pSet->iSubset = UINT32_MAX;
1563 pSet->iCpu = -1;
1564
1565 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1566}
1567
1568
1569/**
1570 * Flushes the set if it's above a certain threshold.
1571 *
1572 * @param pVCpu The shared data for the current virtual CPU.
1573 */
1574VMMDECL(void) PGMDynMapFlushAutoSet(PVMCPU pVCpu)
1575{
1576 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1577 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1578
1579 /*
1580 * Only flush it if it's at least 50% full.
1581 */
1582 uint32_t cEntries = pSet->cEntries;
1583 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1584 if (cEntries >= RT_ELEMENTS(pSet->aEntries) / 2)
1585 {
1586 AssertMsg(cEntries <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", cEntries, cEntries));
1587 pSet->cEntries = 0;
1588
1589 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1590 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1591 }
1592}
1593
1594
1595/**
1596 * Migrates the automatic mapping set of the current vCPU if it's active and
1597 * necessary.
1598 *
1599 * This is called when re-entering the hardware assisted execution mode after a
1600 * nip down to ring-3. We run the risk that the CPU might have changed, and we
1601 * will therefore make sure all the cache entries currently in the auto set will
1602 * be valid on the new CPU. If the CPU didn't change, nothing will happen as all
1603 * the entries will have been flagged as invalidated.
1604 *
1605 * @param pVCpu The shared data for the current virtual CPU.
1606 * @thread EMT
1607 */
1608VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu)
1609{
1610 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1611 int32_t iRealCpu = RTMpCpuIdToSetIndex(RTMpCpuId());
1612 if (pSet->iCpu != iRealCpu)
1613 {
1614 uint32_t i = pSet->cEntries;
1615 if (i != PGMMAPSET_CLOSED)
1616 {
1617 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1618 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1619 {
1620 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1621 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1622 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1623
1624 while (i-- > 0)
1625 {
1626 Assert(pSet->aEntries[i].cRefs > 0);
1627 uint32_t iPage = pSet->aEntries[i].iPage;
1628 Assert(iPage < pThis->cPages);
1629 if (RTCpuSetIsMemberByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu))
1630 {
1631 RTCpuSetDelByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu);
1632 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1633
1634 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
1635 STAM_COUNTER_INC(&pVCpu->pVMR0->pgm.s.StatR0DynMapMigrateInvlPg);
1636
1637 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1638 }
1639 }
1640
1641 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1642 }
1643 }
1644 pSet->iCpu = iRealCpu;
1645 }
1646}
1647
1648
1649/**
1650 * Worker function that flushes the current subset.
1651 *
1652 * This is called when the set is popped or when the set
1653 * has too high a load. As also pointed out elsewhere, the
1654 * whole subset thing is a hack for working around code that
1655 * accesses too many pages. Like PGMPool.
1656 *
1657 * @param pSet The set which subset to flush.
1658 */
1659static void pgmDynMapFlushSubset(PPGMMAPSET pSet)
1660{
1661 uint32_t iSubset = pSet->iSubset;
1662 uint32_t i = pSet->cEntries;
1663 Assert(i <= RT_ELEMENTS(pSet->aEntries));
1664 if ( i > iSubset
1665 && i <= RT_ELEMENTS(pSet->aEntries))
1666 {
1667 pSet->cEntries = iSubset;
1668
1669 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1670 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1671 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1672
1673 while (i-- > iSubset)
1674 {
1675 uint32_t iPage = pSet->aEntries[i].iPage;
1676 Assert(iPage < pThis->cPages);
1677 int32_t cRefs = pSet->aEntries[i].cRefs;
1678 Assert(cRefs > 0);
1679 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1680
1681 pSet->aEntries[i].iPage = UINT16_MAX;
1682 pSet->aEntries[i].cRefs = 0;
1683 }
1684
1685 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1686 }
1687}
1688
1689
1690/**
1691 * Creates a subset.
1692 *
1693 * A subset is a hack to avoid having to rewrite code that touches a lot of
1694 * pages. It prevents the mapping set from being overflowed by automatically
1695 * flushing previous mappings when a certain threshold is reached.
1696 *
1697 * Pages mapped after calling this function are only valid until the next page
1698 * is mapped.
1699 *
1700 * @returns The index of the previous subset. Pass this to
1701 * PGMDynMapPopAutoSubset when popping it.
1702 * @param pVCpu Pointer to the virtual cpu data.
1703 */
1704VMMDECL(uint32_t) PGMDynMapPushAutoSubset(PVMCPU pVCpu)
1705{
1706 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1707 AssertReturn(pSet->cEntries != PGMMAPSET_CLOSED, UINT32_MAX);
1708 uint32_t iPrevSubset = pSet->iSubset;
1709 Assert(iPrevSubset == UINT32_MAX);
1710 pSet->iSubset = pSet->cEntries;
1711 STAM_COUNTER_INC(&pVCpu->pVMR0->pgm.s.StatR0DynMapSubsets);
1712 return iPrevSubset;
1713}
1714
1715
1716/**
1717 * Pops a subset created by a previous call to PGMDynMapPushAutoSubset.
1718 *
1719 * @param pVCpu Pointer to the virtual cpu data.
1720 * @param iPrevSubset What PGMDynMapPushAutoSubset returned.
1721 */
1722VMMDECL(void) PGMDynMapPopAutoSubset(PVMCPU pVCpu, uint32_t iPrevSubset)
1723{
1724 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1725 AssertReturnVoid(pSet->cEntries != PGMMAPSET_CLOSED);
1726 AssertReturnVoid(pSet->iSubset <= iPrevSubset || iPrevSubset == UINT32_MAX);
1727 Assert(iPrevSubset == UINT32_MAX);
1728 if ( pSet->cEntries >= RT_ELEMENTS(pSet->aEntries) / 2
1729 && pSet->cEntries != pSet->iSubset)
1730 {
1731 STAM_COUNTER_INC(&pVCpu->pVMR0->pgm.s.StatR0DynMapPopFlushes);
1732 pgmDynMapFlushSubset(pSet);
1733 }
1734 pSet->iSubset = iPrevSubset;
1735}
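
/*
 * Illustrative usage sketch: code that is about to touch a lot of pages, the
 * page pool being the prime example, brackets the work with a push/pop pair
 * so the auto set cannot overflow.  This is only a sketch; pVCpu is assumed
 * to be the current virtual CPU, and any mapping made inside the bracket is
 * only valid until the next page is mapped.
 *
 * @code
 *      uint32_t const iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
 *      ... map and access many pages, e.g. via PGMDynMapHCPage ...
 *      PGMDynMapPopAutoSubset(pVCpu, iPrevSubset);
 * @endcode
 */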
1736
1737
1738/**
1739 * As a final resort for a full auto set, try to merge duplicate entries.
1740 *
1741 * @param pSet The set.
1742 */
1743static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1744{
1745 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1746 {
1747 uint16_t const iPage = pSet->aEntries[i].iPage;
1748 uint32_t j = i + 1;
1749 while (j < pSet->cEntries)
1750 {
1751 if (pSet->aEntries[j].iPage != iPage)
1752 j++;
1753 else if ((uint32_t)pSet->aEntries[i].cRefs + (uint32_t)pSet->aEntries[j].cRefs < UINT16_MAX)
1754 {
1755 /* merge j into i removing j. */
1756 pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs;
1757 pSet->cEntries--;
1758 if (j < pSet->cEntries)
1759 {
1760 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1761 pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX;
1762 pSet->aEntries[pSet->cEntries].cRefs = 0;
1763 }
1764 else
1765 {
1766 pSet->aEntries[j].iPage = UINT16_MAX;
1767 pSet->aEntries[j].cRefs = 0;
1768 }
1769 }
1770 else
1771 {
1772 /* migrate the max number of refs from j into i and quit the inner loop. */
1773 uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs;
1774 Assert(pSet->aEntries[j].cRefs > cMigrate);
1775 pSet->aEntries[j].cRefs -= cMigrate;
1776 pSet->aEntries[i].cRefs = UINT16_MAX - 1;
1777 break;
1778 }
1779 }
1780 }
1781}
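
/*
 * Worked example, for illustration only: if a full set contains the duplicate
 * entries {iPage=7, cRefs=3} at slot i and {iPage=7, cRefs=2} at slot j, the
 * loop above folds them into one entry, moves the last entry of the set into
 * slot j and shrinks cEntries by one, freeing a slot for the caller:
 *
 * @code
 *      before: aEntries[i] = {iPage=7, cRefs=3}    aEntries[j] = {iPage=7, cRefs=2}
 *      after:  aEntries[i] = {iPage=7, cRefs=5}    aEntries[j] = <last entry of the set>
 * @endcode
 *
 * Only when the combined reference count would reach UINT16_MAX are the
 * references split between the two entries instead of merged.
 */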
1782
1783
1784/**
1785 * Common worker code for PGMDynMapHCPhys, pgmR0DynMapHCPageInlined and
1786 * pgmR0DynMapGCPageInlined.
1787 *
1788 * @returns VBox status code.
1789 * @param pVM The shared VM structure (for statistics).
1790 * @param pSet The set.
1791 * @param HCPhys The physical address of the page.
1792 * @param ppv Where to store the address of the mapping on success.
1793 *
1794 * @remarks This is a very hot path.
1795 */
1796int pgmR0DynMapHCPageCommon(PVM pVM, PPGMMAPSET pSet, RTHCPHYS HCPhys, void **ppv)
1797{
1798 AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags()));
1799
1800 /*
1801 * Map it.
1802 */
1803 void *pvPage;
1804 uint32_t const iPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, pSet->iCpu, pVM, &pvPage);
1805 if (RT_UNLIKELY(iPage == UINT32_MAX))
1806 {
1807 static uint32_t s_cBitched = 0;
1808 if (++s_cBitched < 10)
1809 LogRel(("PGMDynMapHCPage: cLoad=%u/%u cPages=%u cGuardPages=%u\n",
1810 g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages, g_pPGMR0DynMap->cGuardPages));
1811 *ppv = NULL;
1812 return VERR_PGM_DYNMAP_FAILED;
1813 }
1814
1815 /*
1816 * Add the page to the auto reference set.
1817 *
1818 * The typical usage pattern means that the same pages will be mapped
1819 * several times in the same set. We can catch most of these
1820 * remappings by looking a few pages back into the set. (The searching
1821 * and set optimizing path will hardly ever be used when doing this.)
1822 */
1823 AssertCompile(RT_ELEMENTS(pSet->aEntries) >= 8);
1824 int32_t i = pSet->cEntries;
1825 if (i-- < 5)
1826 {
1827 unsigned iEntry = pSet->cEntries++;
1828 pSet->aEntries[iEntry].cRefs = 1;
1829 pSet->aEntries[iEntry].iPage = iPage;
1830 pSet->aEntries[iEntry].pvPage = pvPage;
1831 pSet->aEntries[iEntry].HCPhys = HCPhys;
1832 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1833 }
1834 /* Any of the last 5 pages? */
1835 else if ( pSet->aEntries[i - 0].iPage == iPage
1836 && pSet->aEntries[i - 0].cRefs < UINT16_MAX - 1)
1837 pSet->aEntries[i - 0].cRefs++;
1838 else if ( pSet->aEntries[i - 1].iPage == iPage
1839 && pSet->aEntries[i - 1].cRefs < UINT16_MAX - 1)
1840 pSet->aEntries[i - 1].cRefs++;
1841 else if ( pSet->aEntries[i - 2].iPage == iPage
1842 && pSet->aEntries[i - 2].cRefs < UINT16_MAX - 1)
1843 pSet->aEntries[i - 2].cRefs++;
1844 else if ( pSet->aEntries[i - 3].iPage == iPage
1845 && pSet->aEntries[i - 3].cRefs < UINT16_MAX - 1)
1846 pSet->aEntries[i - 3].cRefs++;
1847 else if ( pSet->aEntries[i - 4].iPage == iPage
1848 && pSet->aEntries[i - 4].cRefs < UINT16_MAX - 1)
1849 pSet->aEntries[i - 4].cRefs++;
1850 /* Don't bother searching unless we're above a 75% load. */
1851 else if (RT_LIKELY(i <= (int32_t)RT_ELEMENTS(pSet->aEntries) / 4 * 3))
1852 {
1853 unsigned iEntry = pSet->cEntries++;
1854 pSet->aEntries[iEntry].cRefs = 1;
1855 pSet->aEntries[iEntry].iPage = iPage;
1856 pSet->aEntries[iEntry].pvPage = pvPage;
1857 pSet->aEntries[iEntry].HCPhys = HCPhys;
1858 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1859 }
1860 else
1861 {
1862 /* Search the rest of the set. */
1863 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
1864 i -= 4;
1865 while (i-- > 0)
1866 if ( pSet->aEntries[i].iPage == iPage
1867 && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
1868 {
1869 pSet->aEntries[i].cRefs++;
1870 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapSetSearchHits);
1871 break;
1872 }
1873 if (i < 0)
1874 {
1875 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapSetSearchMisses);
1876 if (pSet->iSubset < pSet->cEntries)
1877 {
1878 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapSetSearchFlushes);
1879 pgmDynMapFlushSubset(pSet);
1880 }
1881
1882 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
1883 {
1884 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapSetOptimize);
1885 pgmDynMapOptimizeAutoSet(pSet);
1886 }
1887
1888 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
1889 {
1890 unsigned iEntry = pSet->cEntries++;
1891 pSet->aEntries[iEntry].cRefs = 1;
1892 pSet->aEntries[iEntry].iPage = iPage;
1893 pSet->aEntries[iEntry].pvPage = pvPage;
1894 pSet->aEntries[iEntry].HCPhys = HCPhys;
1895 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
1896 }
1897 else
1898 {
1899 /* We're screwed. */
1900 pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1);
1901
1902 static uint32_t s_cBitched = 0;
1903 if (++s_cBitched < 10)
1904 LogRel(("PGMDynMapHCPage: set is full!\n"));
1905 *ppv = NULL;
1906 return VERR_PGM_DYNMAP_FULL_SET;
1907 }
1908 }
1909 }
1910
1911 *ppv = pvPage;
1912 return VINF_SUCCESS;
1913}
1914
1915
1916/* documented elsewhere - a bit of a mess. */
1917VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv)
1918{
1919 /*
1920 * Validate state.
1921 */
1922 STAM_PROFILE_START(&pVM->pgm.s.StatR0DynMapHCPage, a);
1923 AssertPtr(ppv);
1924 AssertMsgReturn(pVM->pgm.s.pvR0DynMapUsed == g_pPGMR0DynMap,
1925 ("%p != %p\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap),
1926 VERR_ACCESS_DENIED);
1927 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1928 PVMCPU pVCpu = VMMGetCpu(pVM);
1929 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1930 AssertPtrReturn(pVCpu, VERR_INTERNAL_ERROR);
1931 AssertMsgReturn(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries),
1932 ("%#x (%u)\n", pSet->cEntries, pSet->cEntries), VERR_WRONG_ORDER);
1933
1934 /*
1935 * Call common code.
1936 */
1937 int rc = pgmR0DynMapHCPageCommon(pVM, pSet, HCPhys, ppv);
1938
1939 STAM_PROFILE_STOP(&pVM->pgm.s.StatR0DynMapHCPage, a);
1940 return rc;
1941}
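
/*
 * Illustrative usage sketch: a ring-0 caller maps a page aligned host
 * physical address for temporary access inside an open auto set, following
 * the same pattern as the self test below.  This is only a sketch; pVM,
 * pVCpu and HCPhys are assumed to be supplied by the caller, and interrupts
 * stay disabled so the thread cannot change CPU between mapping and access.
 *
 * @code
 *      void *pv;
 *      ASMIntDisable();
 *      PGMDynMapStartAutoSet(pVCpu);
 *      int rc = PGMDynMapHCPage(pVM, HCPhys, &pv);
 *      if (RT_SUCCESS(rc))
 *      {
 *          ... read or write the page through pv ...
 *      }
 *      PGMDynMapReleaseAutoSet(pVCpu);
 *      ASMIntEnable();
 * @endcode
 */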
1942
1943
1944#ifdef DEBUG
1945/** For pgmR0DynMapTest3PerCpu. */
1946typedef struct PGMR0DYNMAPTEST
1947{
1948 uint32_t u32Expect;
1949 uint32_t *pu32;
1950 uint32_t volatile cFailures;
1951} PGMR0DYNMAPTEST;
1952typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST;
1953
1954/**
1955 * Checks that the content of the page is the same on all CPUs, i.e. that there
1956 * are no CPU specific PTs or similar nasty stuff involved.
1957 *
1958 * @param idCpu The current CPU.
1959 * @param pvUser1 Pointer to a PGMR0DYNMAPTEST structure.
1960 * @param pvUser2 Unused, ignored.
1961 */
1962static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1963{
1964 PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1;
1965 ASMInvalidatePage(pTest->pu32);
1966 if (*pTest->pu32 != pTest->u32Expect)
1967 ASMAtomicIncU32(&pTest->cFailures);
1968 NOREF(pvUser2); NOREF(idCpu);
1969}
1970
1971
1972/**
1973 * Performs some basic tests in debug builds.
1974 */
1975static int pgmR0DynMapTest(PVM pVM)
1976{
1977 LogRel(("pgmR0DynMapTest: ****** START ******\n"));
1978 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1979 PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet;
1980 uint32_t i;
1981
1982 /*
1983 * Assert internal integrity first.
1984 */
1985 LogRel(("Test #0\n"));
1986 int rc = PGMR0DynMapAssertIntegrity();
1987 if (RT_FAILURE(rc))
1988 return rc;
1989
1990 void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed;
1991 pVM->pgm.s.pvR0DynMapUsed = pThis;
1992
1993 /*
1994 * Simple test, map CR3 twice and check that we're getting the
1995 * same mapping address back.
1996 */
1997 LogRel(("Test #1\n"));
1998 ASMIntDisable();
1999 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
2000
2001 uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK;
2002 void *pv = (void *)(intptr_t)-1;
2003 void *pv2 = (void *)(intptr_t)-2;
2004 rc = PGMDynMapHCPage(pVM, cr3, &pv);
2005 int rc2 = PGMDynMapHCPage(pVM, cr3, &pv2);
2006 ASMIntEnable();
2007 if ( RT_SUCCESS(rc2)
2008 && RT_SUCCESS(rc)
2009 && pv == pv2)
2010 {
2011 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2012 rc = PGMR0DynMapAssertIntegrity();
2013
2014 /*
2015 * Check that the simple set overflow code works by filling it
2016 * with more CR3 mappings.
2017 */
2018 LogRel(("Test #2\n"));
2019 ASMIntDisable();
2020 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2021 for (i = 0 ; i < UINT16_MAX*2 - 1 && RT_SUCCESS(rc) && pv2 == pv; i++)
2022 {
2023 pv2 = (void *)(intptr_t)-4;
2024 rc = PGMDynMapHCPage(pVM, cr3, &pv2);
2025 }
2026 ASMIntEnable();
2027 if (RT_FAILURE(rc) || pv != pv2)
2028 {
2029 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%p\n", __LINE__, rc, pv, pv2, i));
2030 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2031 }
2032 else if (pSet->cEntries != 5)
2033 {
2034 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries) / 2));
2035 rc = VERR_INTERNAL_ERROR;
2036 }
2037 else if ( pSet->aEntries[4].cRefs != UINT16_MAX - 1
2038 || pSet->aEntries[3].cRefs != UINT16_MAX - 1
2039 || pSet->aEntries[2].cRefs != 1
2040 || pSet->aEntries[1].cRefs != 1
2041 || pSet->aEntries[0].cRefs != 1)
2042 {
2043 LogRel(("failed(%d): bad set dist: ", __LINE__));
2044 for (i = 0; i < pSet->cEntries; i++)
2045 LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs));
2046 LogRel(("\n"));
2047 rc = VERR_INTERNAL_ERROR;
2048 }
2049 if (RT_SUCCESS(rc))
2050 rc = PGMR0DynMapAssertIntegrity();
2051 if (RT_SUCCESS(rc))
2052 {
2053 /*
2054 * Trigger a set optimization run (exactly).
2055 */
2056 LogRel(("Test #3\n"));
2057 ASMIntDisable();
2058 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2059 pv2 = NULL;
2060 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) - 5 && RT_SUCCESS(rc) && pv2 != pv; i++)
2061 {
2062 pv2 = (void *)(intptr_t)(-5 - i);
2063 rc = PGMDynMapHCPage(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2);
2064 }
2065 ASMIntEnable();
2066 if (RT_FAILURE(rc) || pv == pv2)
2067 {
2068 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
2069 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2070 }
2071 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries))
2072 {
2073 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2074 rc = VERR_INTERNAL_ERROR;
2075 }
2076 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2077 if (RT_SUCCESS(rc))
2078 rc = PGMR0DynMapAssertIntegrity();
2079 if (RT_SUCCESS(rc))
2080 {
2081 /*
2082 * Trigger an overflow error.
2083 */
2084 LogRel(("Test #4\n"));
2085 ASMIntDisable();
2086 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2087 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) + 2; i++)
2088 {
2089 rc = PGMDynMapHCPage(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2);
2090 if (RT_SUCCESS(rc))
2091 rc = PGMR0DynMapAssertIntegrity();
2092 if (RT_FAILURE(rc))
2093 break;
2094 }
2095 ASMIntEnable();
2096 if (rc == VERR_PGM_DYNMAP_FULL_SET)
2097 {
2098 /* flush the set. */
2099 LogRel(("Test #5\n"));
2100 ASMIntDisable();
2101 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2102 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
2103 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
2104 ASMIntEnable();
2105
2106 rc = PGMR0DynMapAssertIntegrity();
2107 }
2108 else
2109 {
2110 LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__,
2111 rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i));
2112 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
2113 }
2114 }
2115 }
2116 }
2117 else
2118 {
2119 LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2));
2120 if (RT_SUCCESS(rc))
2121 rc = rc2;
2122 }
2123
2124 /*
2125 * Check that everyone sees the same stuff.
2126 */
2127 if (RT_SUCCESS(rc))
2128 {
2129 LogRel(("Test #5\n"));
2130 ASMIntDisable();
2131 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2132 RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0);
2133 rc = PGMDynMapHCPage(pVM, HCPhysPT, &pv);
2134 if (RT_SUCCESS(rc))
2135 {
2136 PGMR0DYNMAPTEST Test;
2137 uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u;
2138 Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK));
2139 Test.u32Expect = *pu32Real;
2140 ASMAtomicWriteU32(&Test.cFailures, 0);
2141 ASMIntEnable();
2142
2143 rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL);
2144 if (RT_FAILURE(rc))
2145 LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc));
2146 else if (Test.cFailures)
2147 {
2148 LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__,
2149 Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2150 rc = VERR_INTERNAL_ERROR;
2151 }
2152 else
2153 LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n",
2154 pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2155 }
2156 else
2157 {
2158 ASMIntEnable();
2159 LogRel(("failed(%d): rc=%Rrc\n", rc));
2160 }
2161 }
2162
2163 /*
2164 * Clean up.
2165 */
2166 LogRel(("Cleanup.\n"));
2167 ASMIntDisable();
2168 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
2169 PGMDynMapFlushAutoSet(&pVM->aCpus[0]);
2170 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
2171 ASMIntEnable();
2172
2173 if (RT_SUCCESS(rc))
2174 rc = PGMR0DynMapAssertIntegrity();
2175 else
2176 PGMR0DynMapAssertIntegrity();
2177
2178 LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc,
2179 pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cGuardPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2180 pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved;
2181 LogRel(("pgmR0DynMapTest: ****** END ******\n"));
2182 return rc;
2183}
2184#endif /* DEBUG */
2185