VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMRZ/PGMRZDynMap.cpp@ 86660

Last change on this file since 86660 was 86473, checked in by vboxsync, 4 years ago

VMM/PGM: Working on eliminating page table bitfield use. bugref:9841 bugref:9746

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 98.2 KB
Line 
1/* $Id: PGMRZDynMap.cpp 86473 2020-10-07 17:30:25Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_DYNMAP
23#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
24#include <VBox/vmm/pgm.h>
25#include "PGMInternal.h"
26#include <VBox/vmm/vm.h>
27#include "PGMInline.h"
28#include <VBox/err.h>
29#include <VBox/param.h>
30#include <VBox/sup.h>
31#include <iprt/asm.h>
32#include <iprt/asm-amd64-x86.h>
33#include <iprt/assert.h>
34#ifndef IN_RC
35# include <iprt/cpuset.h>
36# include <iprt/mem.h>
37# include <iprt/memobj.h>
38# include <iprt/mp.h>
39# include <iprt/semaphore.h>
40# include <iprt/spinlock.h>
41#endif
42#include <iprt/string.h>
43
44
45/*********************************************************************************************************************************
46* Defined Constants And Macros *
47*********************************************************************************************************************************/
#ifdef IN_RING0
/** Upper limit for the mapping cache size, given in pages. */
# define PGMR0DYNMAP_MAX_PAGES              ((16*_1M) >> PAGE_SHIFT)
/** Size (in pages) of the small segments used instead of a single big
 * segment when the latter runs into out-of-memory conditions. */
# define PGMR0DYNMAP_SMALL_SEG_PAGES        128
/** Number of pages reserved for each CPU. */
# define PGMR0DYNMAP_PAGES_PER_CPU          256
/** Minimum number of pages reserved for each CPU.
 * Must not be smaller than the autoset size. */
# define PGMR0DYNMAP_PAGES_PER_CPU_MIN      64
/** Computes the overload threshold (safety margin) for a given cache size;
 * currently half of the pages. */
# define PGMR0DYNMAP_CALC_OVERLOAD(cPages)  ((cPages) / 2)
/** Number of guard pages: one in strict builds, none otherwise.
 * @remarks Never tune the hashing or similar using a strict build! */
# ifdef VBOX_STRICT
#  define PGMR0DYNMAP_GUARD_PAGES           1
# else
#  define PGMR0DYNMAP_GUARD_PAGES           0
# endif
#endif /* IN_RING0 */
/** Dummy physical address assigned to guard pages. */
#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS       UINT32_C(0x7777feed)
/** Dummy reference count assigned to guard pages (must be non-zero). */
#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT    INT32_C(0x7777feed)
#if 0
/** Define this to only clear the present bit of guard page PTEs.
 * The alternative is replacing the whole PTE with a bad not-present PTE.
 * Either way, XNU will screw us. :-/ */
# define PGMR0DYNMAP_GUARD_NP
#endif
/** Dummy PTE value written for guard pages, 32-bit legacy paging. */
#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE   X86_PTE_PG_MASK
/** Dummy PTE value written for guard pages, PAE/AMD64 paging. */
#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE      UINT64_MAX /*X86_PTE_PAE_PG_MASK*/
83
#ifdef IN_RING0 /* Note! Assertions causes panics if preemption is disabled,
                 *       disable this to work around that. */
/**
 * Takes the cache spinlock (pThis->hSpinlock).
 * Expands to a single RTSpinlockAcquire call.
 */
# define PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis)    RTSpinlockAcquire((pThis)->hSpinlock)

/**
 * Drops the cache spinlock (pThis->hSpinlock).
 */
# define PGMRZDYNMAP_SPINLOCK_RELEASE(pThis)    RTSpinlockRelease((pThis)->hSpinlock)

/**
 * Takes the cache spinlock again after it has been released temporarily.
 */
# define PGMRZDYNMAP_SPINLOCK_REACQUIRE(pThis)  RTSpinlockAcquire((pThis)->hSpinlock)
#else
/* No locking outside ring-0: all three operations are no-ops. */
# define PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis)    do { } while (0)
# define PGMRZDYNMAP_SPINLOCK_RELEASE(pThis)    do { } while (0)
# define PGMRZDYNMAP_SPINLOCK_REACQUIRE(pThis)  do { } while (0)
#endif
109
110
/** Gets the VMCPU structure embedding the given auto set (pgm.s.AutoSet). */
#define PGMRZDYNMAP_SET_2_VMCPU(pSet)       (RT_FROM_MEMBER(pSet, VMCPU, pgm.s.AutoSet))

/** Gets the context VM pointer of the VMCPU embedding the given auto set. */
#define PGMRZDYNMAP_SET_2_VM(pSet)          (PGMRZDYNMAP_SET_2_VMCPU(pSet)->CTX_SUFF(pVM))

/** Gets the dynamic mapping cache instance to use with the given auto set.
 * In raw-mode context this is the per-VM pRCDynMap, otherwise the global
 * ring-0 instance. */
#ifdef IN_RC
# define PGMRZDYNMAP_SET_2_DYNMAP(pSet)     (PGMRZDYNMAP_SET_2_VM(pSet)->pgm.s.pRCDynMap)
#else
# define PGMRZDYNMAP_SET_2_DYNMAP(pSet)     (g_pPGMR0DynMap)
#endif

/**
 * Gets the set index of the current CPU.
 *
 * In raw-mode context this is always 0, as there is only ever one EMT in
 * that context (at least presently).
 */
#ifdef IN_RC
# define PGMRZDYNMAP_CUR_CPU()              (0)
#else
# define PGMRZDYNMAP_CUR_CPU()              RTMpCurSetIndex()
#endif

/** Magic value for PGMRZDYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
#define PGMRZDYNMAP_MAGIC                   UINT32_C(0x19640201)


/** Resets a set entry: invalid page index (UINT16_MAX), all counters zero. */
#define PGMRZDYNMAP_ZAP_ENTRY(pEntry) \
    do \
    { \
        (pEntry)->iPage        = UINT16_MAX; \
        (pEntry)->cRefs        = 0; \
        (pEntry)->cInlinedRefs = 0; \
        (pEntry)->cUnrefs      = 0; \
    } while (0)


/** @def PGMRZDYNMAP_STRICT_RELEASE
 * When defined, pages are released and made non-present as soon as possible
 * after use.  Normally left disabled because it is a bit expensive. */
#if 0 || defined(DOXYGEN_RUNNING)
# define PGMRZDYNMAP_STRICT_RELEASE
#endif
157
158
159/*********************************************************************************************************************************
160* Structures and Typedefs *
161*********************************************************************************************************************************/
162#ifdef IN_RING0
163/**
164 * Ring-0 dynamic mapping cache segment.
165 *
166 * The dynamic mapping cache can be extended with additional segments if the
167 * load is found to be too high. This done the next time a VM is created, under
168 * the protection of the init mutex. The arrays is reallocated and the new
169 * segment is added to the end of these. Nothing is rehashed of course, as the
170 * indexes / addresses must remain unchanged.
171 *
172 * This structure is only modified while owning the init mutex or during module
173 * init / term.
174 */
175typedef struct PGMR0DYNMAPSEG
176{
177 /** Pointer to the next segment. */
178 struct PGMR0DYNMAPSEG *pNext;
179 /** The memory object for the virtual address range that we're abusing. */
180 RTR0MEMOBJ hMemObj;
181 /** The start page in the cache. (I.e. index into the arrays.) */
182 uint16_t iPage;
183 /** The number of pages this segment contributes. */
184 uint16_t cPages;
185 /** The number of page tables. */
186 uint16_t cPTs;
187 /** The memory objects for the page tables. */
188 RTR0MEMOBJ ahMemObjPTs[1];
189} PGMR0DYNMAPSEG;
190/** Pointer to a ring-0 dynamic mapping cache segment. */
191typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
192
193
194/**
195 * Ring-0 dynamic mapping cache entry.
196 *
197 * @sa PGMRZDYNMAPENTRY, PGMRCDYNMAPENTRY.
198 */
199typedef struct PGMR0DYNMAPENTRY
200{
201 /** The physical address of the currently mapped page.
202 * This is duplicate for three reasons: cache locality, cache policy of the PT
203 * mappings and sanity checks. */
204 RTHCPHYS HCPhys;
205 /** Pointer to the page. */
206 void *pvPage;
207 /** The number of references. */
208 int32_t volatile cRefs;
209 /** PTE pointer union. */
210 union PGMR0DYNMAPENTRY_PPTE
211 {
212 /** PTE pointer, 32-bit legacy version. */
213 PX86PTE pLegacy;
214 /** PTE pointer, PAE version. */
215 PX86PTEPAE pPae;
216 /** PTE pointer, the void version. */
217 void *pv;
218 } uPte;
219 /** CPUs that haven't invalidated this entry after it's last update. */
220 RTCPUSET PendingSet;
221} PGMR0DYNMAPENTRY;
222/** Pointer a mapping cache entry for the ring-0.
223 * @sa PPGMRZDYNMAPENTRY, PPGMRCDYNMAPENTRY, */
224typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
225
226
227/**
228 * Dynamic mapping cache for ring-0.
229 *
230 * This is initialized during VMMR0 module init but no segments are allocated
231 * at that time. Segments will be added when the first VM is started and
232 * removed again when the last VM shuts down, thus avoid consuming memory while
233 * dormant. At module termination, the remaining bits will be freed up.
234 *
235 * @sa PPGMRZDYNMAP, PGMRCDYNMAP.
236 */
237typedef struct PGMR0DYNMAP
238{
239 /** The usual magic number / eye catcher (PGMRZDYNMAP_MAGIC). */
240 uint32_t u32Magic;
241 /** Spinlock serializing the normal operation of the cache. */
242 RTSPINLOCK hSpinlock;
243 /** Array for tracking and managing the pages. */
244 PPGMR0DYNMAPENTRY paPages;
245 /** The cache size given as a number of pages. */
246 uint32_t cPages;
247 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
248 bool fLegacyMode;
249 /** The current load.
250 * This does not include guard pages. */
251 uint32_t cLoad;
252 /** The max load ever.
253 * This is maintained to trigger the adding of more mapping space. */
254 uint32_t cMaxLoad;
255 /** Initialization / termination lock. */
256 RTSEMFASTMUTEX hInitLock;
257 /** The number of guard pages. */
258 uint32_t cGuardPages;
259 /** The number of users (protected by hInitLock). */
260 uint32_t cUsers;
261 /** Array containing a copy of the original page tables.
262 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
263 void *pvSavedPTEs;
264 /** List of segments. */
265 PPGMR0DYNMAPSEG pSegHead;
266 /** The paging mode. */
267 SUPPAGINGMODE enmPgMode;
268} PGMR0DYNMAP;
269
270
271/**
272 * Paging level data.
273 */
274typedef struct PGMR0DYNMAPPGLVL
275{
276 uint32_t cLevels; /**< The number of levels. */
277 struct
278 {
279 RTHCPHYS HCPhys; /**< The address of the page for the current level,
280 * i.e. what hMemObj/hMapObj is currently mapping. */
281 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
282 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
283 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
284 uint32_t fPtrShift; /**< The pointer shift count. */
285 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
286 uint64_t fAndMask; /**< And mask to check entry flags. */
287 uint64_t fResMask; /**< The result from applying fAndMask. */
288 union
289 {
290 void *pv; /**< hMapObj address. */
291 PX86PGUINT paLegacy; /**< Legacy table view. */
292 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
293 } u;
294 } a[4];
295} PGMR0DYNMAPPGLVL;
296/** Pointer to paging level data. */
297typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
298#endif
299
300/** Mapping cache entry for the current context.
301 * @sa PGMR0DYNMAPENTRY, PGMRCDYNMAPENTRY */
302typedef CTX_MID(PGM,DYNMAPENTRY) PGMRZDYNMAPENTRY;
303/** Pointer a mapping cache entry for the current context.
304 * @sa PGMR0DYNMAPENTRY, PGMRCDYNMAPENTRY */
305typedef PGMRZDYNMAPENTRY *PPGMRZDYNMAPENTRY;
306
307/** Pointer to the mapping cache instance for the current context.
308 * @sa PGMR0DYNMAP, PGMRCDYNMAP */
309typedef CTX_MID(PGM,DYNMAP) *PPGMRZDYNMAP;
310
311
312
313/*********************************************************************************************************************************
314* Global Variables *
315*********************************************************************************************************************************/
#ifdef IN_RING0
/** The ring-0 dynamic mapping cache instance; set by PGMR0DynMapInit and
 * cleared by PGMR0DynMapTerm. */
static PGMR0DYNMAP *g_pPGMR0DynMap;
#endif
/** Flag used for overflow testing. */
static bool         g_fPGMR0DynMapTestRunning = false;
322
323
324/*********************************************************************************************************************************
325* Internal Functions *
326*********************************************************************************************************************************/
327static void pgmRZDynMapReleasePage(PPGMRZDYNMAP pThis, uint32_t iPage, uint32_t cRefs);
328#ifdef IN_RING0
329static int pgmR0DynMapSetup(PPGMRZDYNMAP pThis);
330static int pgmR0DynMapExpand(PPGMRZDYNMAP pThis);
331static void pgmR0DynMapTearDown(PPGMRZDYNMAP pThis);
332#endif
333#if 0 /*def DEBUG*/
334static int pgmR0DynMapTest(PVM pVM);
335#endif
336
337
338/**
339 * Initializes the auto mapping sets for a VM.
340 *
341 * @returns VINF_SUCCESS on success, VERR_PGM_DYNMAP_IPE on failure.
342 * @param pVM The cross context VM structure.
343 */
344static int pgmRZDynMapInitAutoSetsForVM(PVM pVM)
345{
346 VMCPUID idCpu = pVM->cCpus;
347 AssertReturn(idCpu > 0 && idCpu <= VMM_MAX_CPU_COUNT, VERR_PGM_DYNMAP_IPE);
348 while (idCpu-- > 0)
349 {
350 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
351 uint32_t j = RT_ELEMENTS(pSet->aEntries);
352 while (j-- > 0)
353 {
354 pSet->aEntries[j].pvPage = NULL;
355 pSet->aEntries[j].HCPhys = NIL_RTHCPHYS;
356 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[j]);
357 }
358 pSet->cEntries = PGMMAPSET_CLOSED;
359 pSet->iSubset = UINT32_MAX;
360 pSet->iCpu = -1;
361 memset(&pSet->aiHashTable[0], 0xff, sizeof(pSet->aiHashTable));
362 }
363
364 return VINF_SUCCESS;
365}
366
367
368#ifdef IN_RING0
369
370/**
371 * Initializes the ring-0 dynamic mapping cache.
372 *
373 * @returns VBox status code.
374 */
375VMMR0DECL(int) PGMR0DynMapInit(void)
376{
377 Assert(!g_pPGMR0DynMap);
378
379 /*
380 * Create and initialize the cache instance.
381 */
382 PPGMRZDYNMAP pThis = (PPGMRZDYNMAP)RTMemAllocZ(sizeof(*pThis));
383 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
384 int rc = VINF_SUCCESS;
385 pThis->enmPgMode = SUPR0GetPagingMode();
386 switch (pThis->enmPgMode)
387 {
388 case SUPPAGINGMODE_32_BIT:
389 case SUPPAGINGMODE_32_BIT_GLOBAL:
390 pThis->fLegacyMode = false;
391 break;
392 case SUPPAGINGMODE_PAE:
393 case SUPPAGINGMODE_PAE_GLOBAL:
394 case SUPPAGINGMODE_PAE_NX:
395 case SUPPAGINGMODE_PAE_GLOBAL_NX:
396 case SUPPAGINGMODE_AMD64:
397 case SUPPAGINGMODE_AMD64_GLOBAL:
398 case SUPPAGINGMODE_AMD64_NX:
399 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
400 pThis->fLegacyMode = false;
401 break;
402 default:
403 rc = VERR_PGM_DYNMAP_IPE;
404 break;
405 }
406 if (RT_SUCCESS(rc))
407 {
408 rc = RTSemFastMutexCreate(&pThis->hInitLock);
409 if (RT_SUCCESS(rc))
410 {
411 rc = RTSpinlockCreate(&pThis->hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "PGMR0DynMap");
412 if (RT_SUCCESS(rc))
413 {
414 pThis->u32Magic = PGMRZDYNMAP_MAGIC;
415 g_pPGMR0DynMap = pThis;
416 return VINF_SUCCESS;
417 }
418 RTSemFastMutexDestroy(pThis->hInitLock);
419 }
420 }
421 RTMemFree(pThis);
422 return rc;
423}
424
425
426/**
427 * Terminates the ring-0 dynamic mapping cache.
428 */
429VMMR0DECL(void) PGMR0DynMapTerm(void)
430{
431 /*
432 * Destroy the cache.
433 *
434 * There is not supposed to be any races here, the loader should
435 * make sure about that. So, don't bother locking anything.
436 *
437 * The VM objects should all be destroyed by now, so there is no
438 * dangling users or anything like that to clean up. This routine
439 * is just a mirror image of PGMR0DynMapInit.
440 */
441 PPGMRZDYNMAP pThis = g_pPGMR0DynMap;
442 if (pThis)
443 {
444 AssertPtr(pThis);
445 g_pPGMR0DynMap = NULL;
446
447 /* This should *never* happen, but in case it does try not to leak memory. */
448 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages,
449 ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n",
450 pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages));
451 if (pThis->paPages)
452 pgmR0DynMapTearDown(pThis);
453
454 /* Free the associated resources. */
455 RTSemFastMutexDestroy(pThis->hInitLock);
456 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
457 RTSpinlockDestroy(pThis->hSpinlock);
458 pThis->hSpinlock = NIL_RTSPINLOCK;
459 pThis->u32Magic = UINT32_MAX;
460 RTMemFree(pThis);
461 }
462}
463
464
465/**
466 * Initializes the dynamic mapping cache for a new VM.
467 *
468 * @returns VBox status code.
469 * @param pVM The cross context VM structure.
470 */
471VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
472{
473 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
474
475 /*
476 * Initialize the auto sets.
477 */
478 int rc = pgmRZDynMapInitAutoSetsForVM(pVM);
479 if (RT_FAILURE(rc))
480 return rc;
481
482 /*
483 * Do we need the cache? Skip the last bit if we don't.
484 */
485 if (VM_IS_RAW_MODE_ENABLED(pVM))
486 return VINF_SUCCESS;
487
488 /*
489 * Reference and if necessary setup or expand the cache.
490 */
491 PPGMRZDYNMAP pThis = g_pPGMR0DynMap;
492 AssertPtrReturn(pThis, VERR_PGM_DYNMAP_IPE);
493 rc = RTSemFastMutexRequest(pThis->hInitLock);
494 AssertLogRelRCReturn(rc, rc);
495
496 pThis->cUsers++;
497 if (pThis->cUsers == 1)
498 {
499 rc = pgmR0DynMapSetup(pThis);
500#if 0 /*def DEBUG*/
501 if (RT_SUCCESS(rc))
502 {
503 rc = pgmR0DynMapTest(pVM);
504 if (RT_FAILURE(rc))
505 pgmR0DynMapTearDown(pThis);
506 }
507#endif
508 }
509 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages))
510 rc = pgmR0DynMapExpand(pThis);
511 if (RT_SUCCESS(rc))
512 pVM->pgm.s.pvR0DynMapUsed = pThis;
513 else
514 pThis->cUsers--;
515
516 RTSemFastMutexRelease(pThis->hInitLock);
517 return rc;
518}
519
520
521/**
522 * Terminates the dynamic mapping cache usage for a VM.
523 *
524 * @param pVM The cross context VM structure.
525 */
526VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
527{
528 /*
529 * Return immediately if we're not using the cache.
530 */
531 if (!pVM->pgm.s.pvR0DynMapUsed)
532 return;
533
534 PPGMRZDYNMAP pThis = g_pPGMR0DynMap;
535 AssertPtrReturnVoid(pThis);
536
537 int rc = RTSemFastMutexRequest(pThis->hInitLock);
538 AssertLogRelRCReturnVoid(rc);
539
540 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
541 {
542 pVM->pgm.s.pvR0DynMapUsed = NULL;
543
544#ifdef VBOX_STRICT
545 PGMR0DynMapAssertIntegrity();
546#endif
547
548 /*
549 * Clean up and check the auto sets.
550 */
551 VMCPUID idCpu = pVM->cCpus;
552 while (idCpu-- > 0)
553 {
554 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
555 uint32_t j = pSet->cEntries;
556 if (j <= RT_ELEMENTS(pSet->aEntries))
557 {
558 /*
559 * The set is open, close it.
560 */
561 while (j-- > 0)
562 {
563 int32_t cRefs = pSet->aEntries[j].cRefs;
564 uint32_t iPage = pSet->aEntries[j].iPage;
565 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
566 if (iPage < pThis->cPages && cRefs > 0)
567 pgmRZDynMapReleasePage(pThis, iPage, cRefs);
568 else
569 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
570
571 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[j]);
572 }
573 pSet->cEntries = PGMMAPSET_CLOSED;
574 pSet->iSubset = UINT32_MAX;
575 pSet->iCpu = -1;
576 }
577 else
578 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
579
580 j = RT_ELEMENTS(pSet->aEntries);
581 while (j-- > 0)
582 {
583 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
584 Assert(!pSet->aEntries[j].cRefs);
585 }
586 }
587
588 /*
589 * Release our reference to the mapping cache.
590 */
591 Assert(pThis->cUsers > 0);
592 pThis->cUsers--;
593 if (!pThis->cUsers)
594 pgmR0DynMapTearDown(pThis);
595 }
596 else
597 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
598
599 RTSemFastMutexRelease(pThis->hInitLock);
600}
601
602
603/**
604 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
605 *
606 * @param idCpu The current CPU.
607 * @param pvUser1 The dynamic mapping cache instance.
608 * @param pvUser2 Unused, NULL.
609 */
610static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
611{
612 Assert(!pvUser2);
613 PPGMRZDYNMAP pThis = (PPGMRZDYNMAP)pvUser1;
614 Assert(pThis == g_pPGMR0DynMap);
615 PPGMRZDYNMAPENTRY paPages = pThis->paPages;
616 uint32_t iPage = pThis->cPages;
617 while (iPage-- > 0)
618 ASMInvalidatePage((uintptr_t)paPages[iPage].pvPage);
619}
620
621
622/**
623 * Shoot down the TLBs for every single cache entry on all CPUs.
624 *
625 * @returns IPRT status code (RTMpOnAll).
626 * @param pThis The dynamic mapping cache instance.
627 */
628static int pgmR0DynMapTlbShootDown(PPGMRZDYNMAP pThis)
629{
630 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
631 AssertRC(rc);
632 if (RT_FAILURE(rc))
633 {
634 uint32_t iPage = pThis->cPages;
635 while (iPage-- > 0)
636 ASMInvalidatePage((uintptr_t)pThis->paPages[iPage].pvPage);
637 }
638 return rc;
639}
640
641
642/**
643 * Calculate the new cache size based on cMaxLoad statistics.
644 *
645 * @returns Number of pages.
646 * @param pThis The dynamic mapping cache instance.
647 * @param pcMinPages The minimal size in pages.
648 */
649static uint32_t pgmR0DynMapCalcNewSize(PPGMRZDYNMAP pThis, uint32_t *pcMinPages)
650{
651 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
652
653 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU(_MIN). */
654 RTCPUID cCpus = RTMpGetCount();
655 AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
656 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
657 uint32_t cMinPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU_MIN;
658
659 /* adjust against cMaxLoad. */
660 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
661 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
662 pThis->cMaxLoad = 0;
663
664 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
665 cPages += PGMR0DYNMAP_PAGES_PER_CPU;
666
667 if (pThis->cMaxLoad > cMinPages)
668 cMinPages = pThis->cMaxLoad;
669
670 /* adjust against max and current size. */
671 if (cPages < pThis->cPages)
672 cPages = pThis->cPages;
673 cPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
674 if (cPages > PGMR0DYNMAP_MAX_PAGES)
675 cPages = PGMR0DYNMAP_MAX_PAGES;
676
677 if (cMinPages < pThis->cPages)
678 cMinPages = pThis->cPages;
679 cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
680 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
681 cMinPages = PGMR0DYNMAP_MAX_PAGES;
682
683 Assert(cMinPages);
684 *pcMinPages = cMinPages;
685 return cPages;
686}
687
688
689/**
690 * Initializes the paging level data.
691 *
692 * @param pThis The dynamic mapping cache instance.
693 * @param pPgLvl The paging level data.
694 */
695void pgmR0DynMapPagingArrayInit(PPGMRZDYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
696{
697 RTCCUINTREG cr4 = ASMGetCR4();
698 switch (pThis->enmPgMode)
699 {
700 case SUPPAGINGMODE_32_BIT:
701 case SUPPAGINGMODE_32_BIT_GLOBAL:
702 pPgLvl->cLevels = 2;
703 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
704 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
705 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
706 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
707 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
708
709 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
710 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
711 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
712 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
713 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
714 break;
715
716 case SUPPAGINGMODE_PAE:
717 case SUPPAGINGMODE_PAE_GLOBAL:
718 case SUPPAGINGMODE_PAE_NX:
719 case SUPPAGINGMODE_PAE_GLOBAL_NX:
720 pPgLvl->cLevels = 3;
721 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
722 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
723 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
724 pPgLvl->a[0].fAndMask = X86_PDPE_P;
725 pPgLvl->a[0].fResMask = X86_PDPE_P;
726
727 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
728 pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK;
729 pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT;
730 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
731 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
732
733 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
734 pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK;
735 pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT;
736 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
737 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
738 break;
739
740 case SUPPAGINGMODE_AMD64:
741 case SUPPAGINGMODE_AMD64_GLOBAL:
742 case SUPPAGINGMODE_AMD64_NX:
743 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
744 pPgLvl->cLevels = 4;
745 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
746 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
747 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
748 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
749 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
750
751 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
752 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
753 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
754 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
755 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
756
757 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
758 pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT;
759 pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK;
760 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
761 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
762
763 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
764 pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT;
765 pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK;
766 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
767 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
768 break;
769
770 default:
771 AssertFailed();
772 pPgLvl->cLevels = 0;
773 break;
774 }
775
776 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
777 {
778 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
779 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
780 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
781 pPgLvl->a[i].u.pv = NULL;
782 }
783}
784
785
786/**
787 * Maps a PTE.
788 *
789 * This will update the segment structure when new PTs are mapped.
790 *
791 * It also assumes that we (for paranoid reasons) wish to establish a mapping
792 * chain from CR3 to the PT that all corresponds to the processor we're
793 * currently running on, and go about this by running with interrupts disabled
794 * and restarting from CR3 for every change.
795 *
796 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
797 * to re-enable interrupts.
798 * @param pThis The dynamic mapping cache instance.
799 * @param pPgLvl The paging level structure.
800 * @param pvPage The page.
801 * @param pSeg The segment.
802 * @param cMaxPTs The max number of PTs expected in the segment.
803 * @param ppvPTE Where to store the PTE address.
804 */
805static int pgmR0DynMapPagingArrayMapPte(PPGMRZDYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
806 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
807{
808 Assert(!(ASMGetFlags() & X86_EFL_IF));
809 void *pvEntry = NULL;
810 X86PGPAEUINT uEntry = ASMGetCR3();
811 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
812 {
813 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
814 if (pPgLvl->a[i].HCPhys != HCPhys)
815 {
816 /*
817 * Need to remap this level.
818 * The final level, the PT, will not be freed since that is what it's all about.
819 */
820 ASMIntEnable();
821 if (i + 1 == pPgLvl->cLevels)
822 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_PGM_DYNMAP_IPE);
823 else
824 {
825 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
826 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
827 }
828
829 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE, RTMEM_CACHE_POLICY_DONT_CARE);
830 if (RT_SUCCESS(rc))
831 {
832 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
833 (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
834 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
835 if (RT_SUCCESS(rc))
836 {
837 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
838 AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
839 pPgLvl->a[i].HCPhys = HCPhys;
840 if (i + 1 == pPgLvl->cLevels)
841 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
842 ASMIntDisable();
843 return VINF_TRY_AGAIN;
844 }
845
846 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
847 }
848 else
849 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
850 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
851 return rc;
852 }
853
854 /*
855 * The next level.
856 */
857 uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
858 if (pThis->fLegacyMode)
859 {
860 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
861 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
862 }
863 else
864 {
865 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
866 uEntry = pPgLvl->a[i].u.paPae[iEntry];
867 }
868
869 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
870 {
871 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n"
872 "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n",
873 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask,
874 pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode));
875 return VERR_PGM_DYNMAP_IPE;
876 }
877 /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/
878 }
879
880 /* made it thru without needing to remap anything. */
881 *ppvPTE = pvEntry;
882 return VINF_SUCCESS;
883}
884
885
886/**
887 * Sets up a guard page.
888 *
889 * @param pThis The dynamic mapping cache instance.
890 * @param pPage The page.
891 */
892DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMRZDYNMAP pThis, PPGMRZDYNMAPENTRY pPage)
893{
894 memset(pPage->pvPage, 0xfd, PAGE_SIZE);
895 pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT;
896 pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
897#ifdef PGMR0DYNMAP_GUARD_NP
898 ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P);
899#else
900 if (pThis->fLegacyMode)
901 ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE);
902 else
903 ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE);
904#endif
905 pThis->cGuardPages++;
906}
907
908
909/**
910 * Adds a new segment of the specified size.
911 *
912 * @returns VBox status code.
913 * @param pThis The dynamic mapping cache instance.
914 * @param cPages The size of the new segment, give as a page count.
915 */
916static int pgmR0DynMapAddSeg(PPGMRZDYNMAP pThis, uint32_t cPages)
917{
918 int rc2;
919 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
920
921 /*
922 * Do the array reallocations first.
923 * (The pages array has to be replaced behind the spinlock of course.)
924 */
925 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
926 if (!pvSavedPTEs)
927 return VERR_NO_MEMORY;
928 pThis->pvSavedPTEs = pvSavedPTEs;
929
930 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
931 if (!pvPages)
932 {
933 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
934 if (pvSavedPTEs)
935 pThis->pvSavedPTEs = pvSavedPTEs;
936 return VERR_NO_MEMORY;
937 }
938
939 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
940
941 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
942 void *pvToFree = pThis->paPages;
943 pThis->paPages = (PPGMRZDYNMAPENTRY)pvPages;
944
945 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
946 RTMemFree(pvToFree);
947
948 /*
949 * Allocate the segment structure and pages of memory, then touch all the pages (paranoia).
950 */
951 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
952 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF_DYN(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
953 if (!pSeg)
954 return VERR_NO_MEMORY;
955 pSeg->pNext = NULL;
956 pSeg->cPages = cPages;
957 pSeg->iPage = pThis->cPages;
958 pSeg->cPTs = 0;
959 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
960 if (RT_SUCCESS(rc))
961 {
962 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
963 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
964 memset(pbPage, 0xfe, cPages << PAGE_SHIFT);
965
966 /*
967 * Walk thru the pages and set them up with a mapping of their PTE and everything.
968 */
969 ASMIntDisable();
970 PGMR0DYNMAPPGLVL PgLvl;
971 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
972 uint32_t const iEndPage = pSeg->iPage + cPages;
973 for (uint32_t iPage = pSeg->iPage;
974 iPage < iEndPage;
975 iPage++, pbPage += PAGE_SIZE)
976 {
977 /* Initialize the page data. */
978 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
979 pThis->paPages[iPage].pvPage = pbPage;
980 pThis->paPages[iPage].cRefs = 0;
981 pThis->paPages[iPage].uPte.pPae = 0;
982#ifndef IN_RC
983 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
984#endif
985
986 /* Map its page table, retry until we've got a clean run (paranoia). */
987 do
988 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
989 &pThis->paPages[iPage].uPte.pv);
990 while (rc == VINF_TRY_AGAIN);
991 if (RT_FAILURE(rc))
992 break;
993
994 /* Save the PTE. */
995 if (pThis->fLegacyMode)
996 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
997 else
998 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
999
1000#ifdef VBOX_STRICT
1001 /* Check that we've got the right entry. */
1002 RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage);
1003 RTHCPHYS HCPhysPte = pThis->fLegacyMode
1004 ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK
1005 : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK;
1006 if (HCPhysPage != HCPhysPte)
1007 {
1008 LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n",
1009 iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv));
1010 rc = VERR_PGM_DYNMAP_IPE;
1011 break;
1012 }
1013#endif
1014 } /* for each page */
1015 ASMIntEnable();
1016
1017 /* cleanup non-PT mappings */
1018 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
1019 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
1020
1021 if (RT_SUCCESS(rc))
1022 {
1023#if PGMR0DYNMAP_GUARD_PAGES > 0
1024 /*
1025 * Setup guard pages.
1026 * (Note: TLBs will be shot down later on.)
1027 */
1028 uint32_t iPage = pSeg->iPage;
1029 while (iPage < iEndPage)
1030 {
1031 for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++)
1032 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
1033 iPage++; /* the guarded page */
1034 }
1035
1036 /* Make sure the very last page is a guard page too. */
1037 iPage = iEndPage - 1;
1038 if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT)
1039 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
1040#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */
1041
1042 /*
1043 * Commit it by adding the segment to the list and updating the page count.
1044 */
1045 pSeg->pNext = pThis->pSegHead;
1046 pThis->pSegHead = pSeg;
1047 pThis->cPages += cPages;
1048 return VINF_SUCCESS;
1049 }
1050
1051 /*
1052 * Bail out.
1053 */
1054 while (pSeg->cPTs-- > 0)
1055 {
1056 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
1057 AssertRC(rc2);
1058 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
1059 }
1060
1061 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
1062 AssertRC(rc2);
1063 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1064 }
1065 else if (rc == VERR_NO_PAGE_MEMORY || rc == VERR_NO_PHYS_MEMORY)
1066 rc = VERR_NO_MEMORY;
1067 RTMemFree(pSeg);
1068
1069 /* Don't bother resizing the arrays, but free them if we're the only user. */
1070 if (!pThis->cPages)
1071 {
1072 RTMemFree(pThis->paPages);
1073 pThis->paPages = NULL;
1074 RTMemFree(pThis->pvSavedPTEs);
1075 pThis->pvSavedPTEs = NULL;
1076 }
1077 return rc;
1078}
1079
1080
1081/**
1082 * Called by PGMR0DynMapInitVM under the init lock.
1083 *
1084 * @returns VBox status code.
1085 * @param pThis The dynamic mapping cache instance.
1086 */
1087static int pgmR0DynMapSetup(PPGMRZDYNMAP pThis)
1088{
1089 /*
1090 * Calc the size and add a segment of that size.
1091 */
1092 uint32_t cMinPages;
1093 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1094 AssertReturn(cPages, VERR_PGM_DYNMAP_IPE);
1095 int rc = pgmR0DynMapAddSeg(pThis, cPages);
1096 if (rc == VERR_NO_MEMORY)
1097 {
1098 /*
1099 * Try adding smaller segments.
1100 */
1101 do
1102 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1103 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1104 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1105 rc = VINF_SUCCESS;
1106 if (rc == VERR_NO_MEMORY)
1107 {
1108 if (pThis->cPages)
1109 pgmR0DynMapTearDown(pThis);
1110 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
1111 }
1112 }
1113 Assert(ASMGetFlags() & X86_EFL_IF);
1114
1115#if PGMR0DYNMAP_GUARD_PAGES > 0
1116 /* paranoia */
1117 if (RT_SUCCESS(rc))
1118 pgmR0DynMapTlbShootDown(pThis);
1119#endif
1120 return rc;
1121}
1122
1123
1124/**
1125 * Called by PGMR0DynMapInitVM under the init lock.
1126 *
1127 * @returns VBox status code.
1128 * @param pThis The dynamic mapping cache instance.
1129 */
1130static int pgmR0DynMapExpand(PPGMRZDYNMAP pThis)
1131{
1132 /*
1133 * Calc the new target size and add a segment of the appropriate size.
1134 */
1135 uint32_t cMinPages;
1136 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1137 AssertReturn(cPages, VERR_PGM_DYNMAP_IPE);
1138 if (pThis->cPages >= cPages)
1139 return VINF_SUCCESS;
1140
1141 uint32_t cAdd = cPages - pThis->cPages;
1142 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
1143 if (rc == VERR_NO_MEMORY)
1144 {
1145 /*
1146 * Try adding smaller segments.
1147 */
1148 do
1149 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1150 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1151 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1152 rc = VINF_SUCCESS;
1153 if (rc == VERR_NO_MEMORY)
1154 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
1155 }
1156 Assert(ASMGetFlags() & X86_EFL_IF);
1157
1158#if PGMR0DYNMAP_GUARD_PAGES > 0
1159 /* paranoia */
1160 if (RT_SUCCESS(rc))
1161 pgmR0DynMapTlbShootDown(pThis);
1162#endif
1163 return rc;
1164}
1165
1166
1167/**
1168 * Called by PGMR0DynMapTermVM under the init lock.
1169 *
1170 * @returns VBox status code.
1171 * @param pThis The dynamic mapping cache instance.
1172 */
1173static void pgmR0DynMapTearDown(PPGMRZDYNMAP pThis)
1174{
1175 /*
1176 * Restore the original page table entries
1177 */
1178 PPGMRZDYNMAPENTRY paPages = pThis->paPages;
1179 uint32_t iPage = pThis->cPages;
1180 if (pThis->fLegacyMode)
1181 {
1182 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
1183 while (iPage-- > 0)
1184 {
1185 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
1186 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1187 X86PGUINT uNew = paSavedPTEs[iPage];
1188 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
1189 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1190 Assert(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage]);
1191 }
1192 }
1193 else
1194 {
1195 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
1196 while (iPage-- > 0)
1197 {
1198 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
1199 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1200 X86PGPAEUINT uNew = paSavedPTEs[iPage];
1201 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
1202 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1203 Assert(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage]);
1204 }
1205 }
1206
1207 /*
1208 * Shoot down the TLBs on all CPUs before freeing them.
1209 */
1210 pgmR0DynMapTlbShootDown(pThis);
1211
1212 /*
1213 * Free the segments.
1214 */
1215 while (pThis->pSegHead)
1216 {
1217 int rc;
1218 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
1219 pThis->pSegHead = pSeg->pNext;
1220
1221 uint32_t iPT = pSeg->cPTs;
1222 while (iPT-- > 0)
1223 {
1224 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
1225 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
1226 }
1227 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
1228 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1229 pSeg->pNext = NULL;
1230 pSeg->iPage = UINT16_MAX;
1231 pSeg->cPages = 0;
1232 pSeg->cPTs = 0;
1233 RTMemFree(pSeg);
1234 }
1235
1236 /*
1237 * Free the arrays and restore the initial state.
1238 * The cLoadMax value is left behind for the next setup.
1239 */
1240 RTMemFree(pThis->paPages);
1241 pThis->paPages = NULL;
1242 RTMemFree(pThis->pvSavedPTEs);
1243 pThis->pvSavedPTEs = NULL;
1244 pThis->cPages = 0;
1245 pThis->cLoad = 0;
1246 pThis->cGuardPages = 0;
1247}
1248
1249#endif /* IN_RING0 */
1250#ifdef IN_RC
1251
1252/**
1253 * Initializes the dynamic mapping cache in raw-mode context.
1254 *
1255 * @returns VBox status code.
1256 * @param pVM The cross context VM structure.
1257 */
1258VMMRCDECL(int) PGMRCDynMapInit(PVM pVM)
1259{
1260 /*
1261 * Allocate and initialize the instance data and page array.
1262 */
1263 PPGMRZDYNMAP pThis;
1264 size_t const cPages = MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE;
1265 size_t const cb = RT_ALIGN_Z(sizeof(*pThis), 32)
1266 + sizeof(PGMRZDYNMAPENTRY) * cPages;
1267 int rc = MMHyperAlloc(pVM, cb, 32, MM_TAG_PGM, (void **)&pThis);
1268 if (RT_FAILURE(rc))
1269 return rc;
1270
1271 pThis->u32Magic = PGMRZDYNMAP_MAGIC;
1272 pThis->paPages = RT_ALIGN_PT(pThis + 1, 32, PPGMRZDYNMAPENTRY);
1273 pThis->cPages = cPages;
1274 pThis->cLoad = 0;
1275 pThis->cMaxLoad = 0;
1276 pThis->cGuardPages = 0;
1277 pThis->cUsers = 1;
1278
1279 for (size_t iPage = 0; iPage < cPages; iPage++)
1280 {
1281 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
1282 pThis->paPages[iPage].pvPage = pVM->pgm.s.pbDynPageMapBaseGC + iPage * PAGE_SIZE;
1283 pThis->paPages[iPage].cRefs = 0;
1284 pThis->paPages[iPage].uPte.pLegacy = &pVM->pgm.s.paDynPageMap32BitPTEsGC[iPage];
1285 pThis->paPages[iPage].uPte.pPae = (PX86PTEPAE)&pVM->pgm.s.paDynPageMapPaePTEsGC[iPage];
1286 }
1287
1288 pVM->pgm.s.pRCDynMap = pThis;
1289
1290 /*
1291 * Initialize the autosets the VM.
1292 */
1293 rc = pgmRZDynMapInitAutoSetsForVM(pVM);
1294 if (RT_FAILURE(rc))
1295 return rc;
1296
1297 return VINF_SUCCESS;
1298}
1299
1300#endif /* IN_RC */
1301
1302/**
1303 * Release references to a page, caller owns the spin lock.
1304 *
1305 * @param pThis The dynamic mapping cache instance.
1306 * @param iPage The page.
1307 * @param cRefs The number of references to release.
1308 */
1309DECLINLINE(void) pgmRZDynMapReleasePageLocked(PPGMRZDYNMAP pThis, uint32_t iPage, int32_t cRefs)
1310{
1311 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs;
1312 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
1313 if (!cRefs)
1314 {
1315 pThis->cLoad--;
1316#ifdef PGMRZDYNMAP_STRICT_RELEASE
1317 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
1318 ASMAtomicBitClear(pThis->paPages[iPage].uPte.pv, X86_PTE_BIT_P);
1319 ASMInvalidatePage((uintptr_t)pThis->paPages[iPage].pvPage);
1320#endif
1321 }
1322}
1323
1324
1325/**
1326 * Release references to a page, caller does not own the spin lock.
1327 *
1328 * @param pThis The dynamic mapping cache instance.
1329 * @param iPage The page.
1330 * @param cRefs The number of references to release.
1331 */
1332static void pgmRZDynMapReleasePage(PPGMRZDYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1333{
1334 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
1335 pgmRZDynMapReleasePageLocked(pThis, iPage, cRefs);
1336 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1337}
1338
1339
1340/**
1341 * pgmR0DynMapPage worker that deals with the tedious bits.
1342 *
1343 * @returns The page index on success, UINT32_MAX on failure.
1344 * @param pThis The dynamic mapping cache instance.
1345 * @param HCPhys The address of the page to be mapped.
1346 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1347 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1348 * For statistics.
1349 * @param pfNew Set to @c true if a new entry was made and @c false if
1350 * an old entry was found and reused.
1351 */
1352static uint32_t pgmR0DynMapPageSlow(PPGMRZDYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage, PVMCPU pVCpu, bool *pfNew)
1353{
1354 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageSlow); RT_NOREF_PV(pVCpu);
1355
1356 /*
1357 * Check if any of the first 3 pages are unreferenced since the caller
1358 * already has made sure they aren't matching.
1359 */
1360#ifdef VBOX_WITH_STATISTICS
1361 bool fLooped = false;
1362#endif
1363 uint32_t const cPages = pThis->cPages;
1364 PPGMRZDYNMAPENTRY paPages = pThis->paPages;
1365 uint32_t iFreePage;
1366 if (!paPages[iPage].cRefs)
1367 iFreePage = iPage;
1368 else if (!paPages[(iPage + 1) % cPages].cRefs)
1369 iFreePage = (iPage + 1) % cPages;
1370 else if (!paPages[(iPage + 2) % cPages].cRefs)
1371 iFreePage = (iPage + 2) % cPages;
1372 else
1373 {
1374 /*
1375 * Search for an unused or matching entry.
1376 */
1377 iFreePage = (iPage + 3) % cPages;
1378 for (;;)
1379 {
1380 if (paPages[iFreePage].HCPhys == HCPhys)
1381 {
1382 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageSlowLoopHits);
1383 *pfNew = false;
1384 return iFreePage;
1385 }
1386 if (!paPages[iFreePage].cRefs)
1387 break;
1388
1389 /* advance */
1390 iFreePage = (iFreePage + 1) % cPages;
1391 if (RT_UNLIKELY(iFreePage == iPage))
1392 return UINT32_MAX;
1393 }
1394 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageSlowLoopMisses);
1395#ifdef VBOX_WITH_STATISTICS
1396 fLooped = true;
1397#endif
1398 }
1399 Assert(iFreePage < cPages);
1400
1401#if 0 //def VBOX_WITH_STATISTICS
1402 /* Check for lost hits. */
1403 if (!fLooped)
1404 for (uint32_t iPage2 = (iPage + 3) % cPages; iPage2 != iPage; iPage2 = (iPage2 + 1) % cPages)
1405 if (paPages[iPage2].HCPhys == HCPhys)
1406 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZDynMapPageSlowLostHits);
1407#endif
1408
1409 /*
1410 * Setup the new entry.
1411 */
1412 *pfNew = true;
1413 /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/
1414 paPages[iFreePage].HCPhys = HCPhys;
1415#ifndef IN_RC
1416 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1417
1418 if (pThis->fLegacyMode)
1419#endif
1420 {
1421 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1422 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1423 X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1424 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1425 | (HCPhys & X86_PTE_PG_MASK);
1426 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1427 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1428 Assert(paPages[iFreePage].uPte.pLegacy->u == uNew);
1429 }
1430#ifndef IN_RC
1431 else
1432#endif
1433 {
1434 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1435 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1436 X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1437 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1438 | (HCPhys & X86_PTE_PAE_PG_MASK);
1439 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1440 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1441 Assert(paPages[iFreePage].uPte.pPae->u == uNew);
1442 /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/
1443 }
1444 return iFreePage;
1445}
1446
1447
1448/**
1449 * Maps a page into the pool.
1450 *
1451 * @returns Page index on success, UINT32_MAX on failure.
1452 * @param pThis The dynamic mapping cache instance.
1453 * @param HCPhys The address of the page to be mapped.
1454 * @param iRealCpu The real cpu set index. (optimization)
1455 * @param pVCpu The cross context virtual CPU structure of the calling
1456 * EMT. For statistics.
1457 * @param ppvPage Where to the page address.
1458 */
1459DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMRZDYNMAP pThis, RTHCPHYS HCPhys, int32_t iRealCpu, PVMCPU pVCpu, void **ppvPage)
1460{
1461 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
1462 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1463 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPage);
1464
1465 /*
1466 * Find an entry, if possible a matching one. The HCPhys address is hashed
1467 * down to a page index, collisions are handled by linear searching.
1468 * Optimized for a hit in the first 3 pages.
1469 *
1470 * Field easy hits here and defer the tedious searching and inserting
1471 * to pgmR0DynMapPageSlow().
1472 */
1473 bool fNew = false;
1474 uint32_t const cPages = pThis->cPages;
1475 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1476 PPGMRZDYNMAPENTRY paPages = pThis->paPages;
1477 if (RT_LIKELY(paPages[iPage].HCPhys == HCPhys))
1478 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageHits0);
1479 else
1480 {
1481 uint32_t iPage2 = (iPage + 1) % cPages;
1482 if (RT_LIKELY(paPages[iPage2].HCPhys == HCPhys))
1483 {
1484 iPage = iPage2;
1485 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageHits1);
1486 }
1487 else
1488 {
1489 iPage2 = (iPage + 2) % cPages;
1490 if (paPages[iPage2].HCPhys == HCPhys)
1491 {
1492 iPage = iPage2;
1493 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageHits2);
1494 }
1495 else
1496 {
1497 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage, pVCpu, &fNew);
1498 if (RT_UNLIKELY(iPage == UINT32_MAX))
1499 {
1500 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1501 *ppvPage = NULL;
1502 return iPage;
1503 }
1504 }
1505 }
1506 }
1507
1508 /*
1509 * Reference it, update statistics and get the return address.
1510 */
1511 int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs);
1512 if (cRefs == 1)
1513 {
1514 pThis->cLoad++;
1515 if (pThis->cLoad > pThis->cMaxLoad)
1516 pThis->cMaxLoad = pThis->cLoad;
1517 AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages));
1518 }
1519 else if (RT_UNLIKELY(cRefs <= 0))
1520 {
1521 ASMAtomicDecS32(&paPages[iPage].cRefs);
1522 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1523 *ppvPage = NULL;
1524 AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%u HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX);
1525 }
1526 void *pvPage = paPages[iPage].pvPage;
1527
1528#ifndef IN_RC
1529 /*
1530 * Invalidate the entry?
1531 */
1532 bool fInvalidateIt = RTCpuSetIsMemberByIndex(&paPages[iPage].PendingSet, iRealCpu);
1533 if (RT_UNLIKELY(fInvalidateIt))
1534 RTCpuSetDelByIndex(&paPages[iPage].PendingSet, iRealCpu);
1535#else
1536 NOREF(iRealCpu);
1537#endif
1538
1539 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1540
1541 /*
1542 * Do the actual invalidation outside the spinlock.
1543 */
1544#ifdef IN_RC
1545 if (RT_UNLIKELY(fNew))
1546#else
1547 if (RT_UNLIKELY(fInvalidateIt))
1548#endif
1549 {
1550 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageInvlPg);
1551 ASMInvalidatePage((uintptr_t)pvPage);
1552 }
1553
1554 *ppvPage = pvPage;
1555 return iPage;
1556}
1557
1558
1559/**
1560 * Assert the integrity of the pool.
1561 *
1562 * @returns VBox status code.
1563 */
1564static int pgmRZDynMapAssertIntegrity(PPGMRZDYNMAP pThis)
1565{
1566 /*
1567 * Basic pool stuff that doesn't require any lock, just assumes we're a user.
1568 */
1569 if (!pThis)
1570 return VINF_SUCCESS;
1571 AssertPtrReturn(pThis, VERR_INVALID_POINTER);
1572 AssertReturn(pThis->u32Magic == PGMRZDYNMAP_MAGIC, VERR_INVALID_MAGIC);
1573 if (!pThis->cUsers)
1574 return VERR_INVALID_PARAMETER;
1575
1576 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
1577
1578#define CHECK_RET(expr, a) \
1579 do { \
1580 if (RT_UNLIKELY(!(expr))) \
1581 { \
1582 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis); \
1583 RTAssertMsg1Weak(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \
1584 RTAssertMsg2Weak a; \
1585 return VERR_PGM_DYNMAP_IPE; \
1586 } \
1587 } while (0)
1588
1589 /*
1590 * Check that the PTEs are correct.
1591 */
1592 uint32_t cGuard = 0;
1593 uint32_t cLoad = 0;
1594 PPGMRZDYNMAPENTRY paPages = pThis->paPages;
1595
1596#ifndef IN_RC
1597 if (pThis->fLegacyMode)
1598#endif
1599 {
1600#ifdef IN_RING0
1601 PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1602#endif
1603 uint32_t iPage = pThis->cPages;
1604 while (iPage-- > 0)
1605 {
1606 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1607 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1608 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1609 {
1610#ifdef PGMR0DYNMAP_GUARD_NP
1611 CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P),
1612 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1613#else
1614 CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE,
1615 ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u));
1616#endif
1617 cGuard++;
1618 }
1619 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1620 {
1621 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1622 X86PGUINT uPte = X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1623#ifdef IN_RING0
1624 | (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1625#endif
1626 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1627 CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte,
1628 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1629 if (paPages[iPage].cRefs)
1630 cLoad++;
1631 }
1632#if defined(IN_RING0) && !defined(PGMRZDYNMAP_STRICT_RELEASE)
1633 else
1634 CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage],
1635 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1636#endif
1637 }
1638 }
1639#ifndef IN_RC
1640 else
1641#endif
1642 {
1643#ifdef IN_RING0
1644 PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1645#endif
1646 uint32_t iPage = pThis->cPages;
1647 while (iPage-- > 0)
1648 {
1649 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1650 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1651 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1652 {
1653#ifdef PGMR0DYNMAP_GUARD_NP
1654 CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P),
1655 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1656#else
1657 CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE,
1658 ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u));
1659#endif
1660 cGuard++;
1661 }
1662 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1663 {
1664 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1665 X86PGPAEUINT uPte = X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1666#ifdef IN_RING0
1667 | (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1668#endif
1669 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1670 CHECK_RET(paPages[iPage].uPte.pPae->u == uPte,
1671 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1672 if (paPages[iPage].cRefs)
1673 cLoad++;
1674 }
1675#ifdef IN_RING0
1676 else
1677 CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage],
1678 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1679#endif
1680 }
1681 }
1682
1683 CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad));
1684 CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages));
1685
1686#undef CHECK_RET
1687 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1688 return VINF_SUCCESS;
1689}
1690
1691#ifdef IN_RING0
1692/**
1693 * Assert the integrity of the pool.
1694 *
1695 * @returns VBox status code.
1696 */
1697VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void)
1698{
1699 return pgmRZDynMapAssertIntegrity(g_pPGMR0DynMap);
1700}
1701#endif /* IN_RING0 */
1702
1703#ifdef IN_RC
1704/**
1705 * Assert the integrity of the pool.
1706 *
1707 * @returns VBox status code.
1708 */
1709VMMRCDECL(int) PGMRCDynMapAssertIntegrity(PVM pVM)
1710{
1711 return pgmRZDynMapAssertIntegrity((PPGMRZDYNMAP)pVM->pgm.s.pRCDynMap);
1712}
1713#endif /* IN_RC */
1714
1715
1716/**
1717 * As a final resort for a (somewhat) full auto set or full cache, try merge
1718 * duplicate entries and flush the ones we can.
1719 *
1720 * @param pSet The set.
1721 */
1722static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1723{
1724 LogFlow(("pgmDynMapOptimizeAutoSet\n"));
1725
1726 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1727 {
1728 /*
1729 * Try merge entries.
1730 */
1731 uint16_t const iPage = pSet->aEntries[i].iPage;
1732 uint32_t j = i + 1;
1733 while ( j < pSet->cEntries
1734 && ( pSet->iSubset == UINT32_MAX
1735 || pSet->iSubset < pSet->cEntries) )
1736 {
1737 if (pSet->aEntries[j].iPage != iPage)
1738 j++;
1739 else
1740 {
1741 uint32_t const cHardRefs = (uint32_t)pSet->aEntries[i].cRefs
1742 + (uint32_t)pSet->aEntries[j].cRefs;
1743 uint32_t cInlinedRefs = (uint32_t)pSet->aEntries[i].cInlinedRefs
1744 + (uint32_t)pSet->aEntries[j].cInlinedRefs;
1745 uint32_t cUnrefs = (uint32_t)pSet->aEntries[i].cUnrefs
1746 + (uint32_t)pSet->aEntries[j].cUnrefs;
1747 uint32_t cSub = RT_MIN(cUnrefs, cInlinedRefs);
1748 cInlinedRefs -= cSub;
1749 cUnrefs -= cSub;
1750
1751 if ( cHardRefs < UINT16_MAX
1752 && cInlinedRefs < UINT16_MAX
1753 && cUnrefs < UINT16_MAX)
1754 {
1755 /* merge j into i removing j. */
1756 Log2(("pgmDynMapOptimizeAutoSet: Merging #%u into #%u\n", j, i));
1757 pSet->aEntries[i].cRefs = cHardRefs;
1758 pSet->aEntries[i].cInlinedRefs = cInlinedRefs;
1759 pSet->aEntries[i].cUnrefs = cUnrefs;
1760 pSet->cEntries--;
1761 if (j < pSet->cEntries)
1762 {
1763 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1764 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[pSet->cEntries]);
1765 }
1766 else
1767 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[j]);
1768 }
1769#if 0 /* too complicated, skip it. */
1770 else
1771 {
1772 /* migrate the max number of refs from j into i and quit the inner loop. */
1773 uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs;
1774 Assert(pSet->aEntries[j].cRefs > cMigrate);
1775 pSet->aEntries[j].cRefs -= cMigrate;
1776 pSet->aEntries[i].cRefs = UINT16_MAX - 1;
1777 break;
1778 }
1779#endif
1780 }
1781 }
1782
1783 /*
1784 * Try make use of the unused hinting (cUnrefs) to evict entries
1785 * from both the set as well as the mapping cache.
1786 */
1787
1788 uint32_t const cTotalRefs = (uint32_t)pSet->aEntries[i].cRefs + pSet->aEntries[i].cInlinedRefs;
1789 Log2(("pgmDynMapOptimizeAutoSet: #%u/%u/%u pvPage=%p iPage=%u cRefs=%u cInlinedRefs=%u cUnrefs=%u cTotalRefs=%u\n",
1790 i,
1791 pSet->iSubset,
1792 pSet->cEntries,
1793 pSet->aEntries[i].pvPage,
1794 pSet->aEntries[i].iPage,
1795 pSet->aEntries[i].cRefs,
1796 pSet->aEntries[i].cInlinedRefs,
1797 pSet->aEntries[i].cUnrefs,
1798 cTotalRefs));
1799 Assert(cTotalRefs >= pSet->aEntries[i].cUnrefs);
1800
1801 if ( cTotalRefs == pSet->aEntries[i].cUnrefs
1802 && ( pSet->iSubset == UINT32_MAX
1803 || pSet->iSubset < pSet->cEntries)
1804 )
1805 {
1806 Log2(("pgmDynMapOptimizeAutoSet: Releasing iPage=%d/%p\n", pSet->aEntries[i].iPage, pSet->aEntries[i].pvPage));
1807 //LogFlow(("pgmDynMapOptimizeAutoSet: Releasing iPage=%d/%p\n", pSet->aEntries[i].iPage, pSet->aEntries[i].pvPage));
1808 pgmRZDynMapReleasePage(PGMRZDYNMAP_SET_2_DYNMAP(pSet),
1809 pSet->aEntries[i].iPage,
1810 pSet->aEntries[i].cRefs);
1811 pSet->cEntries--;
1812 if (i < pSet->cEntries)
1813 {
1814 pSet->aEntries[i] = pSet->aEntries[pSet->cEntries];
1815 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[pSet->cEntries]);
1816 }
1817
1818 i--;
1819 }
1820 }
1821}
1822
1823
1824
1825
1826/**
1827 * Signals the start of a new set of mappings.
1828 *
1829 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1830 * API is called.
1831 *
1832 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1833 */
1834VMMDECL(void) PGMRZDynMapStartAutoSet(PVMCPU pVCpu)
1835{
1836 LogFlow(("PGMRZDynMapStartAutoSet:\n"));
1837 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1838 Assert(pVCpu->pgm.s.AutoSet.iSubset == UINT32_MAX);
1839 pVCpu->pgm.s.AutoSet.cEntries = 0;
1840 pVCpu->pgm.s.AutoSet.iCpu = PGMRZDYNMAP_CUR_CPU();
1841}
1842
1843
1844#ifdef IN_RING0
1845/**
1846 * Starts or migrates the autoset of a virtual CPU.
1847 *
1848 * This is used by HMR0Enter. When we've longjumped out of the HM
1849 * execution loop with the set open, we'll migrate it when re-entering. While
1850 * under normal circumstances, we'll start it so VMXR0LoadGuestState can access
1851 * guest memory.
1852 *
1853 * @returns @c true if started, @c false if migrated.
1854 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1855 * @thread EMT
1856 */
1857VMMR0DECL(bool) PGMR0DynMapStartOrMigrateAutoSet(PVMCPU pVCpu)
1858{
1859 bool fStartIt = pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED;
1860 if (fStartIt)
1861 PGMRZDynMapStartAutoSet(pVCpu);
1862 else
1863 PGMR0DynMapMigrateAutoSet(pVCpu);
1864 return fStartIt;
1865}
1866#endif /* IN_RING0 */
1867
1868
1869/**
1870 * Checks if the set has high load.
1871 *
1872 * @returns true on high load, otherwise false.
1873 * @param pSet The set.
1874 */
1875DECLINLINE(bool) pgmRZDynMapHasHighLoad(PPGMMAPSET pSet)
1876{
1877#ifdef IN_RC
1878 if (pSet->cEntries < MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE / 2)
1879 return false;
1880#endif
1881
1882 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
1883 uint32_t cUnusedPages = pThis->cPages - pThis->cLoad;
1884#ifdef IN_RC
1885 return cUnusedPages <= MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE * 36 / 100;
1886#else
1887 return cUnusedPages <= PGMR0DYNMAP_PAGES_PER_CPU_MIN;
1888#endif
1889}
1890
1891
1892/**
1893 * Worker that performs the actual flushing of the set.
1894 *
1895 * @param pSet The set to flush.
1896 * @param cEntries The number of entries.
1897 */
1898DECLINLINE(void) pgmDynMapFlushAutoSetWorker(PPGMMAPSET pSet, uint32_t cEntries)
1899{
1900 /*
1901 * Release any pages it's referencing.
1902 */
1903 if ( cEntries != 0
1904 && RT_LIKELY(cEntries <= RT_ELEMENTS(pSet->aEntries)))
1905 {
1906 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
1907 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
1908
1909 uint32_t i = cEntries;
1910 while (i-- > 0)
1911 {
1912 uint32_t iPage = pSet->aEntries[i].iPage;
1913 Assert(iPage < pThis->cPages);
1914 int32_t cRefs = pSet->aEntries[i].cRefs;
1915 Assert(cRefs > 0);
1916 pgmRZDynMapReleasePageLocked(pThis, iPage, cRefs);
1917
1918 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[i]);
1919 }
1920
1921 Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages);
1922 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1923 }
1924}
1925
1926
1927/**
1928 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associates
1929 * since the PGMDynMapStartAutoSet call.
1930 *
1931 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1932 */
1933VMMDECL(void) PGMRZDynMapReleaseAutoSet(PVMCPU pVCpu)
1934{
1935 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1936
1937 /*
1938 * Close and flush the set.
1939 */
1940 uint32_t cEntries = pSet->cEntries;
1941 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1942 pSet->cEntries = PGMMAPSET_CLOSED;
1943 pSet->iSubset = UINT32_MAX;
1944 pSet->iCpu = -1;
1945
1946#ifdef IN_RC
1947 if (RT_ELEMENTS(pSet->aEntries) > MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)
1948 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / (MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)) % 11]);
1949 else
1950#endif
1951 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1952 if (cEntries > RT_ELEMENTS(pSet->aEntries) * 50 / 100)
1953 Log(("PGMRZDynMapReleaseAutoSet: cEntries=%d\n", cEntries));
1954 else
1955 LogFlow(("PGMRZDynMapReleaseAutoSet: cEntries=%d\n", cEntries));
1956
1957 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1958}
1959
1960
1961/**
1962 * Flushes the set if it's above a certain threshold.
1963 *
1964 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1965 */
1966VMMDECL(void) PGMRZDynMapFlushAutoSet(PVMCPU pVCpu)
1967{
1968 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1969 AssertMsg(pSet->iCpu == PGMRZDYNMAP_CUR_CPU(), ("%d %d efl=%#x\n", pSet->iCpu, PGMRZDYNMAP_CUR_CPU(), ASMGetFlags()));
1970
1971 /*
1972 * Only flush it if it's 45% full.
1973 */
1974 uint32_t cEntries = pSet->cEntries;
1975 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1976 Assert(pSet->iSubset == UINT32_MAX);
1977#ifdef IN_RC
1978 if (RT_ELEMENTS(pSet->aEntries) > MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)
1979 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / (MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)) % 11]);
1980 else
1981#endif
1982 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1983 if ( cEntries >= RT_ELEMENTS(pSet->aEntries) * 45 / 100
1984 || pgmRZDynMapHasHighLoad(pSet))
1985 {
1986 pSet->cEntries = 0;
1987 Log(("PGMDynMapFlushAutoSet: cEntries=%d\n", pSet->cEntries));
1988
1989 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1990 AssertMsg(pSet->iCpu == PGMRZDYNMAP_CUR_CPU(), ("%d %d efl=%#x\n", pSet->iCpu, PGMRZDYNMAP_CUR_CPU(), ASMGetFlags()));
1991 }
1992}
1993
1994
1995#ifndef IN_RC
1996/**
1997 * Migrates the automatic mapping set of the current vCPU if it's active and
1998 * necessary.
1999 *
2000 * This is called when re-entering the hardware assisted execution mode after a
2001 * nip down to ring-3. We run the risk that the CPU might have change and we
2002 * will therefore make sure all the cache entries currently in the auto set will
2003 * be valid on the new CPU. If the cpu didn't change nothing will happen as all
2004 * the entries will have been flagged as invalidated.
2005 *
2006 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2007 * @thread EMT
2008 */
2009VMMR0DECL(void) PGMR0DynMapMigrateAutoSet(PVMCPU pVCpu)
2010{
2011 LogFlow(("PGMR0DynMapMigrateAutoSet\n"));
2012 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
2013 int32_t iRealCpu = PGMRZDYNMAP_CUR_CPU();
2014 if (pSet->iCpu != iRealCpu)
2015 {
2016 uint32_t i = pSet->cEntries;
2017 if (i != PGMMAPSET_CLOSED)
2018 {
2019 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
2020 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
2021 {
2022 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
2023 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
2024
2025 while (i-- > 0)
2026 {
2027 Assert(pSet->aEntries[i].cRefs > 0);
2028 uint32_t iPage = pSet->aEntries[i].iPage;
2029 Assert(iPage < pThis->cPages);
2030 if (RTCpuSetIsMemberByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu))
2031 {
2032 RTCpuSetDelByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu);
2033 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
2034
2035 ASMInvalidatePage((uintptr_t)pThis->paPages[iPage].pvPage);
2036 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapMigrateInvlPg);
2037
2038 PGMRZDYNMAP_SPINLOCK_REACQUIRE(pThis);
2039 }
2040 }
2041
2042 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
2043 }
2044 }
2045 pSet->iCpu = iRealCpu;
2046 }
2047}
2048#endif /* !IN_RC */
2049
2050
2051/**
2052 * Worker function that flushes the current subset.
2053 *
2054 * This is called when the set is popped or when the set
2055 * hash a too high load. As also pointed out elsewhere, the
2056 * whole subset thing is a hack for working around code that
2057 * accesses too many pages. Like PGMPool.
2058 *
2059 * @param pSet The set which subset to flush.
2060 */
2061static void pgmDynMapFlushSubset(PPGMMAPSET pSet)
2062{
2063 uint32_t iSubset = pSet->iSubset;
2064 uint32_t i = pSet->cEntries;
2065 Assert(i <= RT_ELEMENTS(pSet->aEntries));
2066 if ( i > iSubset
2067 && i <= RT_ELEMENTS(pSet->aEntries))
2068 {
2069 Log(("pgmDynMapFlushSubset: cEntries=%d iSubset=%d\n", pSet->cEntries, iSubset));
2070 pSet->cEntries = iSubset;
2071
2072 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
2073 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
2074
2075 while (i-- > iSubset)
2076 {
2077 uint32_t iPage = pSet->aEntries[i].iPage;
2078 Assert(iPage < pThis->cPages);
2079 int32_t cRefs = pSet->aEntries[i].cRefs;
2080 Assert(cRefs > 0);
2081 pgmRZDynMapReleasePageLocked(pThis, iPage, cRefs);
2082
2083 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[i]);
2084 }
2085
2086 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
2087 }
2088}
2089
2090
2091/**
2092 * Creates a subset.
2093 *
2094 * A subset is a hack to avoid having to rewrite code that touches a lot of
2095 * pages. It prevents the mapping set from being overflowed by automatically
2096 * flushing previous mappings when a certain threshold is reached.
2097 *
2098 * Pages mapped after calling this function are only valid until the next page
2099 * is mapped.
2100 *
2101 * @returns The index of the previous subset. Pass this to
2102 * PGMDynMapPopAutoSubset when popping it.
2103 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2104 */
2105VMMDECL(uint32_t) PGMRZDynMapPushAutoSubset(PVMCPU pVCpu)
2106{
2107 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
2108 AssertReturn(pSet->cEntries != PGMMAPSET_CLOSED, UINT32_MAX);
2109 uint32_t iPrevSubset = pSet->iSubset;
2110 LogFlow(("PGMRZDynMapPushAutoSubset: pVCpu=%p iPrevSubset=%u\n", pVCpu, iPrevSubset));
2111
2112 /*
2113 * If it looks like we're approaching the max set size or mapping space
2114 * optimize the set to drop off unused pages.
2115 */
2116 if ( pSet->cEntries > RT_ELEMENTS(pSet->aEntries) * 60 / 100
2117 || pgmRZDynMapHasHighLoad(pSet))
2118 {
2119 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetOptimize);
2120 pgmDynMapOptimizeAutoSet(pSet);
2121 }
2122
2123 pSet->iSubset = pSet->cEntries;
2124 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSubsets);
2125
2126 AssertMsg(iPrevSubset <= pSet->iSubset || iPrevSubset == UINT32_MAX, ("iPrevSubset=%#x iSubset=%#x\n", iPrevSubset, pSet->iSubset));
2127 return iPrevSubset;
2128}
2129
2130
2131/**
2132 * Pops a subset created by a previous call to PGMDynMapPushAutoSubset.
2133 *
2134 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2135 * @param iPrevSubset What PGMDynMapPushAutoSubset returned.
2136 */
2137VMMDECL(void) PGMRZDynMapPopAutoSubset(PVMCPU pVCpu, uint32_t iPrevSubset)
2138{
2139 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
2140 uint32_t cEntries = pSet->cEntries;
2141 LogFlow(("PGMRZDynMapPopAutoSubset: pVCpu=%p iPrevSubset=%u iSubset=%u cEntries=%u\n", pVCpu, iPrevSubset, pSet->iSubset, cEntries));
2142 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
2143 AssertMsgReturnVoid(pSet->iSubset >= iPrevSubset || iPrevSubset == UINT32_MAX, ("iPrevSubset=%u iSubset=%u cEntries=%u\n", iPrevSubset, pSet->iSubset, cEntries));
2144#ifdef IN_RC
2145 if (RT_ELEMENTS(pSet->aEntries) > MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)
2146 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / (MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)) % 11]);
2147 else
2148#endif
2149 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
2150 if ( cEntries >= RT_ELEMENTS(pSet->aEntries) * 40 / 100
2151 && cEntries != pSet->iSubset)
2152 {
2153 pgmDynMapFlushSubset(pSet);
2154 Assert(pSet->cEntries >= iPrevSubset || iPrevSubset == UINT32_MAX);
2155 }
2156 pSet->iSubset = iPrevSubset;
2157}
2158
2159
2160/**
2161 * Indicates that the given page is unused and its mapping can be re-used.
2162 *
2163 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2164 * @param pvHint The page that is now unused. This does not have to
2165 * point at the start of the page. NULL is ignored.
2166 */
2167#ifdef LOG_ENABLED
2168void pgmRZDynMapUnusedHint(PVMCPU pVCpu, void *pvHint, RT_SRC_POS_DECL)
2169#else
2170void pgmRZDynMapUnusedHint(PVMCPU pVCpu, void *pvHint)
2171#endif
2172{
2173 /*
2174 * Ignore NULL pointers and mask off the page offset bits.
2175 */
2176 if (pvHint == NULL)
2177 return;
2178 pvHint = (void *)((uintptr_t)pvHint & ~(uintptr_t)PAGE_OFFSET_MASK);
2179
2180 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
2181 uint32_t iEntry = pSet->cEntries;
2182 AssertReturnVoid(iEntry > 0);
2183
2184 /*
2185 * Find the entry in the usual unrolled fashion.
2186 */
2187 /** @todo add a hint to the set which entry was used last since it's not
2188 * always the last entry? */
2189#define IS_MATCHING_ENTRY(pSet, iEntry, pvHint) \
2190 ( (pSet)->aEntries[(iEntry)].pvPage == (pvHint) \
2191 && (uint32_t)(pSet)->aEntries[(iEntry)].cRefs + (pSet)->aEntries[(iEntry)].cInlinedRefs \
2192 > (pSet)->aEntries[(iEntry)].cUnrefs )
2193 if ( iEntry >= 1 && IS_MATCHING_ENTRY(pSet, iEntry - 1, pvHint))
2194 iEntry = iEntry - 1;
2195 else if (iEntry >= 2 && IS_MATCHING_ENTRY(pSet, iEntry - 2, pvHint))
2196 iEntry = iEntry - 2;
2197 else if (iEntry >= 3 && IS_MATCHING_ENTRY(pSet, iEntry - 3, pvHint))
2198 iEntry = iEntry - 3;
2199 else if (iEntry >= 4 && IS_MATCHING_ENTRY(pSet, iEntry - 4, pvHint))
2200 iEntry = iEntry - 4;
2201 else if (iEntry >= 5 && IS_MATCHING_ENTRY(pSet, iEntry - 5, pvHint))
2202 iEntry = iEntry - 5;
2203 else if (iEntry >= 6 && IS_MATCHING_ENTRY(pSet, iEntry - 6, pvHint))
2204 iEntry = iEntry - 6;
2205 else if (iEntry >= 7 && IS_MATCHING_ENTRY(pSet, iEntry - 7, pvHint))
2206 iEntry = iEntry - 7;
2207 else
2208 {
2209 /*
2210 * Loop till we find it.
2211 */
2212 bool fFound = false;
2213 if (iEntry > 7)
2214 {
2215 iEntry -= 7;
2216 while (iEntry-- > 0)
2217 if (IS_MATCHING_ENTRY(pSet, iEntry, pvHint))
2218 {
2219 fFound = true;
2220 break;
2221 }
2222 }
2223 AssertMsgReturnVoid(fFound,
2224 ("pvHint=%p cEntries=%#x iSubset=%#x\n"
2225 "aEntries[0] = {%#x, %#x, %#x, %#x, %p}\n"
2226 "aEntries[1] = {%#x, %#x, %#x, %#x, %p}\n"
2227 "aEntries[2] = {%#x, %#x, %#x, %#x, %p}\n"
2228 "aEntries[3] = {%#x, %#x, %#x, %#x, %p}\n"
2229 "aEntries[4] = {%#x, %#x, %#x, %#x, %p}\n"
2230 "aEntries[5] = {%#x, %#x, %#x, %#x, %p}\n"
2231 ,
2232 pvHint, pSet->cEntries, pSet->iSubset,
2233 pSet->aEntries[0].iPage, pSet->aEntries[0].cRefs, pSet->aEntries[0].cInlinedRefs, pSet->aEntries[0].cUnrefs, pSet->aEntries[0].pvPage,
2234 pSet->aEntries[1].iPage, pSet->aEntries[1].cRefs, pSet->aEntries[1].cInlinedRefs, pSet->aEntries[1].cUnrefs, pSet->aEntries[1].pvPage,
2235 pSet->aEntries[2].iPage, pSet->aEntries[2].cRefs, pSet->aEntries[2].cInlinedRefs, pSet->aEntries[2].cUnrefs, pSet->aEntries[2].pvPage,
2236 pSet->aEntries[3].iPage, pSet->aEntries[3].cRefs, pSet->aEntries[3].cInlinedRefs, pSet->aEntries[3].cUnrefs, pSet->aEntries[3].pvPage,
2237 pSet->aEntries[4].iPage, pSet->aEntries[4].cRefs, pSet->aEntries[4].cInlinedRefs, pSet->aEntries[4].cUnrefs, pSet->aEntries[4].pvPage,
2238 pSet->aEntries[5].iPage, pSet->aEntries[5].cRefs, pSet->aEntries[5].cInlinedRefs, pSet->aEntries[5].cUnrefs, pSet->aEntries[5].pvPage));
2239 }
2240#undef IS_MATCHING_ENTRY
2241
2242 /*
2243 * Update it.
2244 */
2245 uint32_t const cTotalRefs = (uint32_t)pSet->aEntries[iEntry].cRefs + pSet->aEntries[iEntry].cInlinedRefs;
2246 uint32_t const cUnrefs = pSet->aEntries[iEntry].cUnrefs;
2247 LogFlow(("pgmRZDynMapUnusedHint: pvHint=%p #%u cRefs=%d cInlinedRefs=%d cUnrefs=%d (+1) cTotalRefs=%d %s(%d) %s\n",
2248 pvHint, iEntry, pSet->aEntries[iEntry].cRefs, pSet->aEntries[iEntry].cInlinedRefs, cUnrefs, cTotalRefs, pszFile, iLine, pszFunction));
2249 AssertReturnVoid(cTotalRefs > cUnrefs);
2250
2251 if (RT_LIKELY(cUnrefs < UINT16_MAX - 1))
2252 pSet->aEntries[iEntry].cUnrefs++;
2253 else if (pSet->aEntries[iEntry].cInlinedRefs)
2254 {
2255 uint32_t cSub = RT_MIN(pSet->aEntries[iEntry].cInlinedRefs, pSet->aEntries[iEntry].cUnrefs);
2256 pSet->aEntries[iEntry].cInlinedRefs -= cSub;
2257 pSet->aEntries[iEntry].cUnrefs -= cSub;
2258 pSet->aEntries[iEntry].cUnrefs++;
2259 }
2260 else
2261 Log(("pgmRZDynMapUnusedHint: pvHint=%p ignored because of overflow! %s(%d) %s\n", pvHint, pszFile, iLine, pszFunction));
2262
2263#ifdef PGMRZDYNMAP_STRICT_RELEASE
2264 /*
2265 * Optimize the set to trigger the unmapping and invalidation of the page.
2266 */
2267 if (cUnrefs + 1 == cTotalRefs)
2268 pgmDynMapOptimizeAutoSet(pSet);
2269#endif
2270}
2271
2272
2273/**
2274 * Common worker code for pgmRZDynMapHCPageInlined, pgmRZDynMapHCPageV2Inlined
2275 * and pgmR0DynMapGCPageOffInlined.
2276 *
2277 * @returns VINF_SUCCESS, bails out to ring-3 on failure.
2278 * @param pSet The set.
2279 * @param HCPhys The physical address of the page.
2280 * @param ppv Where to store the address of the mapping on success.
2281 *
2282 * @remarks This is a very hot path.
2283 */
2284int pgmRZDynMapHCPageCommon(PPGMMAPSET pSet, RTHCPHYS HCPhys, void **ppv RTLOG_COMMA_SRC_POS_DECL)
2285{
2286 AssertMsg(pSet->iCpu == PGMRZDYNMAP_CUR_CPU(), ("%d %d efl=%#x\n", pSet->iCpu, PGMRZDYNMAP_CUR_CPU(), ASMGetFlags()));
2287 PVMCPU pVCpu = PGMRZDYNMAP_SET_2_VMCPU(pSet);
2288 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
2289
2290 /*
2291 * Map it.
2292 */
2293 void *pvPage;
2294 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
2295 uint32_t iPage = pgmR0DynMapPage(pThis, HCPhys, pSet->iCpu, pVCpu, &pvPage);
2296 if (RT_UNLIKELY(iPage == UINT32_MAX))
2297 {
2298 /*
2299 * We're out of mapping space, optimize our set to try remedy the
2300 * situation. (Only works if there are unreference hints.)
2301 */
2302 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetOptimize);
2303 pgmDynMapOptimizeAutoSet(pSet);
2304
2305 iPage = pgmR0DynMapPage(pThis, HCPhys, pSet->iCpu, pVCpu, &pvPage);
2306 if (RT_UNLIKELY(iPage == UINT32_MAX))
2307 {
2308 RTAssertMsg2Weak("pgmRZDynMapHCPageCommon: cLoad=%u/%u cPages=%u cGuardPages=%u\n",
2309 pThis->cLoad, pThis->cMaxLoad, pThis->cPages, pThis->cGuardPages);
2310 if (!g_fPGMR0DynMapTestRunning)
2311 VMMRZCallRing3NoCpu(PGMRZDYNMAP_SET_2_VM(pSet), VMMCALLRING3_VM_R0_ASSERTION, 0);
2312 *ppv = NULL;
2313 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
2314 return VERR_PGM_DYNMAP_FAILED;
2315 }
2316 }
2317
2318 /*
2319 * Add the page to the auto reference set.
2320 *
2321 * The typical usage pattern means that the same pages will be mapped
2322 * several times in the same set. We can catch most of these
2323 * remappings by looking a few pages back into the set. (The searching
2324 * and set optimizing path will hardly ever be used when doing this.)
2325 */
2326 AssertCompile(RT_ELEMENTS(pSet->aEntries) >= 8);
2327 int32_t i = pSet->cEntries;
2328 if (i-- < 5)
2329 {
2330 unsigned iEntry = pSet->cEntries++;
2331 pSet->aEntries[iEntry].cRefs = 1;
2332 pSet->aEntries[iEntry].cUnrefs = 0;
2333 pSet->aEntries[iEntry].cInlinedRefs = 0;
2334 pSet->aEntries[iEntry].iPage = iPage;
2335 pSet->aEntries[iEntry].pvPage = pvPage;
2336 pSet->aEntries[iEntry].HCPhys = HCPhys;
2337 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
2338 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/0/0 iPage=%#x [a] %s(%d) %s\n",
2339 pSet, HCPhys, iEntry, iEntry + 1, pvPage, 1, iPage, pszFile, iLine, pszFunction));
2340 }
2341 /* Any of the last 5 pages? */
2342 else if ( pSet->aEntries[i - 0].iPage == iPage
2343 && pSet->aEntries[i - 0].cRefs < UINT16_MAX - 1)
2344 {
2345 pSet->aEntries[i - 0].cRefs++;
2346 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [0] %s(%d) %s\n", pSet, HCPhys, i - 0, pSet->cEntries, pvPage, pSet->aEntries[i - 0].cRefs, pSet->aEntries[i - 0].cInlinedRefs, pSet->aEntries[i - 0].cUnrefs, iPage, pszFile, iLine, pszFunction));
2347 }
2348 else if ( pSet->aEntries[i - 1].iPage == iPage
2349 && pSet->aEntries[i - 1].cRefs < UINT16_MAX - 1)
2350 {
2351 pSet->aEntries[i - 1].cRefs++;
2352 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [1] %s(%d) %s\n", pSet, HCPhys, i - 1, pSet->cEntries, pvPage, pSet->aEntries[i - 1].cRefs, pSet->aEntries[i - 1].cInlinedRefs, pSet->aEntries[i - 1].cUnrefs, iPage, pszFile, iLine, pszFunction));
2353 }
2354 else if ( pSet->aEntries[i - 2].iPage == iPage
2355 && pSet->aEntries[i - 2].cRefs < UINT16_MAX - 1)
2356 {
2357 pSet->aEntries[i - 2].cRefs++;
2358 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [2] %s(%d) %s\n", pSet, HCPhys, i - 2, pSet->cEntries, pvPage, pSet->aEntries[i - 2].cRefs, pSet->aEntries[i - 2].cInlinedRefs, pSet->aEntries[i - 2].cUnrefs, iPage, pszFile, iLine, pszFunction));
2359 }
2360 else if ( pSet->aEntries[i - 3].iPage == iPage
2361 && pSet->aEntries[i - 3].cRefs < UINT16_MAX - 1)
2362 {
2363 pSet->aEntries[i - 3].cRefs++;
2364 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [4] %s(%d) %s\n", pSet, HCPhys, i - 3, pSet->cEntries, pvPage, pSet->aEntries[i - 3].cRefs, pSet->aEntries[i - 3].cInlinedRefs, pSet->aEntries[i - 3].cUnrefs, iPage, pszFile, iLine, pszFunction));
2365 }
2366 else if ( pSet->aEntries[i - 4].iPage == iPage
2367 && pSet->aEntries[i - 4].cRefs < UINT16_MAX - 1)
2368 {
2369 pSet->aEntries[i - 4].cRefs++;
2370 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [4] %s(%d) %s\n", pSet, HCPhys, i - 4, pSet->cEntries, pvPage, pSet->aEntries[i - 4].cRefs, pSet->aEntries[i - 4].cInlinedRefs, pSet->aEntries[i - 4].cUnrefs, iPage, pszFile, iLine, pszFunction));
2371 }
2372 /* Don't bother searching unless we're above a 60% load. */
2373 else if (RT_LIKELY(i <= (int32_t)RT_ELEMENTS(pSet->aEntries) * 60 / 100))
2374 {
2375 unsigned iEntry = pSet->cEntries++;
2376 pSet->aEntries[iEntry].cRefs = 1;
2377 pSet->aEntries[iEntry].cUnrefs = 0;
2378 pSet->aEntries[iEntry].cInlinedRefs = 0;
2379 pSet->aEntries[iEntry].iPage = iPage;
2380 pSet->aEntries[iEntry].pvPage = pvPage;
2381 pSet->aEntries[iEntry].HCPhys = HCPhys;
2382 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
2383 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=1/0/0 iPage=%#x [b] %s(%d) %s\n", pSet, HCPhys, iEntry, pSet->cEntries, pvPage, iPage, pszFile, iLine, pszFunction));
2384 }
2385 else
2386 {
2387 /* Search the rest of the set. */
2388 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
2389 i -= 4;
2390 while (i-- > 0)
2391 if ( pSet->aEntries[i].iPage == iPage
2392 && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
2393 {
2394 pSet->aEntries[i].cRefs++;
2395 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetSearchHits);
2396 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [c] %s(%d) %s\n", pSet, HCPhys, i, pSet->cEntries, pvPage, pSet->aEntries[i].cRefs, pSet->aEntries[i].cInlinedRefs, pSet->aEntries[i].cUnrefs, iPage, pszFile, iLine, pszFunction));
2397 break;
2398 }
2399 if (i < 0)
2400 {
2401 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetSearchMisses);
2402#if 0 /* this is very bogus */
2403 if (pSet->iSubset < pSet->cEntries)
2404 {
2405 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetSearchFlushes);
2406 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(pSet->cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
2407 pgmDynMapFlushSubset(pSet);
2408 }
2409#endif
2410
2411 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
2412 {
2413 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetOptimize);
2414 pgmDynMapOptimizeAutoSet(pSet);
2415 }
2416
2417 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
2418 {
2419 unsigned iEntry = pSet->cEntries++;
2420 pSet->aEntries[iEntry].cRefs = 1;
2421 pSet->aEntries[iEntry].cUnrefs = 0;
2422 pSet->aEntries[iEntry].cInlinedRefs = 0;
2423 pSet->aEntries[iEntry].iPage = iPage;
2424 pSet->aEntries[iEntry].pvPage = pvPage;
2425 pSet->aEntries[iEntry].HCPhys = HCPhys;
2426 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
2427 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=1/0/0 iPage=%#x [d] %s(%d) %s\n", pSet, HCPhys, iEntry, pSet->cEntries, pvPage, iPage, pszFile, iLine, pszFunction));
2428 }
2429 else
2430 {
2431 /* We're screwed. */
2432 pgmRZDynMapReleasePage(pThis, iPage, 1);
2433
2434 RTAssertMsg2Weak("pgmRZDynMapHCPageCommon: set is full!\n");
2435 if (!g_fPGMR0DynMapTestRunning)
2436 VMMRZCallRing3NoCpu(PGMRZDYNMAP_SET_2_VM(pSet), VMMCALLRING3_VM_R0_ASSERTION, 0);
2437 *ppv = NULL;
2438 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
2439 return VERR_PGM_DYNMAP_FULL_SET;
2440 }
2441 }
2442 }
2443
2444 *ppv = pvPage;
2445 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
2446 return VINF_SUCCESS;
2447}
2448
2449
2450#if 0 /*def DEBUG*/
/**
 * Argument package for pgmR0DynMapTest3PerCpu.
 */
typedef struct PGMR0DYNMAPTEST
{
    /** The value each CPU is expected to read through pu32. */
    uint32_t            u32Expect;
    /** The address each CPU reads and compares against u32Expect. */
    uint32_t           *pu32;
    /** Incremented atomically for every CPU that reads a different value. */
    uint32_t volatile   cFailures;
} PGMR0DYNMAPTEST;
/** Pointer to a PGMR0DYNMAPTEST structure. */
typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST;
2459
2460/**
2461 * Checks that the content of the page is the same on all CPUs, i.e. that there
2462 * are no CPU specific PTs or similar nasty stuff involved.
2463 *
2464 * @param idCpu The current CPU.
2465 * @param pvUser1 Pointer a PGMR0DYNMAPTEST structure.
2466 * @param pvUser2 Unused, ignored.
2467 */
2468static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
2469{
2470 PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1;
2471 ASMInvalidatePage(pTest->pu32);
2472 if (*pTest->pu32 != pTest->u32Expect)
2473 ASMAtomicIncU32(&pTest->cFailures);
2474 NOREF(pvUser2); NOREF(idCpu);
2475}
2476
2477
2478/**
2479 * Performs some basic tests in debug builds.
2480 */
2481static int pgmR0DynMapTest(PVM pVM)
2482{
2483 LogRel(("pgmR0DynMapTest: ****** START ******\n"));
2484 PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet;
2485 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
2486 uint32_t i;
2487
2488 /*
2489 * Assert internal integrity first.
2490 */
2491 LogRel(("Test #0\n"));
2492 int rc = PGMR0DynMapAssertIntegrity();
2493 if (RT_FAILURE(rc))
2494 return rc;
2495
2496 void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed;
2497 pVM->pgm.s.pvR0DynMapUsed = pThis;
2498 g_fPGMR0DynMapTestRunning = true;
2499
2500 /*
2501 * Simple test, map CR3 twice and check that we're getting the
2502 * same mapping address back.
2503 */
2504 LogRel(("Test #1\n"));
2505 ASMIntDisable();
2506 PGMRZDynMapStartAutoSet(&pVM->aCpus[0]);
2507
2508 uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK;
2509 void *pv = (void *)(intptr_t)-1;
2510 void *pv2 = (void *)(intptr_t)-2;
2511 rc = pgmRZDynMapHCPageCommon(pVM, cr3, &pv RTLOG_COMMA_SRC_POS);
2512 int rc2 = pgmRZDynMapHCPageCommon(pVM, cr3, &pv2 RTLOG_COMMA_SRC_POS);
2513 ASMIntEnable();
2514 if ( RT_SUCCESS(rc2)
2515 && RT_SUCCESS(rc)
2516 && pv == pv2)
2517 {
2518 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2519 rc = PGMR0DynMapAssertIntegrity();
2520
2521 /*
2522 * Check that the simple set overflow code works by filling it
2523 * with more CR3 mappings.
2524 */
2525 LogRel(("Test #2\n"));
2526 ASMIntDisable();
2527 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2528 for (i = 0 ; i < UINT16_MAX*2 - 1 && RT_SUCCESS(rc) && pv2 == pv; i++)
2529 {
2530 pv2 = (void *)(intptr_t)-4;
2531 rc = pgmRZDynMapHCPageCommon(pVM, cr3, &pv2 RTLOG_COMMA_SRC_POS);
2532 }
2533 ASMIntEnable();
2534 if (RT_FAILURE(rc) || pv != pv2)
2535 {
2536 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%p\n", __LINE__, rc, pv, pv2, i));
2537 if (RT_SUCCESS(rc)) rc = VERR_PGM_DYNMAP_IPE;
2538 }
2539 else if (pSet->cEntries != 5)
2540 {
2541 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries) / 2));
2542 rc = VERR_PGM_DYNMAP_IPE;
2543 }
2544 else if ( pSet->aEntries[4].cRefs != UINT16_MAX - 1
2545 || pSet->aEntries[3].cRefs != UINT16_MAX - 1
2546 || pSet->aEntries[2].cRefs != 1
2547 || pSet->aEntries[1].cRefs != 1
2548 || pSet->aEntries[0].cRefs != 1)
2549 {
2550 LogRel(("failed(%d): bad set dist: ", __LINE__));
2551 for (i = 0; i < pSet->cEntries; i++)
2552 LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs));
2553 LogRel(("\n"));
2554 rc = VERR_PGM_DYNMAP_IPE;
2555 }
2556 if (RT_SUCCESS(rc))
2557 rc = PGMR0DynMapAssertIntegrity();
2558 if (RT_SUCCESS(rc))
2559 {
2560 /*
2561 * Trigger an set optimization run (exactly).
2562 */
2563 LogRel(("Test #3\n"));
2564 ASMIntDisable();
2565 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2566 pv2 = NULL;
2567 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) - 5 && RT_SUCCESS(rc) && pv2 != pv; i++)
2568 {
2569 pv2 = (void *)(intptr_t)(-5 - i);
2570 rc = pgmRZDynMapHCPageCommon(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2 RTLOG_COMMA_SRC_POS);
2571 }
2572 ASMIntEnable();
2573 if (RT_FAILURE(rc) || pv == pv2)
2574 {
2575 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
2576 if (RT_SUCCESS(rc)) rc = VERR_PGM_DYNMAP_IPE;
2577 }
2578 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries))
2579 {
2580 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2581 rc = VERR_PGM_DYNMAP_IPE;
2582 }
2583 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2584 if (RT_SUCCESS(rc))
2585 rc = PGMR0DynMapAssertIntegrity();
2586 if (RT_SUCCESS(rc))
2587 {
2588 /*
2589 * Trigger an overflow error.
2590 */
2591 LogRel(("Test #4\n"));
2592 ASMIntDisable();
2593 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2594 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) + 2; i++)
2595 {
2596 rc = pgmRZDynMapHCPageCommon(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2 RTLOG_COMMA_SRC_POS);
2597 if (RT_SUCCESS(rc))
2598 rc = PGMR0DynMapAssertIntegrity();
2599 if (RT_FAILURE(rc))
2600 break;
2601 }
2602 ASMIntEnable();
2603 if (rc == VERR_PGM_DYNMAP_FULL_SET)
2604 {
2605 /* flush the set. */
2606 LogRel(("Test #5\n"));
2607 ASMIntDisable();
2608 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2609 PGMRZDynMapReleaseAutoSet(&pVM->aCpus[0]);
2610 PGMRZDynMapStartAutoSet(&pVM->aCpus[0]);
2611 ASMIntEnable();
2612
2613 rc = PGMR0DynMapAssertIntegrity();
2614 }
2615 else
2616 {
2617 LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__,
2618 rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i));
2619 if (RT_SUCCESS(rc)) rc = VERR_PGM_DYNMAP_IPE;
2620 }
2621 }
2622 }
2623 }
2624 else
2625 {
2626 LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2));
2627 if (RT_SUCCESS(rc))
2628 rc = rc2;
2629 }
2630
2631 /*
2632 * Check that everyone sees the same stuff.
2633 */
2634 if (RT_SUCCESS(rc))
2635 {
2636 LogRel(("Test #5\n"));
2637 ASMIntDisable();
2638 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2639 RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0);
2640 rc = pgmRZDynMapHCPageCommon(pVM, HCPhysPT, &pv RTLOG_COMMA_SRC_POS);
2641 if (RT_SUCCESS(rc))
2642 {
2643 PGMR0DYNMAPTEST Test;
2644 uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u;
2645 Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK));
2646 Test.u32Expect = *pu32Real;
2647 ASMAtomicWriteU32(&Test.cFailures, 0);
2648 ASMIntEnable();
2649
2650 rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL);
2651 if (RT_FAILURE(rc))
2652 LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc));
2653 else if (Test.cFailures)
2654 {
2655 LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__,
2656 Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2657 rc = VERR_PGM_DYNMAP_IPE;
2658 }
2659 else
2660 LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n",
2661 pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2662 }
2663 else
2664 {
2665 ASMIntEnable();
2666 LogRel(("failed(%d): rc=%Rrc\n", rc));
2667 }
2668 }
2669
2670 /*
2671 * Clean up.
2672 */
2673 LogRel(("Cleanup.\n"));
2674 ASMIntDisable();
2675 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2676 PGMRZDynMapFlushAutoSet(&pVM->aCpus[0]);
2677 PGMRZDynMapReleaseAutoSet(&pVM->aCpus[0]);
2678 ASMIntEnable();
2679
2680 if (RT_SUCCESS(rc))
2681 rc = PGMR0DynMapAssertIntegrity();
2682 else
2683 PGMR0DynMapAssertIntegrity();
2684
2685 g_fPGMR0DynMapTestRunning = false;
2686 LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc,
2687 pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2688 pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved;
2689 LogRel(("pgmR0DynMapTest: ****** END ******\n"));
2690 return rc;
2691}
2692#endif /* DEBUG */
2693
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette