VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp@ 14785

Last change on this file since 14785 was 14757, checked in by vboxsync, 16 years ago

#3202: do PGMDynMapReleaseAutoSet/PGMDynMapStartAutoSet in the VMXR0RunGuestCode loop or we'll run out of entries very quickly (xp/pgmpool).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 67.1 KB
1/* $Id: PGMR0DynMap.cpp 14757 2008-11-28 03:24:01Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PGM
26#include <VBox/pgm.h>
27#include "../PGMInternal.h"
28#include <VBox/vm.h>
29#include <VBox/sup.h>
30#include <VBox/err.h>
31#include <iprt/asm.h>
32#include <iprt/alloc.h>
33#include <iprt/assert.h>
34#include <iprt/cpuset.h>
35#include <iprt/memobj.h>
36#include <iprt/mp.h>
37#include <iprt/semaphore.h>
38#include <iprt/spinlock.h>
39#include <iprt/string.h>
40
41
42/*******************************************************************************
43* Defined Constants And Macros *
44*******************************************************************************/
45/** The max size of the mapping cache (in pages). */
46#define PGMR0DYNMAP_MAX_PAGES ((8*_1M) >> PAGE_SHIFT)
47/** The segment size we fall back to when an out-of-memory condition
48 * prevents setting up a single big segment. */
49#define PGMR0DYNMAP_SMALL_SEG_PAGES 128
50/** The number of pages we reserve per CPU. */
51#define PGMR0DYNMAP_PAGES_PER_CPU 64
52/** The number of guard pages. */
53#if defined(VBOX_STRICT)
54# define PGMR0DYNMAP_GUARD_PAGES 4
55#else
56# define PGMR0DYNMAP_GUARD_PAGES 0
57#endif
58/** The dummy physical address of guard pages. */
59#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS UINT32_C(0x7777feed)
60/** The dummy reference count of guard pages. (Must be non-zero.) */
61#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT INT32_C(0x7777feed)
62#if 0
63/** Define this to just clear the present bit on guard pages.
64 * The alternative is to replace the entire PTE with a bad not-present
65 * PTE. Either way, XNU will screw us. :-/ */
66#define PGMR0DYNMAP_GUARD_NP
67#endif
68/** The dummy PTE value for a guard page in legacy (32-bit) paging mode. */
69#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE X86_PTE_PG_MASK
70/** The dummy PTE value for a guard page in PAE/AMD64 paging mode. */
71#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE UINT64_MAX /*X86_PTE_PAE_PG_MASK*/
72/** Calculates the overload threshold. Currently set at 50%. */
73#define PGMR0DYNMAP_CALC_OVERLOAD(cPages) ((cPages) / 2)
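/* Illustrative note (editorial, not part of the original source): on a 4-CPU host
 * with guard pages compiled out, the initial cache would be 4 * PGMR0DYNMAP_PAGES_PER_CPU
 * = 256 pages, so PGMR0DYNMAP_CALC_OVERLOAD(256) = 128 is the load level at which
 * PGMR0DynMapInitVM starts expanding the cache for later VMs. */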
74
75#if 0
76/* Assertions cause panics if preemption is disabled; this can be used to work around that. */
77#define RTSpinlockAcquire(a,b) do {} while (0)
78#define RTSpinlockRelease(a,b) do {} while (0)
79#endif
80
81
82/*******************************************************************************
83* Structures and Typedefs *
84*******************************************************************************/
85/**
86 * Ring-0 dynamic mapping cache segment.
87 *
88 * The dynamic mapping cache can be extended with additional segments if the
89 * load is found to be too high. This is done the next time a VM is created, under
90 * the protection of the init mutex. The arrays are reallocated and the new
91 * segment is added to the end of them. Nothing is rehashed of course, as the
92 * indexes / addresses must remain unchanged.
93 *
94 * This structure is only modified while owning the init mutex or during module
95 * init / term.
96 */
97typedef struct PGMR0DYNMAPSEG
98{
99 /** Pointer to the next segment. */
100 struct PGMR0DYNMAPSEG *pNext;
101 /** The memory object for the virtual address range that we're abusing. */
102 RTR0MEMOBJ hMemObj;
103 /** The start page in the cache. (I.e. index into the arrays.) */
104 uint16_t iPage;
105 /** The number of pages this segment contributes. */
106 uint16_t cPages;
107 /** The number of page tables. */
108 uint16_t cPTs;
109 /** The memory objects for the page tables. */
110 RTR0MEMOBJ ahMemObjPTs[1];
111} PGMR0DYNMAPSEG;
112/** Pointer to a ring-0 dynamic mapping cache segment. */
113typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
114
115
116/**
117 * Ring-0 dynamic mapping cache entry.
118 *
119 * This structure tracks a page that is currently mapped in the cache.
120 */
121typedef struct PGMR0DYNMAPENTRY
122{
123 /** The physical address of the currently mapped page.
124 * This is duplicated for three reasons: cache locality, cache policy of the PT
125 * mappings and sanity checks. */
126 RTHCPHYS HCPhys;
127 /** Pointer to the page. */
128 void *pvPage;
129 /** The number of references. */
130 int32_t volatile cRefs;
131 /** PTE pointer union. */
132 union PGMR0DYNMAPENTRY_PPTE
133 {
134 /** PTE pointer, 32-bit legacy version. */
135 PX86PTE pLegacy;
136 /** PTE pointer, PAE version. */
137 PX86PTEPAE pPae;
138 /** PTE pointer, the void version. */
139 void *pv;
140 } uPte;
141 /** CPUs that haven't invalidated this entry after its last update. */
142 RTCPUSET PendingSet;
143} PGMR0DYNMAPENTRY;
144/** Pointer to a ring-0 dynamic mapping cache entry. */
145typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
146
147
148/**
149 * Ring-0 dynamic mapping cache.
150 *
151 * This is initialized during VMMR0 module init but no segments are allocated at
152 * that time. Segments will be added when the first VM is started and removed
153 * again when the last VM shuts down, thus avoiding memory consumption while dormant.
154 * At module termination, the remaining bits will be freed up.
155 */
156typedef struct PGMR0DYNMAP
157{
158 /** The usual magic number / eye catcher (PGMR0DYNMAP_MAGIC). */
159 uint32_t u32Magic;
160 /** Spinlock serializing the normal operation of the cache. */
161 RTSPINLOCK hSpinlock;
162 /** Array for tracking and managing the pages. */
163 PPGMR0DYNMAPENTRY paPages;
164 /** The cache size given as a number of pages. */
165 uint32_t cPages;
166 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
167 bool fLegacyMode;
168 /** The current load.
169 * This does not include guard pages. */
170 uint32_t cLoad;
171 /** The max load ever.
172 * This is maintained to trigger the addition of more mapping space. */
173 uint32_t cMaxLoad;
174 /** Initialization / termination lock. */
175 RTSEMFASTMUTEX hInitLock;
176 /** The number of guard pages. */
177 uint32_t cGuardPages;
178 /** The number of users (protected by hInitLock). */
179 uint32_t cUsers;
180 /** Array containing a copy of the original page tables.
181 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
182 void *pvSavedPTEs;
183 /** List of segments. */
184 PPGMR0DYNMAPSEG pSegHead;
185 /** The paging mode. */
186 SUPPAGINGMODE enmPgMode;
187} PGMR0DYNMAP;
188/** Pointer to the ring-0 dynamic mapping cache */
189typedef PGMR0DYNMAP *PPGMR0DYNMAP;
190
191/** PGMR0DYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
192#define PGMR0DYNMAP_MAGIC 0x19640201
193
194
195/**
196 * Paging level data.
197 */
198typedef struct PGMR0DYNMAPPGLVL
199{
200 uint32_t cLevels; /**< The number of levels. */
201 struct
202 {
203 RTHCPHYS HCPhys; /**< The address of the page for the current level,
204 * i.e. what hMemObj/hMapObj is currently mapping. */
205 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
206 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
207 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
208 uint32_t fPtrShift; /**< The pointer shift count. */
209 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
210 uint64_t fAndMask; /**< And mask to check entry flags. */
211 uint64_t fResMask; /**< The result from applying fAndMask. */
212 union
213 {
214 void *pv; /**< hMapObj address. */
215 PX86PGUINT paLegacy; /**< Legacy table view. */
216 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
217 } u;
218 } a[4];
219} PGMR0DYNMAPPGLVL;
220/** Pointer to paging level data. */
221typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
222
223
224/*******************************************************************************
225* Global Variables *
226*******************************************************************************/
227/** Pointer to the ring-0 dynamic mapping cache. */
228static PPGMR0DYNMAP g_pPGMR0DynMap;
229
230
231/*******************************************************************************
232* Internal Functions *
233*******************************************************************************/
234static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs);
235static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis);
236static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis);
237static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis);
238#ifdef DEBUG
239static int pgmR0DynMapTest(PVM pVM);
240#endif
241
242
243/**
244 * Initializes the ring-0 dynamic mapping cache.
245 *
246 * @returns VBox status code.
247 */
248VMMR0DECL(int) PGMR0DynMapInit(void)
249{
250 Assert(!g_pPGMR0DynMap);
251
252 /*
253 * Create and initialize the cache instance.
254 */
255 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)RTMemAllocZ(sizeof(*pThis));
256 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
257 int rc = VINF_SUCCESS;
258 pThis->enmPgMode = SUPR0GetPagingMode();
259 switch (pThis->enmPgMode)
260 {
261 case SUPPAGINGMODE_32_BIT:
262 case SUPPAGINGMODE_32_BIT_GLOBAL:
263 pThis->fLegacyMode = true;
264 break;
265 case SUPPAGINGMODE_PAE:
266 case SUPPAGINGMODE_PAE_GLOBAL:
267 case SUPPAGINGMODE_PAE_NX:
268 case SUPPAGINGMODE_PAE_GLOBAL_NX:
269 case SUPPAGINGMODE_AMD64:
270 case SUPPAGINGMODE_AMD64_GLOBAL:
271 case SUPPAGINGMODE_AMD64_NX:
272 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
273 pThis->fLegacyMode = false;
274 break;
275 default:
276 rc = VERR_INTERNAL_ERROR;
277 break;
278 }
279 if (RT_SUCCESS(rc))
280 {
281 rc = RTSemFastMutexCreate(&pThis->hInitLock);
282 if (RT_SUCCESS(rc))
283 {
284 rc = RTSpinlockCreate(&pThis->hSpinlock);
285 if (RT_SUCCESS(rc))
286 {
287 pThis->u32Magic = PGMR0DYNMAP_MAGIC;
288 g_pPGMR0DynMap = pThis;
289 return VINF_SUCCESS;
290 }
291 RTSemFastMutexDestroy(pThis->hInitLock);
292 }
293 }
294 RTMemFree(pThis);
295 return rc;
296}
297
298
299/**
300 * Terminates the ring-0 dynamic mapping cache.
301 */
302VMMR0DECL(void) PGMR0DynMapTerm(void)
303{
304 /*
305 * Destroy the cache.
306 *
307 * There are not supposed to be any races here; the loader should
308 * make sure of that. So, don't bother locking anything.
309 *
310 * The VM objects should all be destroyed by now, so there are no
311 * dangling users or anything like that to clean up. This routine
312 * is just a mirror image of PGMR0DynMapInit.
313 */
314 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
315 if (pThis)
316 {
317 AssertPtr(pThis);
318 g_pPGMR0DynMap = NULL;
319
320 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages,
321 ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n",
322 pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages));
323
324 /* Free the associated resources. */
325 RTSemFastMutexDestroy(pThis->hInitLock);
326 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
327 RTSpinlockDestroy(pThis->hSpinlock);
328 pThis->hSpinlock = NIL_RTSPINLOCK;
329 pThis->u32Magic = UINT32_MAX;
330 RTMemFree(pThis);
331 }
332}
333
334
335/**
336 * Initializes the dynamic mapping cache for a new VM.
337 *
338 * @returns VBox status code.
339 * @param pVM Pointer to the shared VM structure.
340 */
341VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
342{
343 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
344
345 /*
346 * Initialize the auto sets.
347 */
348 VMCPUID idCpu = pVM->cCPUs;
349 AssertReturn(idCpu > 0 && idCpu <= VMCPU_MAX_CPU_COUNT, VERR_INTERNAL_ERROR);
350 while (idCpu-- > 0)
351 {
352 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
353 uint32_t j = RT_ELEMENTS(pSet->aEntries);
354 while (j-- > 0)
355 {
356 pSet->aEntries[j].iPage = UINT16_MAX;
357 pSet->aEntries[j].cRefs = 0;
358 }
359 pSet->cEntries = PGMMAPSET_CLOSED;
360 }
361
362 /*
363 * Do we need the cache? Skip the last bit if we don't.
364 */
365#if 1
366 if (!VMMIsHwVirtExtForced(pVM))
367 return VINF_SUCCESS;
368#endif
369
370 /*
371 * Reference and if necessary setup or expand the cache.
372 */
373 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
374 AssertPtrReturn(pThis, VERR_INTERNAL_ERROR);
375 int rc = RTSemFastMutexRequest(pThis->hInitLock);
376 AssertLogRelRCReturn(rc, rc);
377
378 pThis->cUsers++;
379 if (pThis->cUsers == 1)
380 {
381 rc = pgmR0DynMapSetup(pThis);
382#ifdef DEBUG
383 if (RT_SUCCESS(rc))
384 {
385 rc = pgmR0DynMapTest(pVM);
386 if (RT_FAILURE(rc))
387 pgmR0DynMapTearDown(pThis);
388 }
389#endif
390 }
391 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages))
392 rc = pgmR0DynMapExpand(pThis);
393 if (RT_SUCCESS(rc))
394 pVM->pgm.s.pvR0DynMapUsed = pThis;
395 else
396 pThis->cUsers--;
397
398 RTSemFastMutexRelease(pThis->hInitLock);
399 return rc;
400}
401
402
403/**
404 * Terminates the dynamic mapping cache usage for a VM.
405 *
406 * @param pVM Pointer to the shared VM structure.
407 */
408VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
409{
410 /*
411 * Return immediately if we're not using the cache.
412 */
413 if (!pVM->pgm.s.pvR0DynMapUsed)
414 return;
415
416 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
417 AssertPtrReturnVoid(pThis);
418
419 int rc = RTSemFastMutexRequest(pThis->hInitLock);
420 AssertLogRelRCReturnVoid(rc);
421
422 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
423 {
424 pVM->pgm.s.pvR0DynMapUsed = NULL;
425
426#ifdef VBOX_STRICT
427 PGMR0DynMapAssertIntegrity();
428#endif
429
430 /*
431 * Clean up and check the auto sets.
432 */
433 VMCPUID idCpu = pVM->cCPUs;
434 while (idCpu-- > 0)
435 {
436 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
437 uint32_t j = pSet->cEntries;
438 if (j <= RT_ELEMENTS(pSet->aEntries))
439 {
440 /*
441 * The set is open, close it.
442 */
443 while (j-- > 0)
444 {
445 int32_t cRefs = pSet->aEntries[j].cRefs;
446 uint32_t iPage = pSet->aEntries[j].iPage;
447 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
448 if (iPage < pThis->cPages && cRefs > 0)
449 pgmR0DynMapReleasePage(pThis, iPage, cRefs);
450 else
451 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
452
453 pSet->aEntries[j].iPage = UINT16_MAX;
454 pSet->aEntries[j].cRefs = 0;
455 }
456 pSet->cEntries = PGMMAPSET_CLOSED;
457 }
458 else
459 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
460
461 j = RT_ELEMENTS(pSet->aEntries);
462 while (j-- > 0)
463 {
464 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
465 Assert(!pSet->aEntries[j].cRefs);
466 }
467 }
468
469 /*
470 * Release our reference to the mapping cache.
471 */
472 Assert(pThis->cUsers > 0);
473 pThis->cUsers--;
474 if (!pThis->cUsers)
475 pgmR0DynMapTearDown(pThis);
476 }
477 else
478 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
479
480 RTSemFastMutexRelease(pThis->hInitLock);
481}
482
483
484/**
485 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
486 *
487 * @param idCpu The current CPU.
488 * @param pvUser1 The dynamic mapping cache instance.
489 * @param pvUser2 Unused, NULL.
490 */
491static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
492{
493 Assert(!pvUser2);
494 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)pvUser1;
495 Assert(pThis == g_pPGMR0DynMap);
496 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
497 uint32_t iPage = pThis->cPages;
498 while (iPage-- > 0)
499 ASMInvalidatePage(paPages[iPage].pvPage);
500}
501
502
503/**
504 * Shoot down the TLBs for every single cache entry on all CPUs.
505 *
506 * @returns IPRT status code (RTMpOnAll).
507 * @param pThis The dynamic mapping cache instance.
508 */
509static int pgmR0DynMapTlbShootDown(PPGMR0DYNMAP pThis)
510{
511 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
512 AssertRC(rc);
513 if (RT_FAILURE(rc))
514 {
515 uint32_t iPage = pThis->cPages;
516 while (iPage-- > 0)
517 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
518 }
519 return rc;
520}
521
522
523/**
524 * Calculate the new cache size based on cMaxLoad statistics.
525 *
526 * @returns Number of pages.
527 * @param pThis The dynamic mapping cache instance.
528 * @param pcMinPages The minimal size in pages.
529 */
530static uint32_t pgmR0DynMapCalcNewSize(PPGMR0DYNMAP pThis, uint32_t *pcMinPages)
531{
532 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
533
534 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU (/2). */
535 RTCPUID cCpus = RTMpGetCount();
536 AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
537 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
538 uint32_t cMinPages = cCpus * (PGMR0DYNMAP_PAGES_PER_CPU / 2);
539
540 /* adjust against cMaxLoad. */
541 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
542 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
543 pThis->cMaxLoad = 0;
544
545 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
546 cPages += PGMR0DYNMAP_PAGES_PER_CPU;
547
548 if (pThis->cMaxLoad > cMinPages)
549 cMinPages = pThis->cMaxLoad;
550
551 /* adjust against max and current size. */
552 if (cPages < pThis->cPages)
553 cPages = pThis->cPages;
554 cPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
555 if (cPages > PGMR0DYNMAP_MAX_PAGES)
556 cPages = PGMR0DYNMAP_MAX_PAGES;
557
558 if (cMinPages < pThis->cPages)
559 cMinPages = pThis->cPages;
560 cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
561 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
562 cMinPages = PGMR0DYNMAP_MAX_PAGES;
563
564 Assert(cMinPages);
565 *pcMinPages = cMinPages;
566 return cPages;
567}
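/* Worked example (editorial, not part of the original file): with 2 CPUs, no
 * prior load history (cMaxLoad = 0) and guard pages compiled out, this returns
 * cPages = 2 * 64 = 128 and *pcMinPages = 2 * 32 = 64. A VBOX_STRICT build with
 * PGMR0DYNMAP_GUARD_PAGES = 4 would scale both values by 5 before clamping them
 * to PGMR0DYNMAP_MAX_PAGES. */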
568
569
570/**
571 * Initializes the paging level data.
572 *
573 * @param pThis The dynamic mapping cache instance.
574 * @param pPgLvl The paging level data.
575 */
576void pgmR0DynMapPagingArrayInit(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
577{
578 RTCCUINTREG cr4 = ASMGetCR4();
579 switch (pThis->enmPgMode)
580 {
581 case SUPPAGINGMODE_32_BIT:
582 case SUPPAGINGMODE_32_BIT_GLOBAL:
583 pPgLvl->cLevels = 2;
584 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
585 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
586 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
587 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
588 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
589
590 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
591 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
592 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
593 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
594 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
595 break;
596
597 case SUPPAGINGMODE_PAE:
598 case SUPPAGINGMODE_PAE_GLOBAL:
599 case SUPPAGINGMODE_PAE_NX:
600 case SUPPAGINGMODE_PAE_GLOBAL_NX:
601 pPgLvl->cLevels = 3;
602 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
603 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
604 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
605 pPgLvl->a[0].fAndMask = X86_PDPE_P;
606 pPgLvl->a[0].fResMask = X86_PDPE_P;
607
608 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
609 pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK;
610 pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT;
611 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
612 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
613
614 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
615 pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK;
616 pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT;
617 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
618 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
619 break;
620
621 case SUPPAGINGMODE_AMD64:
622 case SUPPAGINGMODE_AMD64_GLOBAL:
623 case SUPPAGINGMODE_AMD64_NX:
624 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
625 pPgLvl->cLevels = 4;
626 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
627 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
628 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
629 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
630 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
631
632 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
633 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
634 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
635 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
636 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
637
638 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
639 pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT;
640 pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK;
641 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
642 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
643
644 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
645 pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT;
646 pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK;
647 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
648 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
649 break;
650
651 default:
652 AssertFailed();
653 pPgLvl->cLevels = 0;
654 break;
655 }
656
657 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
658 {
659 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
660 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
661 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
662 pPgLvl->a[i].u.pv = NULL;
663 }
664}
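/* Editorial example of how the fPtrShift/fPtrMask pairs set up above are used by
 * pgmR0DynMapPagingArrayMapPte below: for PAE paging and a hypothetical mapping
 * address of 0xc0402000, the PDPT index (level a[0]) is 0x3, the PD index (a[1])
 * is ((0xc0402000 >> X86_PD_PAE_SHIFT) & X86_PD_PAE_MASK) = 0x2, and the PT index
 * (a[2]) is ((0xc0402000 >> X86_PT_PAE_SHIFT) & X86_PT_PAE_MASK) = 0x2. */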
665
666
667/**
668 * Maps a PTE.
669 *
670 * This will update the segment structure when new PTs are mapped.
671 *
672 * It also assumes that we (for paranoid reasons) wish to establish a mapping
673 * chain from CR3 to the PT, all of which corresponds to the processor we're
674 * currently running on, and go about this by running with interrupts disabled
675 * and restarting from CR3 for every change.
676 *
677 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
678 * to re-enable interrupts.
679 * @param pThis The dynamic mapping cache instance.
680 * @param pPgLvl The paging level structure.
681 * @param pvPage The page.
682 * @param pSeg The segment.
683 * @param cMaxPTs The max number of PTs expected in the segment.
684 * @param ppvPTE Where to store the PTE address.
685 */
686static int pgmR0DynMapPagingArrayMapPte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
687 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
688{
689 Assert(!(ASMGetFlags() & X86_EFL_IF));
690 void *pvEntry = NULL;
691 X86PGPAEUINT uEntry = ASMGetCR3();
692 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
693 {
694 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
695 if (pPgLvl->a[i].HCPhys != HCPhys)
696 {
697 /*
698 * Need to remap this level.
699 * The final level, the PT, will not be freed since that is what it's all about.
700 */
701 ASMIntEnable();
702 if (i + 1 == pPgLvl->cLevels)
703 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_INTERNAL_ERROR);
704 else
705 {
706 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
707 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
708 }
709
710 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE);
711 if (RT_SUCCESS(rc))
712 {
713 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
714 (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
715 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
716 if (RT_SUCCESS(rc))
717 {
718 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
719 AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
720 pPgLvl->a[i].HCPhys = HCPhys;
721 if (i + 1 == pPgLvl->cLevels)
722 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
723 ASMIntDisable();
724 return VINF_TRY_AGAIN;
725 }
726
727 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
728 }
729 else
730 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
731 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
732 return rc;
733 }
734
735 /*
736 * The next level.
737 */
738 uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
739 if (pThis->fLegacyMode)
740 {
741 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
742 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
743 }
744 else
745 {
746 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
747 uEntry = pPgLvl->a[i].u.paPae[iEntry];
748 }
749
750 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
751 {
752 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n"
753 "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n",
754 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask,
755 pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode));
756 return VERR_INTERNAL_ERROR;
757 }
758 /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/
759 }
760
761 /* Made it through without needing to remap anything. */
762 *ppvPTE = pvEntry;
763 return VINF_SUCCESS;
764}
765
766
767/**
768 * Sets up a guard page.
769 *
770 * @param pThis The dynamic mapping cache instance.
771 * @param pPage The page.
772 */
773DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMR0DYNMAP pThis, PPGMR0DYNMAPENTRY pPage)
774{
775 memset(pPage->pvPage, 0xfd, PAGE_SIZE);
776 pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT;
777 pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
778#ifdef PGMR0DYNMAP_GUARD_NP
779 ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P);
780#else
781 if (pThis->fLegacyMode)
782 ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE);
783 else
784 ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE);
785#endif
786 pThis->cGuardPages++;
787}
788
789
790/**
791 * Adds a new segment of the specified size.
792 *
793 * @returns VBox status code.
794 * @param pThis The dynamic mapping cache instance.
795 * @param cPages The size of the new segment, given as a page count.
796 */
797static int pgmR0DynMapAddSeg(PPGMR0DYNMAP pThis, uint32_t cPages)
798{
799 int rc2;
800 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
801
802 /*
803 * Do the array reallocations first.
804 * (The pages array has to be replaced behind the spinlock of course.)
805 */
806 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
807 if (!pvSavedPTEs)
808 return VERR_NO_MEMORY;
809 pThis->pvSavedPTEs = pvSavedPTEs;
810
811 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
812 if (!pvPages)
813 {
814 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
815 if (pvSavedPTEs)
816 pThis->pvSavedPTEs = pvSavedPTEs;
817 return VERR_NO_MEMORY;
818 }
819
820 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
821 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
822
823 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
824 void *pvToFree = pThis->paPages;
825 pThis->paPages = (PPGMR0DYNMAPENTRY)pvPages;
826
827 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
828 RTMemFree(pvToFree);
829
830 /*
831 * Allocate the segment structure and pages of memory, then touch all the pages (paranoia).
832 */
833 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
834 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
835 if (!pSeg)
836 return VERR_NO_MEMORY;
837 pSeg->pNext = NULL;
838 pSeg->cPages = cPages;
839 pSeg->iPage = pThis->cPages;
840 pSeg->cPTs = 0;
841 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
842 if (RT_SUCCESS(rc))
843 {
844 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
845 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
846 memset(pbPage, 0xfe, cPages << PAGE_SHIFT);
847
848 /*
849 * Walk through the pages and set them up with a mapping of their PTE and everything.
850 */
851 ASMIntDisable();
852 PGMR0DYNMAPPGLVL PgLvl;
853 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
854 uint32_t const iEndPage = pThis->cPages + cPages;
855 for (uint32_t iPage = pThis->cPages;
856 iPage < iEndPage;
857 iPage++, pbPage += PAGE_SIZE)
858 {
859 /* Initialize the page data. */
860 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
861 pThis->paPages[iPage].pvPage = pbPage;
862 pThis->paPages[iPage].cRefs = 0;
863 pThis->paPages[iPage].uPte.pPae = 0;
864 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
865
866 /* Map its page table, retry until we've got a clean run (paranoia). */
867 do
868 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
869 &pThis->paPages[iPage].uPte.pv);
870 while (rc == VINF_TRY_AGAIN);
871 if (RT_FAILURE(rc))
872 break;
873
874 /* Save the PTE. */
875 if (pThis->fLegacyMode)
876 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
877 else
878 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
879
880#ifdef VBOX_STRICT
881 /* Check that we've got the right entry. */
882 RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage);
883 RTHCPHYS HCPhysPte = pThis->fLegacyMode
884 ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK
885 : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK;
886 if (HCPhysPage != HCPhysPte)
887 {
888 LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n",
889 iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv));
890 rc = VERR_INTERNAL_ERROR;
891 break;
892 }
893#endif
894 } /* for each page */
895 ASMIntEnable();
896
897 /* cleanup non-PT mappings */
898 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
899 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
900
901 if (RT_SUCCESS(rc))
902 {
903#if PGMR0DYNMAP_GUARD_PAGES > 0
904 /*
905 * Setup guard pages.
906 * (Note: TLBs will be shot down later on.)
907 */
908 uint32_t iPage = pThis->cPages;
909 while (iPage < iEndPage)
910 {
911 for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++)
912 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
913 iPage++; /* the guarded page */
914 }
915
916 /* Make sure the very last page is a guard page too. */
917 iPage = iEndPage - 1;
918 if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT)
919 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
920#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */
921
922 /*
923 * Commit it by adding the segment to the list and updating the page count.
924 */
925 pSeg->pNext = pThis->pSegHead;
926 pThis->pSegHead = pSeg;
927 pThis->cPages += cPages;
928 return VINF_SUCCESS;
929 }
930
931 /*
932 * Bail out.
933 */
934 while (pSeg->cPTs-- > 0)
935 {
936 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
937 AssertRC(rc2);
938 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
939 }
940
941 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
942 AssertRC(rc2);
943 pSeg->hMemObj = NIL_RTR0MEMOBJ;
944 }
945 RTMemFree(pSeg);
946
947 /* Don't bother resizing the arrays, but free them if we're the only user. */
948 if (!pThis->cPages)
949 {
950 RTMemFree(pThis->paPages);
951 pThis->paPages = NULL;
952 RTMemFree(pThis->pvSavedPTEs);
953 pThis->pvSavedPTEs = NULL;
954 }
955 return rc;
956}
957
958
959/**
960 * Called by PGMR0DynMapInitVM under the init lock.
961 *
962 * @returns VBox status code.
963 * @param pThis The dynamic mapping cache instance.
964 */
965static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis)
966{
967 /*
968 * Calc the size and add a segment of that size.
969 */
970 uint32_t cMinPages;
971 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
972 AssertReturn(cPages, VERR_INTERNAL_ERROR);
973 int rc = pgmR0DynMapAddSeg(pThis, cPages);
974 if (rc == VERR_NO_MEMORY)
975 {
976 /*
977 * Try adding smaller segments.
978 */
979 do
980 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
981 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
982 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
983 rc = VINF_SUCCESS;
984 if (rc == VERR_NO_MEMORY)
985 {
986 if (pThis->cPages)
987 pgmR0DynMapTearDown(pThis);
988 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
989 }
990 }
991 Assert(ASMGetFlags() & X86_EFL_IF);
992
993#if PGMR0DYNMAP_GUARD_PAGES > 0
994 /* paranoia */
995 if (RT_SUCCESS(rc))
996 pgmR0DynMapTlbShootDown(pThis);
997#endif
998 return rc;
999}
1000
1001
1002/**
1003 * Called by PGMR0DynMapInitVM under the init lock.
1004 *
1005 * @returns VBox status code.
1006 * @param pThis The dynamic mapping cache instance.
1007 */
1008static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis)
1009{
1010 /*
1011 * Calc the new target size and add a segment of the appropriate size.
1012 */
1013 uint32_t cMinPages;
1014 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1015 AssertReturn(cPages, VERR_INTERNAL_ERROR);
1016 if (pThis->cPages >= cPages)
1017 return VINF_SUCCESS;
1018
1019 uint32_t cAdd = cPages - pThis->cPages;
1020 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
1021 if (rc == VERR_NO_MEMORY)
1022 {
1023 /*
1024 * Try adding smaller segments.
1025 */
1026 do
1027 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1028 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1029 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1030 rc = VINF_SUCCESS;
1031 if (rc == VERR_NO_MEMORY)
1032 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
1033 }
1034 Assert(ASMGetFlags() & X86_EFL_IF);
1035
1036#if PGMR0DYNMAP_GUARD_PAGES > 0
1037 /* paranoia */
1038 if (RT_SUCCESS(rc))
1039 pgmR0DynMapTlbShootDown(pThis);
1040#endif
1041 return rc;
1042}
1043
1044
1045/**
1046 * Called by PGMR0DynMapTermVM under the init lock.
1047 *
1048 *
1049 * @param pThis The dynamic mapping cache instance.
1050 */
1051static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis)
1052{
1053 /*
1054 * Restore the original page table entries
1055 */
1056 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1057 uint32_t iPage = pThis->cPages;
1058 if (pThis->fLegacyMode)
1059 {
1060 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
1061 while (iPage-- > 0)
1062 {
1063 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
1064 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1065 X86PGUINT uNew = paSavedPTEs[iPage];
1066 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
1067 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1068 }
1069 }
1070 else
1071 {
1072 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
1073 while (iPage-- > 0)
1074 {
1075 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
1076 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1077 X86PGPAEUINT uNew = paSavedPTEs[iPage];
1078 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
1079 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1080 }
1081 }
1082
1083 /*
1084 * Shoot down the TLBs on all CPUs before freeing them.
1085 */
1086 pgmR0DynMapTlbShootDown(pThis);
1087
1088 /*
1089 * Free the segments.
1090 */
1091 while (pThis->pSegHead)
1092 {
1093 int rc;
1094 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
1095 pThis->pSegHead = pSeg->pNext;
1096
1097 uint32_t iPT = pSeg->cPTs;
1098 while (iPT-- > 0)
1099 {
1100 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
1101 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
1102 }
1103 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
1104 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1105 pSeg->pNext = NULL;
1106 pSeg->iPage = UINT16_MAX;
1107 pSeg->cPages = 0;
1108 pSeg->cPTs = 0;
1109 RTMemFree(pSeg);
1110 }
1111
1112 /*
1113 * Free the arrays and restore the initial state.
1114 * The cMaxLoad value is left behind for the next setup.
1115 */
1116 RTMemFree(pThis->paPages);
1117 pThis->paPages = NULL;
1118 RTMemFree(pThis->pvSavedPTEs);
1119 pThis->pvSavedPTEs = NULL;
1120 pThis->cPages = 0;
1121 pThis->cLoad = 0;
1122}
1123
1124
1125/**
1126 * Release references to a page, caller owns the spin lock.
1127 *
1128 * @param pThis The dynamic mapping cache instance.
1129 * @param iPage The page.
1130 * @param cRefs The number of references to release.
1131 */
1132DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs)
1133{
1134 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs;
1135 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
1136 if (!cRefs)
1137 pThis->cLoad--;
1138}
1139
1140
1141/**
1142 * Release references to a page, caller does not own the spin lock.
1143 *
1144 * @param pThis The dynamic mapping cache instance.
1145 * @param iPage The page.
1146 * @param cRefs The number of references to release.
1147 */
1148static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1149{
1150 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1151 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1152 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1153 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1154}
1155
1156
1157/**
1158 * pgmR0DynMapPage worker that deals with the tedious bits.
1159 *
1160 * @returns The page index on success, UINT32_MAX on failure.
1161 * @param pThis The dynamic mapping cache instance.
1162 * @param HCPhys The address of the page to be mapped.
1163 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1164 */
1165static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage)
1166{
1167 /*
1168 * Check if any of the first 5 pages are unreferenced since the caller
1169 * has already made sure they aren't matching.
1170 */
1171 uint32_t const cPages = pThis->cPages;
1172 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1173 uint32_t iFreePage;
1174 if (!paPages[iPage].cRefs)
1175 iFreePage = iPage;
1176 else if (!paPages[(iPage + 1) % cPages].cRefs)
1177 iFreePage = (iPage + 1) % cPages;
1178 else if (!paPages[(iPage + 2) % cPages].cRefs)
1179 iFreePage = (iPage + 2) % cPages;
1180 else if (!paPages[(iPage + 3) % cPages].cRefs)
1181 iFreePage = (iPage + 3) % cPages;
1182 else if (!paPages[(iPage + 4) % cPages].cRefs)
1183 iFreePage = (iPage + 4) % cPages;
1184 else
1185 {
1186 /*
1187 * Search for an unused or matching entry.
1188 */
1189 iFreePage = (iPage + 5) % cPages;
1190 for (;;)
1191 {
1192 if (paPages[iFreePage].HCPhys == HCPhys)
1193 return iFreePage;
1194 if (!paPages[iFreePage].cRefs)
1195 break;
1196
1197 /* advance */
1198 iFreePage = (iFreePage + 1) % cPages;
1199 if (RT_UNLIKELY(iFreePage == iPage))
1200 return UINT32_MAX;
1201 }
1202 }
1203
1204 /*
1205 * Setup the new entry.
1206 */
1207 /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/
1208 paPages[iFreePage].HCPhys = HCPhys;
1209 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1210 if (pThis->fLegacyMode)
1211 {
1212 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1213 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1214 X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1215 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1216 | (HCPhys & X86_PTE_PG_MASK);
1217 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1218 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1219 }
1220 else
1221 {
1222 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1223 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1224 X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1225 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1226 | (HCPhys & X86_PTE_PAE_PG_MASK);
1227 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1228 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1229 /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/
1230 }
1231 return iFreePage;
1232}
1233
1234
1235/**
1236 * Maps a page into the pool.
1237 *
1238 * @returns Page index on success, UINT32_MAX on failure.
1239 * @param pThis The dynamic mapping cache instance.
1240 * @param HCPhys The address of the page to be mapped.
1241 * @param ppvPage Where to store the page address.
1242 */
1243DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, void **ppvPage)
1244{
1245 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1246 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1247 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1248
1249 /*
1250 * Find an entry, if possible a matching one. The HCPhys address is hashed
1251 * down to a page index, collisions are handled by linear searching. Optimize
1252 * for a hit in the first 5 pages.
1253 *
1254 * Do the cheap hits here and defer the tedious searching and inserting
1255 * to a helper function.
1256 */
1257 uint32_t const cPages = pThis->cPages;
1258 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1259 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1260 if (paPages[iPage].HCPhys != HCPhys)
1261 {
1262 uint32_t iPage2 = (iPage + 1) % cPages;
1263 if (paPages[iPage2].HCPhys != HCPhys)
1264 {
1265 iPage2 = (iPage + 2) % cPages;
1266 if (paPages[iPage2].HCPhys != HCPhys)
1267 {
1268 iPage2 = (iPage + 3) % cPages;
1269 if (paPages[iPage2].HCPhys != HCPhys)
1270 {
1271 iPage2 = (iPage + 4) % cPages;
1272 if (paPages[iPage2].HCPhys != HCPhys)
1273 {
1274 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage);
1275 if (RT_UNLIKELY(iPage == UINT32_MAX))
1276 {
1277 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1278 return iPage;
1279 }
1280 }
1281 else
1282 iPage = iPage2;
1283 }
1284 else
1285 iPage = iPage2;
1286 }
1287 else
1288 iPage = iPage2;
1289 }
1290 else
1291 iPage = iPage2;
1292 }
1293
1294 /*
1295 * Reference it, update statistics and get the return address.
1296 */
1297 int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs);
1298 if (cRefs == 1)
1299 {
1300 pThis->cLoad++;
1301 if (pThis->cLoad > pThis->cMaxLoad)
1302 pThis->cMaxLoad = pThis->cLoad;
1303 AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages));
1304 }
1305 else if (RT_UNLIKELY(cRefs <= 0))
1306 {
1307 ASMAtomicDecS32(&paPages[iPage].cRefs);
1308 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1309 AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%p HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX);
1310 }
1311 void *pvPage = paPages[iPage].pvPage;
1312
1313 /*
1314 * Invalidate the entry?
1315 */
1316 RTCPUID idRealCpu = RTMpCpuId();
1317 bool fInvalidateIt = RTCpuSetIsMember(&paPages[iPage].PendingSet, idRealCpu);
1318 if (fInvalidateIt)
1319 RTCpuSetDel(&paPages[iPage].PendingSet, idRealCpu);
1320
1321 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1322
1323 /*
1324 * Do the actual invalidation outside the spinlock.
1325 */
1326 ASMInvalidatePage(pvPage);
1327
1328 *ppvPage = pvPage;
1329 return iPage;
1330}
1331
1332
1333/**
1334 * Asserts the integrity of the pool.
1335 *
1336 * @returns VBox status code.
1337 */
1338VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void)
1339{
1340 /*
1341 * Basic pool stuff that doesn't require any lock, just assumes we're a user.
1342 */
1343 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1344 if (!pThis)
1345 return VINF_SUCCESS;
1346 AssertPtrReturn(pThis, VERR_INVALID_POINTER);
1347 AssertReturn(pThis->u32Magic == PGMR0DYNMAP_MAGIC, VERR_INVALID_MAGIC);
1348 if (!pThis->cUsers)
1349 return VERR_INVALID_PARAMETER;
1350
1351
1352 int rc = VINF_SUCCESS;
1353 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1354 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1355#define CHECK_RET(expr, a) \
1356 do { \
1357 if (!(expr)) \
1358 { \
1359 RTSpinlockRelease(pThis->hSpinlock, &Tmp); \
1360 AssertMsg1(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \
1361 AssertMsg2 a; \
1362 return VERR_INTERNAL_ERROR; \
1363 } \
1364 } while (0)
1365
1366 /*
1367 * Check that the PTEs are correct.
1368 */
1369 uint32_t cGuard = 0;
1370 uint32_t cLoad = 0;
1371 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1372 uint32_t iPage = pThis->cPages;
1373 if (pThis->fLegacyMode)
1374 {
1375 PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1376 while (iPage-- > 0)
1377 {
1378 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1379 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1380 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1381 {
1382#ifdef PGMR0DYNMAP_GUARD_NP
1383 CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P),
1384 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1385#else
1386 CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE,
1387 ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u));
1388#endif
1389 cGuard++;
1390 }
1391 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1392 {
1393 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1394 X86PGUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1395 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1396 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1397 CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte,
1398 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1399 if (paPages[iPage].cRefs)
1400 cLoad++;
1401 }
1402 else
1403 CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage],
1404 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1405 }
1406 }
1407 else
1408 {
1409 PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1410 while (iPage-- > 0)
1411 {
1412 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1413 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1414 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1415 {
1416#ifdef PGMR0DYNMAP_GUARD_NP
1417 CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P),
1418 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1419#else
1420 CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE,
1421 ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u));
1422#endif
1423 cGuard++;
1424 }
1425 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1426 {
1427 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1428 X86PGPAEUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1429 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1430 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1431 CHECK_RET(paPages[iPage].uPte.pPae->u == uPte,
1432 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, uPte));
1433 if (paPages[iPage].cRefs)
1434 cLoad++;
1435 }
1436 else
1437 CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage],
1438 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1439 }
1440 }
1441
1442 CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad));
1443 CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages));
1444
1445#undef CHECK_RET
1446 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1447 return VINF_SUCCESS;
1448}
1449
1450
1451/**
1452 * Signals the start of a new set of mappings.
1453 *
1454 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1455 * API is called.
1456 *
1457 * @param pVCpu The shared data for the current virtual CPU.
1458 */
1459VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu)
1460{
1461 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1462 pVCpu->pgm.s.AutoSet.cEntries = 0;
1463}
1464
1465
1466/**
1467 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associates
1468 * since the PGMDynMapStartAutoSet call.
1469 *
1470 * If the set is already closed, nothing will be done.
1471 *
1472 * @param pVCpu The shared data for the current virtual CPU.
1473 */
1474VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu)
1475{
1476 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1477
1478 /*
1479 * Is the set open?
1480 *
1481 * The set might have been closed before VM execution and not reopened again
1482 * before we leave for ring-3 or something.
1483 */
1484 uint32_t i = pSet->cEntries;
1485 if (i != PGMMAPSET_CLOSED)
1486 {
1487 /*
1488 * Close the set
1489 */
1490 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1491 pSet->cEntries = PGMMAPSET_CLOSED;
1492
1493 /*
1494 * Release any pages it's referencing.
1495 */
1496 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1497 {
1498 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1499 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1500 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1501
1502 while (i-- > 0)
1503 {
1504 uint32_t iPage = pSet->aEntries[i].iPage;
1505 Assert(iPage < pThis->cPages);
1506 int32_t cRefs = pSet->aEntries[i].cRefs;
1507 Assert(cRefs > 0);
1508 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1509
1510 pSet->aEntries[i].iPage = UINT16_MAX;
1511 pSet->aEntries[i].cRefs = 0;
1512 }
1513
1514 Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages);
1515 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1516 }
1517 }
1518}
1519
1520
1521/**
1522 * Migrates the automatic mapping set of the current vCPU if it's active and
1523 * necessary.
1524 *
1525 * This is called when re-entering the hardware assisted execution mode after a
1526 * nip down to ring-3. We run the risk that the CPU might have change and we
1527 * will therefore make sure all the cache entries currently in the auto set will
1528 * be valid on the new CPU. If the cpu didn't change nothing will happen as all
1529 * the entries will have been flagged as invalidated.
1530 *
1531 * @param pVCpu The shared data for the current virtual CPU.
1532 * @thread EMT
1533 */
1534VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu)
1535{
1536 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1537 uint32_t i = pSet->cEntries;
1538 if (i != PGMMAPSET_CLOSED)
1539 {
1540 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1541 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1542 {
1543 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1544 RTCPUID idRealCpu = RTMpCpuId();
1545
1546 while (i-- > 0)
1547 {
1548 Assert(pSet->aEntries[i].cRefs > 0);
1549 uint32_t iPage = pSet->aEntries[i].iPage;
1550 Assert(iPage < pThis->cPages);
1551 if (RTCpuSetIsMember(&pThis->paPages[iPage].PendingSet, idRealCpu))
1552 {
1553 RTCpuSetDel(&pThis->paPages[iPage].PendingSet, idRealCpu);
1554 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
1555 }
1556 }
1557 }
1558 }
1559}
1560
1561
1562/**
1563 * As a final resort for a full auto set, try to merge duplicate entries.
1564 *
1565 * @param pSet The set.
1566 */
1567static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1568{
1569 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1570 {
1571 uint16_t const iPage = pSet->aEntries[i].iPage;
1572 uint32_t j = i + 1;
1573 while (j < pSet->cEntries)
1574 {
1575 if (pSet->aEntries[j].iPage != iPage)
1576 j++;
1577 else if ((uint32_t)pSet->aEntries[i].cRefs + (uint32_t)pSet->aEntries[j].cRefs < UINT16_MAX)
1578 {
1579 /* merge j into i removing j. */
1580 pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs;
1581 pSet->cEntries--;
1582 if (j < pSet->cEntries)
1583 {
1584 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1585 pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX;
1586 pSet->aEntries[pSet->cEntries].cRefs = 0;
1587 }
1588 else
1589 {
1590 pSet->aEntries[j].iPage = UINT16_MAX;
1591 pSet->aEntries[j].cRefs = 0;
1592 }
1593 }
1594 else
1595 {
1596 /* migrate the max number of refs from j into i and quit the inner loop. */
1597 uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs;
1598 Assert(pSet->aEntries[j].cRefs > cMigrate);
1599 pSet->aEntries[j].cRefs -= cMigrate;
1600 pSet->aEntries[i].cRefs = UINT16_MAX - 1;
1601 break;
1602 }
1603 }
1604 }
1605}
1606
1607
1608/* documented elsewhere - a bit of a mess. */
1609VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv)
1610{
1611 /*
1612 * Validate state.
1613 */
1614 AssertPtr(ppv);
1615 *ppv = NULL;
1616 AssertMsgReturn(pVM->pgm.s.pvR0DynMapUsed == g_pPGMR0DynMap,
1617 ("%p != %p\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap),
1618 VERR_ACCESS_DENIED);
1619 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1620 PVMCPU pVCpu = VMMGetCpu(pVM);
1621 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1622 AssertPtrReturn(pVCpu, VERR_INTERNAL_ERROR);
1623 AssertMsgReturn(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries),
1624 ("%#x (%u)\n", pSet->cEntries, pSet->cEntries), VERR_WRONG_ORDER);
1625
1626 /*
1627 * Map it.
1628 */
1629 uint32_t const iPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, ppv);
1630 if (RT_UNLIKELY(iPage == UINT32_MAX))
1631 {
1632 static uint32_t s_cBitched = 0;
1633 if (++s_cBitched < 10)
1634 LogRel(("PGMDynMapHCPage: cLoad=%u/%u cPages=%u cGuardPages=%u\n",
1635 g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages, g_pPGMR0DynMap->cGuardPages));
1636 return VERR_PGM_DYNMAP_FAILED;
1637 }
1638
1639 /*
1640 * Add the page to the auto reference set.
1641 * If it's less than half full, don't bother looking for duplicates.
1642 */
1643 if (pSet->cEntries < RT_ELEMENTS(pSet->aEntries) / 2)
1644 {
1645 pSet->aEntries[pSet->cEntries].cRefs = 1;
1646 pSet->aEntries[pSet->cEntries].iPage = iPage;
1647 pSet->cEntries++;
1648 }
1649 else
1650 {
1651 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
1652 int32_t i = pSet->cEntries;
1653 while (i-- > 0)
1654 if ( pSet->aEntries[i].iPage == iPage
1655 && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
1656 {
1657 pSet->aEntries[i].cRefs++;
1658 break;
1659 }
1660 if (i < 0)
1661 {
1662 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
1663 pgmDynMapOptimizeAutoSet(pSet);
1664 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
1665 {
1666 pSet->aEntries[pSet->cEntries].cRefs = 1;
1667 pSet->aEntries[pSet->cEntries].iPage = iPage;
1668 pSet->cEntries++;
1669 }
1670 else
1671 {
1672 /* We're screwed. */
1673 pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1);
1674
1675 static uint32_t s_cBitched = 0;
1676 if (++s_cBitched < 10)
1677 LogRel(("PGMDynMapHCPage: set is full!\n"));
1678 *ppv = NULL;
1679 return VERR_PGM_DYNMAP_FULL_SET;
1680 }
1681 }
1682 }
1683
1684 return VINF_SUCCESS;
1685}
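#if 0
/* Editorial usage sketch, not part of the original file: how a ring-0 caller is
 * expected to bracket PGMDynMapHCPage with the auto set API, in the spirit of
 * the r14757 change note about the VMXR0RunGuestCode loop. The function name
 * and parameters below are hypothetical; only the PGMDynMap* calls and the
 * PAGE_OFFSET_MASK constant come from the surrounding code. */
static uint8_t pgmR0DynMapExampleReadByte(PVM pVM, PVMCPU pVCpu, RTHCPHYS HCPhys)
{
    /* Open the auto set; PGMDynMapHCPage won't work unless this has been called. */
    PGMDynMapStartAutoSet(pVCpu);

    void   *pv = NULL;
    uint8_t b  = 0xff;
    int rc = PGMDynMapHCPage(pVM, HCPhys & ~(RTHCPHYS)PAGE_OFFSET_MASK, &pv);
    if (RT_SUCCESS(rc))
        b = ((uint8_t const *)pv)[HCPhys & PAGE_OFFSET_MASK];

    /* Close the set, releasing every reference taken since the start call.
     * Doing this once per run-loop iteration keeps the set from filling up. */
    PGMDynMapReleaseAutoSet(pVCpu);
    return b;
}
#endif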
1686
1687
1688#ifdef DEBUG
1689/** For pgmR0DynMapTest3PerCpu. */
1690typedef struct PGMR0DYNMAPTEST
1691{
1692 uint32_t u32Expect;
1693 uint32_t *pu32;
1694 uint32_t volatile cFailures;
1695} PGMR0DYNMAPTEST;
1696typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST;
1697
1698/**
1699 * Checks that the content of the page is the same on all CPUs, i.e. that there
1700 * are no CPU specific PTs or similar nasty stuff involved.
1701 *
1702 * @param idCpu The current CPU.
1703 * @param pvUser1 Pointer a PGMR0DYNMAPTEST structure.
1704 * @param pvUser2 Unused, ignored.
1705 */
1706static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1707{
1708 PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1;
1709 ASMInvalidatePage(pTest->pu32);
1710 if (*pTest->pu32 != pTest->u32Expect)
1711 ASMAtomicIncU32(&pTest->cFailures);
1712 NOREF(pvUser2); NOREF(idCpu);
1713}
1714
1715
1716/**
1717 * Performs some basic tests in debug builds.
1718 */
1719static int pgmR0DynMapTest(PVM pVM)
1720{
1721 LogRel(("pgmR0DynMapTest: ****** START ******\n"));
1722 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1723 PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet;
1724 uint32_t i;
1725
1726 /*
1727 * Assert internal integrity first.
1728 */
1729 LogRel(("Test #0\n"));
1730 int rc = PGMR0DynMapAssertIntegrity();
1731 if (RT_FAILURE(rc))
1732 return rc;
1733
1734 void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed;
1735 pVM->pgm.s.pvR0DynMapUsed = pThis;
1736
1737 /*
1738 * Simple test, map CR3 twice and check that we're getting the
1739 * same mapping address back.
1740 */
1741 LogRel(("Test #1\n"));
1742 ASMIntDisable();
1743 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
1744
1745 uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK;
1746 void *pv = (void *)(intptr_t)-1;
1747 void *pv2 = (void *)(intptr_t)-2;
1748 rc = PGMDynMapHCPage(pVM, cr3, &pv);
1749 int rc2 = PGMDynMapHCPage(pVM, cr3, &pv2);
1750 ASMIntEnable();
1751 if ( RT_SUCCESS(rc2)
1752 && RT_SUCCESS(rc)
1753 && pv == pv2)
1754 {
1755 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cGuardPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1756 rc = PGMR0DynMapAssertIntegrity();
1757
1758 /*
1759 * Check that the simple set overflow code works by filling it
1760 * with more CR3 mappings.
1761 */
1762 LogRel(("Test #2\n"));
1763 ASMIntDisable();
1764 for (i = 0 ; i < UINT16_MAX*2 + RT_ELEMENTS(pSet->aEntries) / 2 && RT_SUCCESS(rc) && pv2 == pv; i++)
1765 {
1766 pv2 = (void *)(intptr_t)-4;
1767 rc = PGMDynMapHCPage(pVM, cr3, &pv2);
1768 }
1769 ASMIntEnable();
1770 if (RT_FAILURE(rc) || pv != pv2)
1771 {
1772 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%p\n", __LINE__, rc, pv, pv2, i));
1773 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1774 }
1775 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries) / 2)
1776 {
1777 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries) / 2));
1778 rc = VERR_INTERNAL_ERROR;
1779 }
1780 else if ( pSet->aEntries[(RT_ELEMENTS(pSet->aEntries) / 2) - 1].cRefs != UINT16_MAX - 1
1781 || pSet->aEntries[(RT_ELEMENTS(pSet->aEntries) / 2) - 2].cRefs != UINT16_MAX - 1
1782 || pSet->aEntries[(RT_ELEMENTS(pSet->aEntries) / 2) - 3].cRefs != 2+2+3
1783 || pSet->aEntries[(RT_ELEMENTS(pSet->aEntries) / 2) - 4].cRefs != 1)
1784 {
1785 LogRel(("failed(%d): bad set dist: ", __LINE__));
1786 for (i = 0; i < pSet->cEntries; i++)
1787 LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs));
1788 LogRel(("\n"));
1789 rc = VERR_INTERNAL_ERROR;
1790 }
1791 if (RT_SUCCESS(rc))
1792 rc = PGMR0DynMapAssertIntegrity();
1793 if (RT_SUCCESS(rc))
1794 {
1795 /*
1796 * Trigger a set optimization run (exactly).
1797 */
1798 LogRel(("Test #3\n"));
1799 ASMIntDisable();
1800 pv2 = NULL;
1801 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) / 2 && RT_SUCCESS(rc) && pv2 != pv; i++)
1802 {
1803 pv2 = (void *)(intptr_t)(-5 - i);
1804 rc = PGMDynMapHCPage(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2);
1805 }
1806 ASMIntEnable();
1807 if (RT_FAILURE(rc) || pv == pv2)
1808 {
1809 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
1810 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1811 }
1812 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries))
1813 {
1814 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1815 rc = VERR_INTERNAL_ERROR;
1816 }
1817 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cGuardPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1818 if (RT_SUCCESS(rc))
1819 rc = PGMR0DynMapAssertIntegrity();
1820 if (RT_SUCCESS(rc))
1821 {
1822 /*
1823 * Trigger an overflow error.
1824 */
1825 LogRel(("Test #4\n"));
1826 ASMIntDisable();
1827 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) / 2 - 3 + 1 && pv2 != pv; i++)
1828 {
1829 rc = PGMDynMapHCPage(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2);
1830 if (RT_SUCCESS(rc))
1831 rc = PGMR0DynMapAssertIntegrity();
1832 if (RT_FAILURE(rc))
1833 break;
1834 }
1835 ASMIntEnable();
1836 if (rc == VERR_PGM_DYNMAP_FULL_SET)
1837 {
1838 /* flush the set. */
1839 LogRel(("Test #5\n"));
1840 ASMIntDisable();
1841 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
1842 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
1843 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
1844 ASMIntEnable();
1845
1846 rc = PGMR0DynMapAssertIntegrity();
1847 }
1848 else
1849 {
1850 LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__,
1851 rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i));
1852 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1853 }
1854 }
1855 }
1856 }
1857 else
1858 {
1859 LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2));
1860 if (RT_SUCCESS(rc))
1861 rc = rc2;
1862 }
1863
1864 /*
1865 * Check that everyone sees the same stuff.
1866 */
1867 if (RT_SUCCESS(rc))
1868 {
1869 LogRel(("Test #5\n"));
1870 ASMIntDisable();
1871 RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0);
1872 rc = PGMDynMapHCPage(pVM, HCPhysPT, &pv);
1873 if (RT_SUCCESS(rc))
1874 {
1875 PGMR0DYNMAPTEST Test;
1876 uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u;
1877 Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK));
1878 Test.u32Expect = *pu32Real;
1879 ASMAtomicWriteU32(&Test.cFailures, 0);
1880 ASMIntEnable();
1881
1882 rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL);
1883 if (RT_FAILURE(rc))
1884 LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc));
1885 else if (Test.cFailures)
1886 {
1887 LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__,
1888 Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
1889 rc = VERR_INTERNAL_ERROR;
1890 }
1891 else
1892 LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n",
1893 pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
1894 }
1895 else
1896 {
1897 ASMIntEnable();
1898 LogRel(("failed(%d): rc=%Rrc\n", __LINE__, rc));
1899 }
1900 }
1901
1902 /*
1903 * Clean up.
1904 */
1905 LogRel(("Cleanup.\n"));
1906 ASMIntDisable();
1907 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
1908 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
1909 ASMIntEnable();
1910
1911 if (RT_SUCCESS(rc))
1912 rc = PGMR0DynMapAssertIntegrity();
1913 else
1914 PGMR0DynMapAssertIntegrity();
1915
1916 LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc,
1917 pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cGuardPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1918 pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved;
1919 LogRel(("pgmR0DynMapTest: ****** END ******\n"));
1920 return rc;
1921}
1922#endif /* DEBUG */
1923