VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp@ 14822

Last change on this file since 14822 was 14822, checked in by vboxsync, 16 years ago

PGMR0DynMap: Fixed bug in previous pgmR0DynMapPageSlow bug fix. Bad index calculation. it was responsible for guest & host stability issues (wrong page & zone/heap corruption).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 67.6 KB
Line 
1/* $Id: PGMR0DynMap.cpp 14822 2008-11-30 05:36:38Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Internal Functions *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PGM
26#include <VBox/pgm.h>
27#include "../PGMInternal.h"
28#include <VBox/vm.h>
29#include <VBox/sup.h>
30#include <VBox/err.h>
31#include <iprt/asm.h>
32#include <iprt/alloc.h>
33#include <iprt/assert.h>
34#include <iprt/cpuset.h>
35#include <iprt/memobj.h>
36#include <iprt/mp.h>
37#include <iprt/semaphore.h>
38#include <iprt/spinlock.h>
39#include <iprt/string.h>
40
41
42/*******************************************************************************
43* Defined Constants And Macros *
44*******************************************************************************/
45/** The max size of the mapping cache (in pages). */
46#define PGMR0DYNMAP_MAX_PAGES ((8*_1M) >> PAGE_SHIFT)
47/** The small segment size that is adopted on out-of-memory conditions with a
48 * single big segment. */
49#define PGMR0DYNMAP_SMALL_SEG_PAGES 128
50/** The number of pages we reserve per CPU. */
51#define PGMR0DYNMAP_PAGES_PER_CPU 64
52/** The number of guard pages. */
53#if defined(VBOX_STRICT)
54# define PGMR0DYNMAP_GUARD_PAGES 4
55#else
56# define PGMR0DYNMAP_GUARD_PAGES 1
57#endif
58/** The dummy physical address of guard pages. */
59#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS UINT32_C(0x7777feed)
60/** The dummy reference count of guard pages. (Must be non-zero.) */
61#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT INT32_C(0x7777feed)
62#if 0
63/** Define this to just clear the present bit on guard pages.
64 * The alternative is to replace the entire PTE with an bad not-present
65 * PTE. Either way, XNU will screw us. :-/ */
66#define PGMR0DYNMAP_GUARD_NP
67#endif
68/** The dummy PTE value for a page. */
69#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE X86_PTE_PG_MASK
70/** The dummy PTE value for a page. */
71#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE UINT64_MAX /*X86_PTE_PAE_PG_MASK*/
72/** Calcs the overload threshold. Current set at 50%. */
73#define PGMR0DYNMAP_CALC_OVERLOAD(cPages) ((cPages) / 2)
74
75#if 0
76/* Assertions causes panics if preemption is disabled, this can be used to work aroudn that. */
77//#define RTSpinlockAcquire(a,b) do {} while (0)
78//#define RTSpinlockRelease(a,b) do {} while (0)
79#endif
80
81
82/*******************************************************************************
83* Structures and Typedefs *
84*******************************************************************************/
85/**
86 * Ring-0 dynamic mapping cache segment.
87 *
88 * The dynamic mapping cache can be extended with additional segments if the
89 * load is found to be too high. This done the next time a VM is created, under
90 * the protection of the init mutex. The arrays is reallocated and the new
91 * segment is added to the end of these. Nothing is rehashed of course, as the
92 * indexes / addresses must remain unchanged.
93 *
94 * This structure is only modified while owning the init mutex or during module
95 * init / term.
96 */
97typedef struct PGMR0DYNMAPSEG
98{
99 /** Pointer to the next segment. */
100 struct PGMR0DYNMAPSEG *pNext;
101 /** The memory object for the virtual address range that we're abusing. */
102 RTR0MEMOBJ hMemObj;
103 /** The start page in the cache. (I.e. index into the arrays.) */
104 uint16_t iPage;
105 /** The number of pages this segment contributes. */
106 uint16_t cPages;
107 /** The number of page tables. */
108 uint16_t cPTs;
109 /** The memory objects for the page tables. */
110 RTR0MEMOBJ ahMemObjPTs[1];
111} PGMR0DYNMAPSEG;
112/** Pointer to a ring-0 dynamic mapping cache segment. */
113typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
114
115
116/**
117 * Ring-0 dynamic mapping cache entry.
118 *
119 * This structure tracks
120 */
121typedef struct PGMR0DYNMAPENTRY
122{
123 /** The physical address of the currently mapped page.
124 * This is duplicate for three reasons: cache locality, cache policy of the PT
125 * mappings and sanity checks. */
126 RTHCPHYS HCPhys;
127 /** Pointer to the page. */
128 void *pvPage;
129 /** The number of references. */
130 int32_t volatile cRefs;
131 /** PTE pointer union. */
132 union PGMR0DYNMAPENTRY_PPTE
133 {
134 /** PTE pointer, 32-bit legacy version. */
135 PX86PTE pLegacy;
136 /** PTE pointer, PAE version. */
137 PX86PTEPAE pPae;
138 /** PTE pointer, the void version. */
139 void *pv;
140 } uPte;
141 /** CPUs that haven't invalidated this entry after it's last update. */
142 RTCPUSET PendingSet;
143} PGMR0DYNMAPENTRY;
144/** Pointer to a ring-0 dynamic mapping cache entry. */
145typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
146
147
148/**
149 * Ring-0 dynamic mapping cache.
150 *
151 * This is initialized during VMMR0 module init but no segments are allocated at
152 * that time. Segments will be added when the first VM is started and removed
153 * again when the last VM shuts down, thus avoid consuming memory while dormant.
154 * At module termination, the remaining bits will be freed up.
155 */
156typedef struct PGMR0DYNMAP
157{
158 /** The usual magic number / eye catcher (PGMR0DYNMAP_MAGIC). */
159 uint32_t u32Magic;
160 /** Spinlock serializing the normal operation of the cache. */
161 RTSPINLOCK hSpinlock;
162 /** Array for tracking and managing the pages. */
163 PPGMR0DYNMAPENTRY paPages;
164 /** The cache size given as a number of pages. */
165 uint32_t cPages;
166 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
167 bool fLegacyMode;
168 /** The current load.
169 * This does not include guard pages. */
170 uint32_t cLoad;
171 /** The max load ever.
172 * This is maintained to get trigger adding of more mapping space. */
173 uint32_t cMaxLoad;
174 /** Initialization / termination lock. */
175 RTSEMFASTMUTEX hInitLock;
176 /** The number of guard pages. */
177 uint32_t cGuardPages;
178 /** The number of users (protected by hInitLock). */
179 uint32_t cUsers;
180 /** Array containing a copy of the original page tables.
181 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
182 void *pvSavedPTEs;
183 /** List of segments. */
184 PPGMR0DYNMAPSEG pSegHead;
185 /** The paging mode. */
186 SUPPAGINGMODE enmPgMode;
187} PGMR0DYNMAP;
188/** Pointer to the ring-0 dynamic mapping cache */
189typedef PGMR0DYNMAP *PPGMR0DYNMAP;
190
191/** PGMR0DYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
192#define PGMR0DYNMAP_MAGIC 0x19640201
193
194
195/**
196 * Paging level data.
197 */
198typedef struct PGMR0DYNMAPPGLVL
199{
200 uint32_t cLevels; /**< The number of levels. */
201 struct
202 {
203 RTHCPHYS HCPhys; /**< The address of the page for the current level,
204 * i.e. what hMemObj/hMapObj is currently mapping. */
205 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
206 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
207 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
208 uint32_t fPtrShift; /**< The pointer shift count. */
209 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
210 uint64_t fAndMask; /**< And mask to check entry flags. */
211 uint64_t fResMask; /**< The result from applying fAndMask. */
212 union
213 {
214 void *pv; /**< hMapObj address. */
215 PX86PGUINT paLegacy; /**< Legacy table view. */
216 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
217 } u;
218 } a[4];
219} PGMR0DYNMAPPGLVL;
220/** Pointer to paging level data. */
221typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
222
223
224/*******************************************************************************
225* Global Variables *
226*******************************************************************************/
227/** Pointer to the ring-0 dynamic mapping cache. */
228static PPGMR0DYNMAP g_pPGMR0DynMap;
229
230
231/*******************************************************************************
232* Internal Functions *
233*******************************************************************************/
234static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs);
235static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis);
236static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis);
237static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis);
238#ifdef DEBUG
239static int pgmR0DynMapTest(PVM pVM);
240#endif
241
242
243/**
244 * Initializes the ring-0 dynamic mapping cache.
245 *
246 * @returns VBox status code.
247 */
248VMMR0DECL(int) PGMR0DynMapInit(void)
249{
250 Assert(!g_pPGMR0DynMap);
251
252 /*
253 * Create and initialize the cache instance.
254 */
255 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)RTMemAllocZ(sizeof(*pThis));
256 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
257 int rc = VINF_SUCCESS;
258 pThis->enmPgMode = SUPR0GetPagingMode();
259 switch (pThis->enmPgMode)
260 {
261 case SUPPAGINGMODE_32_BIT:
262 case SUPPAGINGMODE_32_BIT_GLOBAL:
263 pThis->fLegacyMode = false;
264 break;
265 case SUPPAGINGMODE_PAE:
266 case SUPPAGINGMODE_PAE_GLOBAL:
267 case SUPPAGINGMODE_PAE_NX:
268 case SUPPAGINGMODE_PAE_GLOBAL_NX:
269 case SUPPAGINGMODE_AMD64:
270 case SUPPAGINGMODE_AMD64_GLOBAL:
271 case SUPPAGINGMODE_AMD64_NX:
272 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
273 pThis->fLegacyMode = false;
274 break;
275 default:
276 rc = VERR_INTERNAL_ERROR;
277 break;
278 }
279 if (RT_SUCCESS(rc))
280 {
281 rc = RTSemFastMutexCreate(&pThis->hInitLock);
282 if (RT_SUCCESS(rc))
283 {
284 rc = RTSpinlockCreate(&pThis->hSpinlock);
285 if (RT_SUCCESS(rc))
286 {
287 pThis->u32Magic = PGMR0DYNMAP_MAGIC;
288 g_pPGMR0DynMap = pThis;
289 return VINF_SUCCESS;
290 }
291 RTSemFastMutexDestroy(pThis->hInitLock);
292 }
293 }
294 RTMemFree(pThis);
295 return rc;
296}
297
298
299/**
300 * Terminates the ring-0 dynamic mapping cache.
301 */
302VMMR0DECL(void) PGMR0DynMapTerm(void)
303{
304 /*
305 * Destroy the cache.
306 *
307 * There is not supposed to be any races here, the loader should
308 * make sure about that. So, don't bother locking anything.
309 *
310 * The VM objects should all be destroyed by now, so there is no
311 * dangling users or anything like that to clean up. This routine
312 * is just a mirror image of PGMR0DynMapInit.
313 */
314 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
315 if (pThis)
316 {
317 AssertPtr(pThis);
318 g_pPGMR0DynMap = NULL;
319
320 /* This should *never* happen, but in case it does try not to leak memory. */
321 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages,
322 ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n",
323 pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages));
324 if (pThis->paPages)
325 pgmR0DynMapTearDown(pThis);
326
327 /* Free the associated resources. */
328 RTSemFastMutexDestroy(pThis->hInitLock);
329 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
330 RTSpinlockDestroy(pThis->hSpinlock);
331 pThis->hSpinlock = NIL_RTSPINLOCK;
332 pThis->u32Magic = UINT32_MAX;
333 RTMemFree(pThis);
334 }
335}
336
337
338/**
339 * Initializes the dynamic mapping cache for a new VM.
340 *
341 * @returns VBox status code.
342 * @param pVM Pointer to the shared VM structure.
343 */
344VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
345{
346 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
347
348 /*
349 * Initialize the auto sets.
350 */
351 VMCPUID idCpu = pVM->cCPUs;
352 AssertReturn(idCpu > 0 && idCpu <= VMCPU_MAX_CPU_COUNT, VERR_INTERNAL_ERROR);
353 while (idCpu-- > 0)
354 {
355 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
356 uint32_t j = RT_ELEMENTS(pSet->aEntries);
357 while (j-- > 0)
358 {
359 pSet->aEntries[j].iPage = UINT16_MAX;
360 pSet->aEntries[j].cRefs = 0;
361 }
362 pSet->cEntries = PGMMAPSET_CLOSED;
363 }
364
365 /*
366 * Do we need the cache? Skip the last bit if we don't.
367 */
368 if (!VMMIsHwVirtExtForced(pVM))
369 return VINF_SUCCESS;
370
371 /*
372 * Reference and if necessary setup or expand the cache.
373 */
374 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
375 AssertPtrReturn(pThis, VERR_INTERNAL_ERROR);
376 int rc = RTSemFastMutexRequest(pThis->hInitLock);
377 AssertLogRelRCReturn(rc, rc);
378
379 pThis->cUsers++;
380 if (pThis->cUsers == 1)
381 {
382 rc = pgmR0DynMapSetup(pThis);
383#ifdef DEBUG
384 if (RT_SUCCESS(rc))
385 {
386 rc = pgmR0DynMapTest(pVM);
387 if (RT_FAILURE(rc))
388 pgmR0DynMapTearDown(pThis);
389 }
390#endif
391 }
392 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages))
393 rc = pgmR0DynMapExpand(pThis);
394 if (RT_SUCCESS(rc))
395 pVM->pgm.s.pvR0DynMapUsed = pThis;
396 else
397 pThis->cUsers--;
398
399 RTSemFastMutexRelease(pThis->hInitLock);
400 return rc;
401}
402
403
404/**
405 * Terminates the dynamic mapping cache usage for a VM.
406 *
407 * @param pVM Pointer to the shared VM structure.
408 */
409VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
410{
411 /*
412 * Return immediately if we're not using the cache.
413 */
414 if (!pVM->pgm.s.pvR0DynMapUsed)
415 return;
416
417 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
418 AssertPtrReturnVoid(pThis);
419
420 int rc = RTSemFastMutexRequest(pThis->hInitLock);
421 AssertLogRelRCReturnVoid(rc);
422
423 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
424 {
425 pVM->pgm.s.pvR0DynMapUsed = NULL;
426
427#ifdef VBOX_STRICT
428 PGMR0DynMapAssertIntegrity();
429#endif
430
431 /*
432 * Clean up and check the auto sets.
433 */
434 VMCPUID idCpu = pVM->cCPUs;
435 while (idCpu-- > 0)
436 {
437 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
438 uint32_t j = pSet->cEntries;
439 if (j <= RT_ELEMENTS(pSet->aEntries))
440 {
441 /*
442 * The set is open, close it.
443 */
444 while (j-- > 0)
445 {
446 int32_t cRefs = pSet->aEntries[j].cRefs;
447 uint32_t iPage = pSet->aEntries[j].iPage;
448 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
449 if (iPage < pThis->cPages && cRefs > 0)
450 pgmR0DynMapReleasePage(pThis, iPage, cRefs);
451 else
452 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
453
454 pSet->aEntries[j].iPage = UINT16_MAX;
455 pSet->aEntries[j].cRefs = 0;
456 }
457 pSet->cEntries = PGMMAPSET_CLOSED;
458 }
459 else
460 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
461
462 j = RT_ELEMENTS(pSet->aEntries);
463 while (j-- > 0)
464 {
465 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
466 Assert(!pSet->aEntries[j].cRefs);
467 }
468 }
469
470 /*
471 * Release our reference to the mapping cache.
472 */
473 Assert(pThis->cUsers > 0);
474 pThis->cUsers--;
475 if (!pThis->cUsers)
476 pgmR0DynMapTearDown(pThis);
477 }
478 else
479 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
480
481 RTSemFastMutexRelease(pThis->hInitLock);
482}
483
484
485/**
486 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
487 *
488 * @param idCpu The current CPU.
489 * @param pvUser1 The dynamic mapping cache instance.
490 * @param pvUser2 Unused, NULL.
491 */
492static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
493{
494 Assert(!pvUser2);
495 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)pvUser1;
496 Assert(pThis == g_pPGMR0DynMap);
497 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
498 uint32_t iPage = pThis->cPages;
499 while (iPage-- > 0)
500 ASMInvalidatePage(paPages[iPage].pvPage);
501}
502
503
504/**
505 * Shoot down the TLBs for every single cache entry on all CPUs.
506 *
507 * @returns IPRT status code (RTMpOnAll).
508 * @param pThis The dynamic mapping cache instance.
509 */
510static int pgmR0DynMapTlbShootDown(PPGMR0DYNMAP pThis)
511{
512 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
513 AssertRC(rc);
514 if (RT_FAILURE(rc))
515 {
516 uint32_t iPage = pThis->cPages;
517 while (iPage-- > 0)
518 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
519 }
520 return rc;
521}
522
523
524/**
525 * Calculate the new cache size based on cMaxLoad statistics.
526 *
527 * @returns Number of pages.
528 * @param pThis The dynamic mapping cache instance.
529 * @param pcMinPages The minimal size in pages.
530 */
531static uint32_t pgmR0DynMapCalcNewSize(PPGMR0DYNMAP pThis, uint32_t *pcMinPages)
532{
533 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
534
535 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU (/2). */
536 RTCPUID cCpus = RTMpGetCount();
537 AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
538 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
539 uint32_t cMinPages = cCpus * (PGMR0DYNMAP_PAGES_PER_CPU / 2);
540
541 /* adjust against cMaxLoad. */
542 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
543 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
544 pThis->cMaxLoad = 0;
545
546 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
547 cPages += PGMR0DYNMAP_PAGES_PER_CPU;
548
549 if (pThis->cMaxLoad > cMinPages)
550 cMinPages = pThis->cMaxLoad;
551
552 /* adjust against max and current size. */
553 if (cPages < pThis->cPages)
554 cPages = pThis->cPages;
555 cPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
556 if (cPages > PGMR0DYNMAP_MAX_PAGES)
557 cPages = PGMR0DYNMAP_MAX_PAGES;
558
559 if (cMinPages < pThis->cPages)
560 cMinPages = pThis->cPages;
561 cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
562 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
563 cMinPages = PGMR0DYNMAP_MAX_PAGES;
564
565 Assert(cMinPages);
566 *pcMinPages = cMinPages;
567 return cPages;
568}
569
570
571/**
572 * Initializes the paging level data.
573 *
574 * @param pThis The dynamic mapping cache instance.
575 * @param pPgLvl The paging level data.
576 */
577void pgmR0DynMapPagingArrayInit(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
578{
579 RTCCUINTREG cr4 = ASMGetCR4();
580 switch (pThis->enmPgMode)
581 {
582 case SUPPAGINGMODE_32_BIT:
583 case SUPPAGINGMODE_32_BIT_GLOBAL:
584 pPgLvl->cLevels = 2;
585 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
586 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
587 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
588 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
589 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
590
591 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
592 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
593 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
594 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
595 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
596 break;
597
598 case SUPPAGINGMODE_PAE:
599 case SUPPAGINGMODE_PAE_GLOBAL:
600 case SUPPAGINGMODE_PAE_NX:
601 case SUPPAGINGMODE_PAE_GLOBAL_NX:
602 pPgLvl->cLevels = 3;
603 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
604 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
605 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
606 pPgLvl->a[0].fAndMask = X86_PDPE_P;
607 pPgLvl->a[0].fResMask = X86_PDPE_P;
608
609 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
610 pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK;
611 pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT;
612 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
613 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
614
615 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
616 pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK;
617 pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT;
618 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
619 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
620 break;
621
622 case SUPPAGINGMODE_AMD64:
623 case SUPPAGINGMODE_AMD64_GLOBAL:
624 case SUPPAGINGMODE_AMD64_NX:
625 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
626 pPgLvl->cLevels = 4;
627 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
628 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
629 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
630 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
631 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
632
633 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
634 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
635 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
636 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
637 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
638
639 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
640 pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT;
641 pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK;
642 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
643 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
644
645 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
646 pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT;
647 pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK;
648 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
649 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
650 break;
651
652 default:
653 AssertFailed();
654 pPgLvl->cLevels = 0;
655 break;
656 }
657
658 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
659 {
660 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
661 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
662 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
663 pPgLvl->a[i].u.pv = NULL;
664 }
665}
666
667
668/**
669 * Maps a PTE.
670 *
671 * This will update the segment structure when new PTs are mapped.
672 *
673 * It also assumes that we (for paranoid reasons) wish to establish a mapping
674 * chain from CR3 to the PT that all corresponds to the processor we're
675 * currently running on, and go about this by running with interrupts disabled
676 * and restarting from CR3 for every change.
677 *
678 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
679 * to re-enable interrupts.
680 * @param pThis The dynamic mapping cache instance.
681 * @param pPgLvl The paging level structure.
682 * @param pvPage The page.
683 * @param pSeg The segment.
684 * @param cMaxPTs The max number of PTs expected in the segment.
685 * @param ppvPTE Where to store the PTE address.
686 */
687static int pgmR0DynMapPagingArrayMapPte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
688 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
689{
690 Assert(!(ASMGetFlags() & X86_EFL_IF));
691 void *pvEntry = NULL;
692 X86PGPAEUINT uEntry = ASMGetCR3();
693 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
694 {
695 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
696 if (pPgLvl->a[i].HCPhys != HCPhys)
697 {
698 /*
699 * Need to remap this level.
700 * The final level, the PT, will not be freed since that is what it's all about.
701 */
702 ASMIntEnable();
703 if (i + 1 == pPgLvl->cLevels)
704 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_INTERNAL_ERROR);
705 else
706 {
707 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
708 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
709 }
710
711 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE);
712 if (RT_SUCCESS(rc))
713 {
714 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
715 (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
716 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
717 if (RT_SUCCESS(rc))
718 {
719 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
720 AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
721 pPgLvl->a[i].HCPhys = HCPhys;
722 if (i + 1 == pPgLvl->cLevels)
723 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
724 ASMIntDisable();
725 return VINF_TRY_AGAIN;
726 }
727
728 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
729 }
730 else
731 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
732 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
733 return rc;
734 }
735
736 /*
737 * The next level.
738 */
739 uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
740 if (pThis->fLegacyMode)
741 {
742 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
743 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
744 }
745 else
746 {
747 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
748 uEntry = pPgLvl->a[i].u.paPae[iEntry];
749 }
750
751 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
752 {
753 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n"
754 "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n",
755 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask,
756 pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode));
757 return VERR_INTERNAL_ERROR;
758 }
759 /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/
760 }
761
762 /* made it thru without needing to remap anything. */
763 *ppvPTE = pvEntry;
764 return VINF_SUCCESS;
765}
766
767
768/**
769 * Sets up a guard page.
770 *
771 * @param pThis The dynamic mapping cache instance.
772 * @param pPage The page.
773 */
774DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMR0DYNMAP pThis, PPGMR0DYNMAPENTRY pPage)
775{
776 memset(pPage->pvPage, 0xfd, PAGE_SIZE);
777 pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT;
778 pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
779#ifdef PGMR0DYNMAP_GUARD_NP
780 ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P);
781#else
782 if (pThis->fLegacyMode)
783 ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE);
784 else
785 ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE);
786#endif
787 pThis->cGuardPages++;
788}
789
790
791/**
792 * Adds a new segment of the specified size.
793 *
794 * @returns VBox status code.
795 * @param pThis The dynamic mapping cache instance.
796 * @param cPages The size of the new segment, give as a page count.
797 */
798static int pgmR0DynMapAddSeg(PPGMR0DYNMAP pThis, uint32_t cPages)
799{
800 int rc2;
801 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
802
803 /*
804 * Do the array reallocations first.
805 * (The pages array has to be replaced behind the spinlock of course.)
806 */
807 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
808 if (!pvSavedPTEs)
809 return VERR_NO_MEMORY;
810 pThis->pvSavedPTEs = pvSavedPTEs;
811
812 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
813 if (!pvPages)
814 {
815 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
816 if (pvSavedPTEs)
817 pThis->pvSavedPTEs = pvSavedPTEs;
818 return VERR_NO_MEMORY;
819 }
820
821 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
822 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
823
824 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
825 void *pvToFree = pThis->paPages;
826 pThis->paPages = (PPGMR0DYNMAPENTRY)pvPages;
827
828 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
829 RTMemFree(pvToFree);
830
831 /*
832 * Allocate the segment structure and pages of memory, then touch all the pages (paranoia).
833 */
834 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
835 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
836 if (!pSeg)
837 return VERR_NO_MEMORY;
838 pSeg->pNext = NULL;
839 pSeg->cPages = cPages;
840 pSeg->iPage = pThis->cPages;
841 pSeg->cPTs = 0;
842 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
843 if (RT_SUCCESS(rc))
844 {
845 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
846 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
847 memset(pbPage, 0xfe, cPages << PAGE_SHIFT);
848
849 /*
850 * Walk thru the pages and set them up with a mapping of their PTE and everything.
851 */
852 ASMIntDisable();
853 PGMR0DYNMAPPGLVL PgLvl;
854 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
855 uint32_t const iEndPage = pThis->cPages + cPages;
856 for (uint32_t iPage = pThis->cPages;
857 iPage < iEndPage;
858 iPage++, pbPage += PAGE_SIZE)
859 {
860 /* Initialize the page data. */
861 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
862 pThis->paPages[iPage].pvPage = pbPage;
863 pThis->paPages[iPage].cRefs = 0;
864 pThis->paPages[iPage].uPte.pPae = 0;
865 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
866
867 /* Map its page table, retry until we've got a clean run (paranoia). */
868 do
869 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
870 &pThis->paPages[iPage].uPte.pv);
871 while (rc == VINF_TRY_AGAIN);
872 if (RT_FAILURE(rc))
873 break;
874
875 /* Save the PTE. */
876 if (pThis->fLegacyMode)
877 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
878 else
879 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
880
881#ifdef VBOX_STRICT
882 /* Check that we've got the right entry. */
883 RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage);
884 RTHCPHYS HCPhysPte = pThis->fLegacyMode
885 ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK
886 : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK;
887 if (HCPhysPage != HCPhysPte)
888 {
889 LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n",
890 iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv));
891 rc = VERR_INTERNAL_ERROR;
892 break;
893 }
894#endif
895 } /* for each page */
896 ASMIntEnable();
897
898 /* cleanup non-PT mappings */
899 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
900 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
901
902 if (RT_SUCCESS(rc))
903 {
904#if PGMR0DYNMAP_GUARD_PAGES > 0
905 /*
906 * Setup guard pages.
907 * (Note: TLBs will be shot down later on.)
908 */
909 uint32_t iPage = pThis->cPages;
910 while (iPage < iEndPage)
911 {
912 for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++)
913 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
914 iPage++; /* the guarded page */
915 }
916
917 /* Make sure the very last page is a guard page too. */
918 iPage = iEndPage - 1;
919 if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT)
920 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
921#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */
922
923 /*
924 * Commit it by adding the segment to the list and updating the page count.
925 */
926 pSeg->pNext = pThis->pSegHead;
927 pThis->pSegHead = pSeg;
928 pThis->cPages += cPages;
929 return VINF_SUCCESS;
930 }
931
932 /*
933 * Bail out.
934 */
935 while (pSeg->cPTs-- > 0)
936 {
937 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
938 AssertRC(rc2);
939 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
940 }
941
942 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
943 AssertRC(rc2);
944 pSeg->hMemObj = NIL_RTR0MEMOBJ;
945 }
946 RTMemFree(pSeg);
947
948 /* Don't bother resizing the arrays, but free them if we're the only user. */
949 if (!pThis->cPages)
950 {
951 RTMemFree(pThis->paPages);
952 pThis->paPages = NULL;
953 RTMemFree(pThis->pvSavedPTEs);
954 pThis->pvSavedPTEs = NULL;
955 }
956 return rc;
957}
958
959
960/**
961 * Called by PGMR0DynMapInitVM under the init lock.
962 *
963 * @returns VBox status code.
964 * @param pThis The dynamic mapping cache instance.
965 */
966static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis)
967{
968 /*
969 * Calc the size and add a segment of that size.
970 */
971 uint32_t cMinPages;
972 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
973 AssertReturn(cPages, VERR_INTERNAL_ERROR);
974 int rc = pgmR0DynMapAddSeg(pThis, cPages);
975 if (rc == VERR_NO_MEMORY)
976 {
977 /*
978 * Try adding smaller segments.
979 */
980 do
981 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
982 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
983 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
984 rc = VINF_SUCCESS;
985 if (rc == VERR_NO_MEMORY)
986 {
987 if (pThis->cPages)
988 pgmR0DynMapTearDown(pThis);
989 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
990 }
991 }
992 Assert(ASMGetFlags() & X86_EFL_IF);
993
994#if PGMR0DYNMAP_GUARD_PAGES > 0
995 /* paranoia */
996 if (RT_SUCCESS(rc))
997 pgmR0DynMapTlbShootDown(pThis);
998#endif
999 return rc;
1000}
1001
1002
1003/**
1004 * Called by PGMR0DynMapInitVM under the init lock.
1005 *
1006 * @returns VBox status code.
1007 * @param pThis The dynamic mapping cache instance.
1008 */
1009static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis)
1010{
1011 /*
1012 * Calc the new target size and add a segment of the appropriate size.
1013 */
1014 uint32_t cMinPages;
1015 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1016 AssertReturn(cPages, VERR_INTERNAL_ERROR);
1017 if (pThis->cPages >= cPages)
1018 return VINF_SUCCESS;
1019
1020 uint32_t cAdd = cPages - pThis->cPages;
1021 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
1022 if (rc == VERR_NO_MEMORY)
1023 {
1024 /*
1025 * Try adding smaller segments.
1026 */
1027 do
1028 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1029 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1030 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1031 rc = VINF_SUCCESS;
1032 if (rc == VERR_NO_MEMORY)
1033 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
1034 }
1035 Assert(ASMGetFlags() & X86_EFL_IF);
1036
1037#if PGMR0DYNMAP_GUARD_PAGES > 0
1038 /* paranoia */
1039 if (RT_SUCCESS(rc))
1040 pgmR0DynMapTlbShootDown(pThis);
1041#endif
1042 return rc;
1043}
1044
1045
1046/**
1047 * Called by PGMR0DynMapTermVM under the init lock.
1048 *
1049 * @returns VBox status code.
1050 * @param pThis The dynamic mapping cache instance.
1051 */
1052static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis)
1053{
1054 /*
1055 * Restore the original page table entries
1056 */
1057 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1058 uint32_t iPage = pThis->cPages;
1059 if (pThis->fLegacyMode)
1060 {
1061 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
1062 while (iPage-- > 0)
1063 {
1064 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
1065 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1066 X86PGUINT uNew = paSavedPTEs[iPage];
1067 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
1068 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1069 Assert(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage]);
1070 }
1071 }
1072 else
1073 {
1074 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
1075 while (iPage-- > 0)
1076 {
1077 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
1078 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1079 X86PGPAEUINT uNew = paSavedPTEs[iPage];
1080 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
1081 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1082 Assert(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage]);
1083 }
1084 }
1085
1086 /*
1087 * Shoot down the TLBs on all CPUs before freeing them.
1088 */
1089 pgmR0DynMapTlbShootDown(pThis);
1090
1091 /*
1092 * Free the segments.
1093 */
1094 while (pThis->pSegHead)
1095 {
1096 int rc;
1097 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
1098 pThis->pSegHead = pSeg->pNext;
1099
1100 uint32_t iPT = pSeg->cPTs;
1101 while (iPT-- > 0)
1102 {
1103 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
1104 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
1105 }
1106 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
1107 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1108 pSeg->pNext = NULL;
1109 pSeg->iPage = UINT16_MAX;
1110 pSeg->cPages = 0;
1111 pSeg->cPTs = 0;
1112 RTMemFree(pSeg);
1113 }
1114
1115 /*
1116 * Free the arrays and restore the initial state.
1117 * The cLoadMax value is left behind for the next setup.
1118 */
1119 RTMemFree(pThis->paPages);
1120 pThis->paPages = NULL;
1121 RTMemFree(pThis->pvSavedPTEs);
1122 pThis->pvSavedPTEs = NULL;
1123 pThis->cPages = 0;
1124 pThis->cLoad = 0;
1125 pThis->cGuardPages = 0;
1126}
1127
1128
1129/**
1130 * Release references to a page, caller owns the spin lock.
1131 *
1132 * @param pThis The dynamic mapping cache instance.
1133 * @param iPage The page.
1134 * @param cRefs The number of references to release.
1135 */
1136DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs)
1137{
1138 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs;
1139 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
1140 if (!cRefs)
1141 pThis->cLoad--;
1142}
1143
1144
1145/**
1146 * Release references to a page, caller does not own the spin lock.
1147 *
1148 * @param pThis The dynamic mapping cache instance.
1149 * @param iPage The page.
1150 * @param cRefs The number of references to release.
1151 */
1152static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1153{
1154 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1155 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1156 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1157 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1158}
1159
1160
1161/**
1162 * pgmR0DynMapPage worker that deals with the tedious bits.
1163 *
1164 * @returns The page index on success, UINT32_MAX on failure.
1165 * @param pThis The dynamic mapping cache instance.
1166 * @param HCPhys The address of the page to be mapped.
1167 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1168 */
1169static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage)
1170{
1171 /*
1172 * Check if any of the first 5 pages are unreferenced since the caller
1173 * already has made sure they aren't matching.
1174 */
1175 uint32_t const cPages = pThis->cPages;
1176 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1177 uint32_t iFreePage;
1178 if (!paPages[iPage].cRefs)
1179 iFreePage = iPage;
1180 else if (!paPages[(iPage + 1) % cPages].cRefs)
1181 iFreePage = (iPage + 1) % cPages;
1182 else if (!paPages[(iPage + 2) % cPages].cRefs)
1183 iFreePage = (iPage + 2) % cPages;
1184 else if (!paPages[(iPage + 3) % cPages].cRefs)
1185 iFreePage = (iPage + 3) % cPages;
1186 else if (!paPages[(iPage + 4) % cPages].cRefs)
1187 iFreePage = (iPage + 4) % cPages;
1188 else
1189 {
1190 /*
1191 * Search for an unused or matching entry.
1192 */
1193 iFreePage = (iPage + 5) % cPages;
1194 for (;;)
1195 {
1196 if (paPages[iFreePage].HCPhys == HCPhys)
1197 return iFreePage;
1198 if (!paPages[iFreePage].cRefs)
1199 break;
1200
1201 /* advance */
1202 iFreePage = (iFreePage + 1) % cPages;
1203 if (RT_UNLIKELY(iFreePage == iPage))
1204 return UINT32_MAX;
1205 }
1206 }
1207 Assert(iFreePage < cPages);
1208
1209 /*
1210 * Setup the new entry.
1211 */
1212 /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/
1213 paPages[iFreePage].HCPhys = HCPhys;
1214 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1215 if (pThis->fLegacyMode)
1216 {
1217 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1218 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1219 X86PGUINT uNew = (uOld & X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT)
1220 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1221 | (HCPhys & X86_PTE_PG_MASK);
1222 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1223 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1224 Assert(paPages[iPage].uPte.pLegacy->u == uNew);
1225 }
1226 else
1227 {
1228 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1229 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1230 X86PGPAEUINT uNew = (uOld & X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT)
1231 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1232 | (HCPhys & X86_PTE_PAE_PG_MASK);
1233 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1234 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1235 Assert(paPages[iPage].uPte.pPae->u == uNew);
1236 /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/
1237 }
1238 return iFreePage;
1239}
1240
1241
1242/**
1243 * Maps a page into the pool.
1244 *
1245 * @returns Page index on success, UINT32_MAX on failure.
1246 * @param pThis The dynamic mapping cache instance.
1247 * @param HCPhys The address of the page to be mapped.
1248 * @param ppvPage Where to the page address.
1249 */
1250DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, void **ppvPage)
1251{
1252 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1253 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1254 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1255
1256 /*
1257 * Find an entry, if possible a matching one. The HCPhys address is hashed
1258 * down to a page index, collisions are handled by linear searching. Optimize
1259 * for a hit in the first 5 pages.
1260 *
1261 * To the cheap hits here and defer the tedious searching and inserting
1262 * to a helper function.
1263 */
1264 uint32_t const cPages = pThis->cPages;
1265 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1266 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1267 if (paPages[iPage].HCPhys != HCPhys)
1268 {
1269 uint32_t iPage2 = (iPage + 1) % cPages;
1270 if (paPages[iPage2].HCPhys != HCPhys)
1271 {
1272 iPage2 = (iPage + 2) % cPages;
1273 if (paPages[iPage2].HCPhys != HCPhys)
1274 {
1275 iPage2 = (iPage + 3) % cPages;
1276 if (paPages[iPage2].HCPhys != HCPhys)
1277 {
1278 iPage2 = (iPage + 4) % cPages;
1279 if (paPages[iPage2].HCPhys != HCPhys)
1280 {
1281 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage);
1282 if (RT_UNLIKELY(iPage == UINT32_MAX))
1283 {
1284 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1285 return iPage;
1286 }
1287 }
1288 else
1289 iPage = iPage2;
1290 }
1291 else
1292 iPage = iPage2;
1293 }
1294 else
1295 iPage = iPage2;
1296 }
1297 else
1298 iPage = iPage2;
1299 }
1300
1301 /*
1302 * Reference it, update statistics and get the return address.
1303 */
1304 int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs);
1305 if (cRefs == 1)
1306 {
1307 pThis->cLoad++;
1308 if (pThis->cLoad > pThis->cMaxLoad)
1309 pThis->cMaxLoad = pThis->cLoad;
1310 AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages));
1311 }
1312 else if (RT_UNLIKELY(cRefs <= 0))
1313 {
1314 ASMAtomicDecS32(&paPages[iPage].cRefs);
1315 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1316 AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%p HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX);
1317 }
1318 void *pvPage = paPages[iPage].pvPage;
1319
1320 /*
1321 * Invalidate the entry?
1322 */
1323 RTCPUID idRealCpu = RTMpCpuId();
1324 bool fInvalidateIt = RTCpuSetIsMember(&paPages[iPage].PendingSet, idRealCpu);
1325 if (fInvalidateIt)
1326 RTCpuSetDel(&paPages[iPage].PendingSet, idRealCpu);
1327
1328 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1329
1330 /*
1331 * Do the actual invalidation outside the spinlock.
1332 */
1333 ASMInvalidatePage(pvPage);
1334
1335 *ppvPage = pvPage;
1336 return iPage;
1337}
1338
1339
1340/**
1341 * Assert the the integrity of the pool.
1342 *
1343 * @returns VBox status code.
1344 */
1345VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void)
1346{
1347 /*
1348 * Basic pool stuff that doesn't require any lock, just assumes we're a user.
1349 */
1350 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1351 if (!pThis)
1352 return VINF_SUCCESS;
1353 AssertPtrReturn(pThis, VERR_INVALID_POINTER);
1354 AssertReturn(pThis->u32Magic == PGMR0DYNMAP_MAGIC, VERR_INVALID_MAGIC);
1355 if (!pThis->cUsers)
1356 return VERR_INVALID_PARAMETER;
1357
1358
1359 int rc = VINF_SUCCESS;
1360 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1361 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1362#define CHECK_RET(expr, a) \
1363 do { \
1364 if (!(expr)) \
1365 { \
1366 RTSpinlockRelease(pThis->hSpinlock, &Tmp); \
1367 AssertMsg1(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \
1368 AssertMsg2 a; \
1369 return VERR_INTERNAL_ERROR; \
1370 } \
1371 } while (0)
1372
1373 /*
1374 * Check that the PTEs are correct.
1375 */
1376 uint32_t cGuard = 0;
1377 uint32_t cLoad = 0;
1378 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1379 uint32_t iPage = pThis->cPages;
1380 if (pThis->fLegacyMode)
1381 {
1382 PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1383 while (iPage-- > 0)
1384 {
1385 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1386 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1387 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1388 {
1389#ifdef PGMR0DYNMAP_GUARD_NP
1390 CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P),
1391 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1392#else
1393 CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE,
1394 ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u));
1395#endif
1396 cGuard++;
1397 }
1398 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1399 {
1400 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1401 X86PGUINT uPte = (paSavedPTEs[iPage] & X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT)
1402 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1403 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1404 CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte,
1405 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1406 if (paPages[iPage].cRefs)
1407 cLoad++;
1408 }
1409 else
1410 CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage],
1411 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1412 }
1413 }
1414 else
1415 {
1416 PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1417 while (iPage-- > 0)
1418 {
1419 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1420 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1421 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1422 {
1423#ifdef PGMR0DYNMAP_GUARD_NP
1424 CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P),
1425 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1426#else
1427 CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE,
1428 ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u));
1429#endif
1430 cGuard++;
1431 }
1432 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1433 {
1434 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1435 X86PGPAEUINT uPte = (paSavedPTEs[iPage] & X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT)
1436 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1437 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1438 CHECK_RET(paPages[iPage].uPte.pPae->u == uPte,
1439 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1440 if (paPages[iPage].cRefs)
1441 cLoad++;
1442 }
1443 else
1444 CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage],
1445 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1446 }
1447 }
1448
1449 CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad));
1450 CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages));
1451
1452#undef CHECK_RET
1453 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1454 return VINF_SUCCESS;
1455}
1456
1457
1458/**
1459 * Signals the start of a new set of mappings.
1460 *
1461 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1462 * API is called.
1463 *
1464 * @param pVCpu The shared data for the current virtual CPU.
1465 */
1466VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu)
1467{
1468 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1469 pVCpu->pgm.s.AutoSet.cEntries = 0;
1470}
1471
1472
1473/**
1474 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associates
1475 * since the PGMDynMapStartAutoSet call.
1476 *
1477 * If the set is already closed, nothing will be done.
1478 *
1479 * @param pVCpu The shared data for the current virtual CPU.
1480 */
1481VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu)
1482{
1483 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1484
1485 /*
1486 * Is the set open?
1487 *
1488 * We might be closed before VM execution and not reopened again before
1489 * we leave for ring-3 or something.
1490 */
1491 uint32_t i = pSet->cEntries;
1492 if (i != PGMMAPSET_CLOSED)
1493 {
1494 /*
1495 * Close the set
1496 */
1497 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1498 pSet->cEntries = PGMMAPSET_CLOSED;
1499
1500 /*
1501 * Release any pages it's referencing.
1502 */
1503 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1504 {
1505 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1506 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1507 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1508
1509 while (i-- > 0)
1510 {
1511 uint32_t iPage = pSet->aEntries[i].iPage;
1512 Assert(iPage < pThis->cPages);
1513 int32_t cRefs = pSet->aEntries[i].cRefs;
1514 Assert(cRefs > 0);
1515 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1516
1517 pSet->aEntries[i].iPage = UINT16_MAX;
1518 pSet->aEntries[i].cRefs = 0;
1519 }
1520
1521 Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages);
1522 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1523 }
1524 }
1525}
1526
1527
1528/**
1529 * Migrates the automatic mapping set of the current vCPU if it's active and
1530 * necessary.
1531 *
1532 * This is called when re-entering the hardware assisted execution mode after a
1533 * nip down to ring-3. We run the risk that the CPU might have change and we
1534 * will therefore make sure all the cache entries currently in the auto set will
1535 * be valid on the new CPU. If the cpu didn't change nothing will happen as all
1536 * the entries will have been flagged as invalidated.
1537 *
1538 * @param pVCpu The shared data for the current virtual CPU.
1539 * @thread EMT
1540 */
1541VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu)
1542{
1543 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1544 uint32_t i = pSet->cEntries;
1545 if (i != PGMMAPSET_CLOSED)
1546 {
1547 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1548 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1549 {
1550 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1551 RTCPUID idRealCpu = RTMpCpuId();
1552
1553 while (i-- > 0)
1554 {
1555 Assert(pSet->aEntries[i].cRefs > 0);
1556 uint32_t iPage = pSet->aEntries[i].iPage;
1557 Assert(iPage < pThis->cPages);
1558 if (RTCpuSetIsMember(&pThis->paPages[iPage].PendingSet, idRealCpu))
1559 {
1560 RTCpuSetDel(&pThis->paPages[iPage].PendingSet, idRealCpu);
1561 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
1562 }
1563 }
1564 }
1565 }
1566}
1567
1568
1569/**
1570 * As a final resort for a full auto set, try merge duplicate entries.
1571 *
1572 * @param pSet The set.
1573 */
1574static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1575{
1576 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1577 {
1578 uint16_t const iPage = pSet->aEntries[i].iPage;
1579 uint32_t j = i + 1;
1580 while (j < pSet->cEntries)
1581 {
1582 if (pSet->aEntries[j].iPage != iPage)
1583 j++;
1584 else if ((uint32_t)pSet->aEntries[i].cRefs + (uint32_t)pSet->aEntries[j].cRefs < UINT16_MAX)
1585 {
1586 /* merge j into i removing j. */
1587 pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs;
1588 pSet->cEntries--;
1589 if (j < pSet->cEntries)
1590 {
1591 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1592 pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX;
1593 pSet->aEntries[pSet->cEntries].cRefs = 0;
1594 }
1595 else
1596 {
1597 pSet->aEntries[j].iPage = UINT16_MAX;
1598 pSet->aEntries[j].cRefs = 0;
1599 }
1600 }
1601 else
1602 {
1603 /* migrate the max number of refs from j into i and quit the inner loop. */
1604 uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs;
1605 Assert(pSet->aEntries[j].cRefs > cMigrate);
1606 pSet->aEntries[j].cRefs -= cMigrate;
1607 pSet->aEntries[i].cRefs = UINT16_MAX - 1;
1608 break;
1609 }
1610 }
1611 }
1612}
1613
1614
1615/* documented elsewhere - a bit of a mess. */
1616VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv)
1617{
1618 /*
1619 * Validate state.
1620 */
1621 AssertPtr(ppv);
1622 *ppv = NULL;
1623 AssertMsgReturn(pVM->pgm.s.pvR0DynMapUsed == g_pPGMR0DynMap,
1624 ("%p != %p\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap),
1625 VERR_ACCESS_DENIED);
1626 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1627 PVMCPU pVCpu = VMMGetCpu(pVM);
1628 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1629 AssertPtrReturn(pVCpu, VERR_INTERNAL_ERROR);
1630 AssertMsgReturn(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries),
1631 ("%#x (%u)\n", pSet->cEntries, pSet->cEntries), VERR_WRONG_ORDER);
1632
1633 /*
1634 * Map it.
1635 */
1636 uint32_t const iPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, ppv);
1637 if (RT_UNLIKELY(iPage == UINT32_MAX))
1638 {
1639 static uint32_t s_cBitched = 0;
1640 if (++s_cBitched < 10)
1641 LogRel(("PGMDynMapHCPage: cLoad=%u/%u cPages=%u cGuardPages=%u\n",
1642 g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages, g_pPGMR0DynMap->cGuardPages));
1643 return VERR_PGM_DYNMAP_FAILED;
1644 }
1645
1646 /*
1647 * Add the page to the auto reference set.
1648 * If it's less than half full, don't bother looking for duplicates.
1649 */
1650 if (pSet->cEntries < RT_ELEMENTS(pSet->aEntries) / 2)
1651 {
1652 pSet->aEntries[pSet->cEntries].cRefs = 1;
1653 pSet->aEntries[pSet->cEntries].iPage = iPage;
1654 pSet->cEntries++;
1655 }
1656 else
1657 {
1658 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
1659 int32_t i = pSet->cEntries;
1660 while (i-- > 0)
1661 if ( pSet->aEntries[i].iPage == iPage
1662 && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
1663 {
1664 pSet->aEntries[i].cRefs++;
1665 break;
1666 }
1667 if (i < 0)
1668 {
1669 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
1670 pgmDynMapOptimizeAutoSet(pSet);
1671 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
1672 {
1673 pSet->aEntries[pSet->cEntries].cRefs = 1;
1674 pSet->aEntries[pSet->cEntries].iPage = iPage;
1675 pSet->cEntries++;
1676 }
1677 else
1678 {
1679 /* We're screwed. */
1680 pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1);
1681
1682 static uint32_t s_cBitched = 0;
1683 if (++s_cBitched < 10)
1684 LogRel(("PGMDynMapHCPage: set is full!\n"));
1685 *ppv = NULL;
1686 return VERR_PGM_DYNMAP_FULL_SET;
1687 }
1688 }
1689 }
1690
1691 return VINF_SUCCESS;
1692}
1693
1694
1695#ifdef DEBUG
1696/** For pgmR0DynMapTest3PerCpu. */
1697typedef struct PGMR0DYNMAPTEST
1698{
1699 uint32_t u32Expect;
1700 uint32_t *pu32;
1701 uint32_t volatile cFailures;
1702} PGMR0DYNMAPTEST;
1703typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST;
1704
1705/**
1706 * Checks that the content of the page is the same on all CPUs, i.e. that there
1707 * are no CPU specfic PTs or similar nasty stuff involved.
1708 *
1709 * @param idCpu The current CPU.
1710 * @param pvUser1 Pointer a PGMR0DYNMAPTEST structure.
1711 * @param pvUser2 Unused, ignored.
1712 */
1713static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1714{
1715 PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1;
1716 ASMInvalidatePage(pTest->pu32);
1717 if (*pTest->pu32 != pTest->u32Expect)
1718 ASMAtomicIncU32(&pTest->cFailures);
1719 NOREF(pvUser2); NOREF(idCpu);
1720}
1721
1722
1723/**
1724 * Performs some basic tests in debug builds.
1725 */
1726static int pgmR0DynMapTest(PVM pVM)
1727{
1728 LogRel(("pgmR0DynMapTest: ****** START ******\n"));
1729 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1730 PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet;
1731 uint32_t i;
1732
1733 /*
1734 * Assert internal integrity first.
1735 */
1736 LogRel(("Test #0\n"));
1737 int rc = PGMR0DynMapAssertIntegrity();
1738 if (RT_FAILURE(rc))
1739 return rc;
1740
1741 void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed;
1742 pVM->pgm.s.pvR0DynMapUsed = pThis;
1743
1744 /*
1745 * Simple test, map CR3 twice and check that we're getting the
1746 * same mapping address back.
1747 */
1748 LogRel(("Test #1\n"));
1749 ASMIntDisable();
1750 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
1751
1752 uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK;
1753 void *pv = (void *)(intptr_t)-1;
1754 void *pv2 = (void *)(intptr_t)-2;
1755 rc = PGMDynMapHCPage(pVM, cr3, &pv);
1756 int rc2 = PGMDynMapHCPage(pVM, cr3, &pv2);
1757 ASMIntEnable();
1758 if ( RT_SUCCESS(rc2)
1759 && RT_SUCCESS(rc)
1760 && pv == pv2)
1761 {
1762 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1763 rc = PGMR0DynMapAssertIntegrity();
1764
1765 /*
1766 * Check that the simple set overflow code works by filling it
1767 * with more CR3 mappings.
1768 */
1769 LogRel(("Test #2\n"));
1770 ASMIntDisable();
1771 for (i = 0 ; i < UINT16_MAX*2 + RT_ELEMENTS(pSet->aEntries) / 2 && RT_SUCCESS(rc) && pv2 == pv; i++)
1772 {
1773 pv2 = (void *)(intptr_t)-4;
1774 rc = PGMDynMapHCPage(pVM, cr3, &pv2);
1775 }
1776 ASMIntEnable();
1777 if (RT_FAILURE(rc) || pv != pv2)
1778 {
1779 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%p\n", __LINE__, rc, pv, pv2, i));
1780 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1781 }
1782 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries) / 2)
1783 {
1784 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries) / 2));
1785 rc = VERR_INTERNAL_ERROR;
1786 }
1787 else if ( pSet->aEntries[(RT_ELEMENTS(pSet->aEntries) / 2) - 1].cRefs != UINT16_MAX - 1
1788 || pSet->aEntries[(RT_ELEMENTS(pSet->aEntries) / 2) - 2].cRefs != UINT16_MAX - 1
1789 || pSet->aEntries[(RT_ELEMENTS(pSet->aEntries) / 2) - 3].cRefs != 2+2+3
1790 || pSet->aEntries[(RT_ELEMENTS(pSet->aEntries) / 2) - 4].cRefs != 1)
1791 {
1792 LogRel(("failed(%d): bad set dist: ", __LINE__));
1793 for (i = 0; i < pSet->cEntries; i++)
1794 LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs));
1795 LogRel(("\n"));
1796 rc = VERR_INTERNAL_ERROR;
1797 }
1798 if (RT_SUCCESS(rc))
1799 rc = PGMR0DynMapAssertIntegrity();
1800 if (RT_SUCCESS(rc))
1801 {
1802 /*
1803 * Trigger an set optimization run (exactly).
1804 */
1805 LogRel(("Test #3\n"));
1806 ASMIntDisable();
1807 pv2 = NULL;
1808 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) / 2 && RT_SUCCESS(rc) && pv2 != pv; i++)
1809 {
1810 pv2 = (void *)(intptr_t)(-5 - i);
1811 rc = PGMDynMapHCPage(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2);
1812 }
1813 ASMIntEnable();
1814 if (RT_FAILURE(rc) || pv == pv2)
1815 {
1816 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
1817 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1818 }
1819 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries))
1820 {
1821 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1822 rc = VERR_INTERNAL_ERROR;
1823 }
1824 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1825 if (RT_SUCCESS(rc))
1826 rc = PGMR0DynMapAssertIntegrity();
1827 if (RT_SUCCESS(rc))
1828 {
1829 /*
1830 * Trigger an overflow error.
1831 */
1832 LogRel(("Test #4\n"));
1833 ASMIntDisable();
1834 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) / 2 - 3 + 1 && pv2 != pv; i++)
1835 {
1836 rc = PGMDynMapHCPage(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2);
1837 if (RT_SUCCESS(rc))
1838 rc = PGMR0DynMapAssertIntegrity();
1839 if (RT_FAILURE(rc))
1840 break;
1841 }
1842 ASMIntEnable();
1843 if (rc == VERR_PGM_DYNMAP_FULL_SET)
1844 {
1845 /* flush the set. */
1846 LogRel(("Test #5\n"));
1847 ASMIntDisable();
1848 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
1849 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
1850 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
1851 ASMIntEnable();
1852
1853 rc = PGMR0DynMapAssertIntegrity();
1854 }
1855 else
1856 {
1857 LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__,
1858 rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i));
1859 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1860 }
1861 }
1862 }
1863 }
1864 else
1865 {
1866 LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2));
1867 if (RT_SUCCESS(rc))
1868 rc = rc2;
1869 }
1870
1871 /*
1872 * Check that everyone sees the same stuff.
1873 */
1874 if (RT_SUCCESS(rc))
1875 {
1876 LogRel(("Test #5\n"));
1877 ASMIntDisable();
1878 RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0);
1879 rc = PGMDynMapHCPage(pVM, HCPhysPT, &pv);
1880 if (RT_SUCCESS(rc))
1881 {
1882 PGMR0DYNMAPTEST Test;
1883 uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u;
1884 Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK));
1885 Test.u32Expect = *pu32Real;
1886 ASMAtomicWriteU32(&Test.cFailures, 0);
1887 ASMIntEnable();
1888
1889 rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL);
1890 if (RT_FAILURE(rc))
1891 LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc));
1892 else if (Test.cFailures)
1893 {
1894 LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__,
1895 Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
1896 rc = VERR_INTERNAL_ERROR;
1897 }
1898 else
1899 LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n",
1900 pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
1901 }
1902 else
1903 {
1904 ASMIntEnable();
1905 LogRel(("failed(%d): rc=%Rrc\n", rc));
1906 }
1907 }
1908
1909 /*
1910 * Clean up.
1911 */
1912 LogRel(("Cleanup.\n"));
1913 ASMIntDisable();
1914 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
1915 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
1916 ASMIntEnable();
1917
1918 if (RT_SUCCESS(rc))
1919 rc = PGMR0DynMapAssertIntegrity();
1920 else
1921 PGMR0DynMapAssertIntegrity();
1922
1923 LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc,
1924 pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1925 pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved;
1926 LogRel(("pgmR0DynMapTest: ****** END ******\n"));
1927 return rc;
1928}
1929#endif /* DEBUG */
1930
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette