VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp@14853

Last change on this file since 14853 was 14853, checked in by vboxsync, 16 years ago

PGMR0DynMap: Sane PGMR0DYNMAP_GUARD_PAGES values.

1/* $Id: PGMR0DynMap.cpp 14853 2008-12-01 12:51:26Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Internal Functions *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PGM
26#include <VBox/pgm.h>
27#include "../PGMInternal.h"
28#include <VBox/vm.h>
29#include <VBox/sup.h>
30#include <VBox/err.h>
31#include <iprt/asm.h>
32#include <iprt/alloc.h>
33#include <iprt/assert.h>
34#include <iprt/cpuset.h>
35#include <iprt/memobj.h>
36#include <iprt/mp.h>
37#include <iprt/semaphore.h>
38#include <iprt/spinlock.h>
39#include <iprt/string.h>
40
41
42/*******************************************************************************
43* Defined Constants And Macros *
44*******************************************************************************/
45/** The max size of the mapping cache (in pages). */
46#define PGMR0DYNMAP_MAX_PAGES ((8*_1M) >> PAGE_SHIFT)
47/** The small segment size that is adopted in out-of-memory conditions when a
48 * single big segment cannot be allocated. */
49#define PGMR0DYNMAP_SMALL_SEG_PAGES 128
50/** The number of pages we reserve per CPU. */
51#define PGMR0DYNMAP_PAGES_PER_CPU 64
52/** The number of guard pages.
53 * @remarks Never do tuning of the hashing or whatnot with a strict build! */
54#if defined(VBOX_STRICT)
55# define PGMR0DYNMAP_GUARD_PAGES 1
56#else
57# define PGMR0DYNMAP_GUARD_PAGES 0
58#endif
59/** The dummy physical address of guard pages. */
60#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS UINT32_C(0x7777feed)
61/** The dummy reference count of guard pages. (Must be non-zero.) */
62#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT INT32_C(0x7777feed)
63#if 0
64/** Define this to just clear the present bit on guard pages.
65 * The alternative is to replace the entire PTE with a bad not-present
66 * PTE. Either way, XNU will screw us. :-/ */
67#define PGMR0DYNMAP_GUARD_NP
68#endif
69/** The dummy legacy PTE value for a guard page. */
70#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE X86_PTE_PG_MASK
71/** The dummy PAE PTE value for a guard page. */
72#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE UINT64_MAX /*X86_PTE_PAE_PG_MASK*/
73/** Calculates the overload threshold. Currently set at 50%. */
74#define PGMR0DYNMAP_CALC_OVERLOAD(cPages) ((cPages) / 2)
75
76#if 0
77/* Assertions cause panics if preemption is disabled; this can be used to work around that. */
78//#define RTSpinlockAcquire(a,b) do {} while (0)
79//#define RTSpinlockRelease(a,b) do {} while (0)
80#endif
81
82
83/*******************************************************************************
84* Structures and Typedefs *
85*******************************************************************************/
86/**
87 * Ring-0 dynamic mapping cache segment.
88 *
89 * The dynamic mapping cache can be extended with additional segments if the
90 * load is found to be too high. This is done the next time a VM is created, under
91 * the protection of the init mutex. The arrays are reallocated and the new
92 * segment is added to the end of them. Nothing is rehashed of course, as the
93 * indexes / addresses must remain unchanged.
94 *
95 * This structure is only modified while owning the init mutex or during module
96 * init / term.
97 */
98typedef struct PGMR0DYNMAPSEG
99{
100 /** Pointer to the next segment. */
101 struct PGMR0DYNMAPSEG *pNext;
102 /** The memory object for the virtual address range that we're abusing. */
103 RTR0MEMOBJ hMemObj;
104 /** The start page in the cache. (I.e. index into the arrays.) */
105 uint16_t iPage;
106 /** The number of pages this segment contributes. */
107 uint16_t cPages;
108 /** The number of page tables. */
109 uint16_t cPTs;
110 /** The memory objects for the page tables. */
111 RTR0MEMOBJ ahMemObjPTs[1];
112} PGMR0DYNMAPSEG;
113/** Pointer to a ring-0 dynamic mapping cache segment. */
114typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
115
116
117/**
118 * Ring-0 dynamic mapping cache entry.
119 *
120 * This structure tracks a single page mapping in the cache.
121 */
122typedef struct PGMR0DYNMAPENTRY
123{
124 /** The physical address of the currently mapped page.
125 * This is duplicated for three reasons: cache locality, cache policy of the PT
126 * mappings and sanity checks. */
127 RTHCPHYS HCPhys;
128 /** Pointer to the page. */
129 void *pvPage;
130 /** The number of references. */
131 int32_t volatile cRefs;
132 /** PTE pointer union. */
133 union PGMR0DYNMAPENTRY_PPTE
134 {
135 /** PTE pointer, 32-bit legacy version. */
136 PX86PTE pLegacy;
137 /** PTE pointer, PAE version. */
138 PX86PTEPAE pPae;
139 /** PTE pointer, the void version. */
140 void *pv;
141 } uPte;
142 /** CPUs that haven't invalidated this entry after its last update. */
143 RTCPUSET PendingSet;
144} PGMR0DYNMAPENTRY;
145/** Pointer to a ring-0 dynamic mapping cache entry. */
146typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
147
148
149/**
150 * Ring-0 dynamic mapping cache.
151 *
152 * This is initialized during VMMR0 module init but no segments are allocated at
153 * that time. Segments will be added when the first VM is started and removed
154 * again when the last VM shuts down, thus avoiding memory consumption while dormant.
155 * At module termination, the remaining bits will be freed up.
156 */
157typedef struct PGMR0DYNMAP
158{
159 /** The usual magic number / eye catcher (PGMR0DYNMAP_MAGIC). */
160 uint32_t u32Magic;
161 /** Spinlock serializing the normal operation of the cache. */
162 RTSPINLOCK hSpinlock;
163 /** Array for tracking and managing the pages. */
164 PPGMR0DYNMAPENTRY paPages;
165 /** The cache size given as a number of pages. */
166 uint32_t cPages;
167 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
168 bool fLegacyMode;
169 /** The current load.
170 * This does not include guard pages. */
171 uint32_t cLoad;
172 /** The max load ever.
173 * This is maintained to trigger the addition of more mapping space. */
174 uint32_t cMaxLoad;
175 /** Initialization / termination lock. */
176 RTSEMFASTMUTEX hInitLock;
177 /** The number of guard pages. */
178 uint32_t cGuardPages;
179 /** The number of users (protected by hInitLock). */
180 uint32_t cUsers;
181 /** Array containing a copy of the original page tables.
182 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
183 void *pvSavedPTEs;
184 /** List of segments. */
185 PPGMR0DYNMAPSEG pSegHead;
186 /** The paging mode. */
187 SUPPAGINGMODE enmPgMode;
188} PGMR0DYNMAP;
189/** Pointer to the ring-0 dynamic mapping cache */
190typedef PGMR0DYNMAP *PPGMR0DYNMAP;
191
192/** PGMR0DYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
193#define PGMR0DYNMAP_MAGIC 0x19640201
194
195
196/**
197 * Paging level data.
198 */
199typedef struct PGMR0DYNMAPPGLVL
200{
201 uint32_t cLevels; /**< The number of levels. */
202 struct
203 {
204 RTHCPHYS HCPhys; /**< The address of the page for the current level,
205 * i.e. what hMemObj/hMapObj is currently mapping. */
206 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
207 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
208 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
209 uint32_t fPtrShift; /**< The pointer shift count. */
210 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
211 uint64_t fAndMask; /**< And mask to check entry flags. */
212 uint64_t fResMask; /**< The result from applying fAndMask. */
213 union
214 {
215 void *pv; /**< hMapObj address. */
216 PX86PGUINT paLegacy; /**< Legacy table view. */
217 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
218 } u;
219 } a[4];
220} PGMR0DYNMAPPGLVL;
221/** Pointer to paging level data. */
222typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
223
224
225/*******************************************************************************
226* Global Variables *
227*******************************************************************************/
228/** Pointer to the ring-0 dynamic mapping cache. */
229static PPGMR0DYNMAP g_pPGMR0DynMap;
230
231
232/*******************************************************************************
233* Internal Functions *
234*******************************************************************************/
235static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs);
236static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis);
237static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis);
238static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis);
239#ifdef DEBUG
240static int pgmR0DynMapTest(PVM pVM);
241#endif
242
243
244/**
245 * Initializes the ring-0 dynamic mapping cache.
246 *
247 * @returns VBox status code.
248 */
249VMMR0DECL(int) PGMR0DynMapInit(void)
250{
251 Assert(!g_pPGMR0DynMap);
252
253 /*
254 * Create and initialize the cache instance.
255 */
256 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)RTMemAllocZ(sizeof(*pThis));
257 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
258 int rc = VINF_SUCCESS;
259 pThis->enmPgMode = SUPR0GetPagingMode();
260 switch (pThis->enmPgMode)
261 {
262 case SUPPAGINGMODE_32_BIT:
263 case SUPPAGINGMODE_32_BIT_GLOBAL:
264 pThis->fLegacyMode = false;
265 break;
266 case SUPPAGINGMODE_PAE:
267 case SUPPAGINGMODE_PAE_GLOBAL:
268 case SUPPAGINGMODE_PAE_NX:
269 case SUPPAGINGMODE_PAE_GLOBAL_NX:
270 case SUPPAGINGMODE_AMD64:
271 case SUPPAGINGMODE_AMD64_GLOBAL:
272 case SUPPAGINGMODE_AMD64_NX:
273 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
274 pThis->fLegacyMode = false;
275 break;
276 default:
277 rc = VERR_INTERNAL_ERROR;
278 break;
279 }
280 if (RT_SUCCESS(rc))
281 {
282 rc = RTSemFastMutexCreate(&pThis->hInitLock);
283 if (RT_SUCCESS(rc))
284 {
285 rc = RTSpinlockCreate(&pThis->hSpinlock);
286 if (RT_SUCCESS(rc))
287 {
288 pThis->u32Magic = PGMR0DYNMAP_MAGIC;
289 g_pPGMR0DynMap = pThis;
290 return VINF_SUCCESS;
291 }
292 RTSemFastMutexDestroy(pThis->hInitLock);
293 }
294 }
295 RTMemFree(pThis);
296 return rc;
297}
298
299
300/**
301 * Terminates the ring-0 dynamic mapping cache.
302 */
303VMMR0DECL(void) PGMR0DynMapTerm(void)
304{
305 /*
306 * Destroy the cache.
307 *
308 * There are not supposed to be any races here; the loader should
309 * make sure of that. So, don't bother locking anything.
310 *
311 * The VM objects should all be destroyed by now, so there are no
312 * dangling users or anything like that to clean up. This routine
313 * is just a mirror image of PGMR0DynMapInit.
314 */
315 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
316 if (pThis)
317 {
318 AssertPtr(pThis);
319 g_pPGMR0DynMap = NULL;
320
321 /* This should *never* happen, but in case it does try not to leak memory. */
322 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages,
323 ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n",
324 pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages));
325 if (pThis->paPages)
326 pgmR0DynMapTearDown(pThis);
327
328 /* Free the associated resources. */
329 RTSemFastMutexDestroy(pThis->hInitLock);
330 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
331 RTSpinlockDestroy(pThis->hSpinlock);
332 pThis->hSpinlock = NIL_RTSPINLOCK;
333 pThis->u32Magic = UINT32_MAX;
334 RTMemFree(pThis);
335 }
336}
337
338
339/**
340 * Initializes the dynamic mapping cache for a new VM.
341 *
342 * @returns VBox status code.
343 * @param pVM Pointer to the shared VM structure.
344 */
345VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
346{
347 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
348
349 /*
350 * Initialize the auto sets.
351 */
352 VMCPUID idCpu = pVM->cCPUs;
353 AssertReturn(idCpu > 0 && idCpu <= VMCPU_MAX_CPU_COUNT, VERR_INTERNAL_ERROR);
354 while (idCpu-- > 0)
355 {
356 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
357 uint32_t j = RT_ELEMENTS(pSet->aEntries);
358 while (j-- > 0)
359 {
360 pSet->aEntries[j].iPage = UINT16_MAX;
361 pSet->aEntries[j].cRefs = 0;
362 }
363 pSet->cEntries = PGMMAPSET_CLOSED;
364 }
365
366 /*
367 * Do we need the cache? Skip the last bit if we don't.
368 */
369 if (!VMMIsHwVirtExtForced(pVM))
370 return VINF_SUCCESS;
371
372 /*
373 * Reference and if necessary setup or expand the cache.
374 */
375 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
376 AssertPtrReturn(pThis, VERR_INTERNAL_ERROR);
377 int rc = RTSemFastMutexRequest(pThis->hInitLock);
378 AssertLogRelRCReturn(rc, rc);
379
380 pThis->cUsers++;
381 if (pThis->cUsers == 1)
382 {
383 rc = pgmR0DynMapSetup(pThis);
384#ifdef DEBUG
385 if (RT_SUCCESS(rc))
386 {
387 rc = pgmR0DynMapTest(pVM);
388 if (RT_FAILURE(rc))
389 pgmR0DynMapTearDown(pThis);
390 }
391#endif
392 }
393 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages))
394 rc = pgmR0DynMapExpand(pThis);
395 if (RT_SUCCESS(rc))
396 pVM->pgm.s.pvR0DynMapUsed = pThis;
397 else
398 pThis->cUsers--;
399
400 RTSemFastMutexRelease(pThis->hInitLock);
401 return rc;
402}
403
404
405/**
406 * Terminates the dynamic mapping cache usage for a VM.
407 *
408 * @param pVM Pointer to the shared VM structure.
409 */
410VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
411{
412 /*
413 * Return immediately if we're not using the cache.
414 */
415 if (!pVM->pgm.s.pvR0DynMapUsed)
416 return;
417
418 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
419 AssertPtrReturnVoid(pThis);
420
421 int rc = RTSemFastMutexRequest(pThis->hInitLock);
422 AssertLogRelRCReturnVoid(rc);
423
424 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
425 {
426 pVM->pgm.s.pvR0DynMapUsed = NULL;
427
428#ifdef VBOX_STRICT
429 PGMR0DynMapAssertIntegrity();
430#endif
431
432 /*
433 * Clean up and check the auto sets.
434 */
435 VMCPUID idCpu = pVM->cCPUs;
436 while (idCpu-- > 0)
437 {
438 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
439 uint32_t j = pSet->cEntries;
440 if (j <= RT_ELEMENTS(pSet->aEntries))
441 {
442 /*
443 * The set is open, close it.
444 */
445 while (j-- > 0)
446 {
447 int32_t cRefs = pSet->aEntries[j].cRefs;
448 uint32_t iPage = pSet->aEntries[j].iPage;
449 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
450 if (iPage < pThis->cPages && cRefs > 0)
451 pgmR0DynMapReleasePage(pThis, iPage, cRefs);
452 else
453 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
454
455 pSet->aEntries[j].iPage = UINT16_MAX;
456 pSet->aEntries[j].cRefs = 0;
457 }
458 pSet->cEntries = PGMMAPSET_CLOSED;
459 }
460 else
461 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
462
463 j = RT_ELEMENTS(pSet->aEntries);
464 while (j-- > 0)
465 {
466 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
467 Assert(!pSet->aEntries[j].cRefs);
468 }
469 }
470
471 /*
472 * Release our reference to the mapping cache.
473 */
474 Assert(pThis->cUsers > 0);
475 pThis->cUsers--;
476 if (!pThis->cUsers)
477 pgmR0DynMapTearDown(pThis);
478 }
479 else
480 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
481
482 RTSemFastMutexRelease(pThis->hInitLock);
483}
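/* A rough sketch of the intended call order, based on the doc comments above;
 * the wrapper name pgmR0DynMapLifecycleSketch and the single-VM flow are
 * assumptions for illustration: the cache instance is created at module load,
 * gains segments when the first VM calls PGMR0DynMapInitVM, and is torn down
 * again when the last user calls PGMR0DynMapTermVM or the module unloads. */
#if 0 /* illustration only, never built */
static int pgmR0DynMapLifecycleSketch(PVM pVM)
{
    int rc = PGMR0DynMapInit();             /* module init: instance + locks, no segments yet. */
    if (RT_SUCCESS(rc))
    {
        rc = PGMR0DynMapInitVM(pVM);        /* first user: pgmR0DynMapSetup allocates segments. */
        if (RT_SUCCESS(rc))
        {
            /* ... VM execution, PGMDynMapHCPage & friends ... */
            PGMR0DynMapTermVM(pVM);         /* last user: pgmR0DynMapTearDown frees segments. */
        }
        PGMR0DynMapTerm();                  /* module term: destroys the instance. */
    }
    return rc;
}
#endif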
484
485
486/**
487 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
488 *
489 * @param idCpu The current CPU.
490 * @param pvUser1 The dynamic mapping cache instance.
491 * @param pvUser2 Unused, NULL.
492 */
493static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
494{
495 Assert(!pvUser2);
496 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)pvUser1;
497 Assert(pThis == g_pPGMR0DynMap);
498 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
499 uint32_t iPage = pThis->cPages;
500 while (iPage-- > 0)
501 ASMInvalidatePage(paPages[iPage].pvPage);
502}
503
504
505/**
506 * Shoot down the TLBs for every single cache entry on all CPUs.
507 *
508 * @returns IPRT status code (RTMpOnAll).
509 * @param pThis The dynamic mapping cache instance.
510 */
511static int pgmR0DynMapTlbShootDown(PPGMR0DYNMAP pThis)
512{
513 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
514 AssertRC(rc);
515 if (RT_FAILURE(rc))
516 {
517 uint32_t iPage = pThis->cPages;
518 while (iPage-- > 0)
519 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
520 }
521 return rc;
522}
523
524
525/**
526 * Calculate the new cache size based on cMaxLoad statistics.
527 *
528 * @returns Number of pages.
529 * @param pThis The dynamic mapping cache instance.
530 * @param pcMinPages Where to return the minimal size in pages.
531 */
532static uint32_t pgmR0DynMapCalcNewSize(PPGMR0DYNMAP pThis, uint32_t *pcMinPages)
533{
534 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
535
536 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU (/2). */
537 RTCPUID cCpus = RTMpGetCount();
538 AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
539 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
540 uint32_t cMinPages = cCpus * (PGMR0DYNMAP_PAGES_PER_CPU / 2);
541
542 /* adjust against cMaxLoad. */
543 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
544 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
545 pThis->cMaxLoad = 0;
546
547 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
548 cPages += PGMR0DYNMAP_PAGES_PER_CPU;
549
550 if (pThis->cMaxLoad > cMinPages)
551 cMinPages = pThis->cMaxLoad;
552
553 /* adjust against max and current size. */
554 if (cPages < pThis->cPages)
555 cPages = pThis->cPages;
556 cPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
557 if (cPages > PGMR0DYNMAP_MAX_PAGES)
558 cPages = PGMR0DYNMAP_MAX_PAGES;
559
560 if (cMinPages < pThis->cPages)
561 cMinPages = pThis->cPages;
562 cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
563 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
564 cMinPages = PGMR0DYNMAP_MAX_PAGES;
565
566 Assert(cMinPages);
567 *pcMinPages = cMinPages;
568 return cPages;
569}
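/* A worked example of the calculation above, assuming a 4-CPU host, a fresh
 * cache (cPages = 0, cMaxLoad = 0) and a strict build where
 * PGMR0DYNMAP_GUARD_PAGES is 1; the helper name pgmR0DynMapCalcSketch is made
 * up for illustration. The base figures are 4 * 64 = 256 pages with a minimum
 * of 4 * 32 = 128; the guard page factor doubles both to 512 and 256, which is
 * still well below PGMR0DYNMAP_MAX_PAGES (2048). */
#if 0 /* illustration only, never built */
static void pgmR0DynMapCalcSketch(void)
{
    uint32_t const cCpus     = 4;                                           /* assumed host CPU count */
    uint32_t       cPages    = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;           /* 256 */
    uint32_t       cMinPages = cCpus * (PGMR0DYNMAP_PAGES_PER_CPU / 2);     /* 128 */
    cPages    *= PGMR0DYNMAP_GUARD_PAGES + 1;                               /* 512 in strict builds */
    cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;                               /* 256 in strict builds */
    Assert(cPages <= PGMR0DYNMAP_MAX_PAGES && cMinPages <= PGMR0DYNMAP_MAX_PAGES); /* cap is 2048 */
}
#endif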
570
571
572/**
573 * Initializes the paging level data.
574 *
575 * @param pThis The dynamic mapping cache instance.
576 * @param pPgLvl The paging level data.
577 */
578void pgmR0DynMapPagingArrayInit(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
579{
580 RTCCUINTREG cr4 = ASMGetCR4();
581 switch (pThis->enmPgMode)
582 {
583 case SUPPAGINGMODE_32_BIT:
584 case SUPPAGINGMODE_32_BIT_GLOBAL:
585 pPgLvl->cLevels = 2;
586 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
587 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
588 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
589 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
590 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
591
592 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
593 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
594 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
595 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
596 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
597 break;
598
599 case SUPPAGINGMODE_PAE:
600 case SUPPAGINGMODE_PAE_GLOBAL:
601 case SUPPAGINGMODE_PAE_NX:
602 case SUPPAGINGMODE_PAE_GLOBAL_NX:
603 pPgLvl->cLevels = 3;
604 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
605 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
606 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
607 pPgLvl->a[0].fAndMask = X86_PDPE_P;
608 pPgLvl->a[0].fResMask = X86_PDPE_P;
609
610 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
611 pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK;
612 pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT;
613 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
614 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
615
616 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
617 pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK;
618 pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT;
619 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
620 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
621 break;
622
623 case SUPPAGINGMODE_AMD64:
624 case SUPPAGINGMODE_AMD64_GLOBAL:
625 case SUPPAGINGMODE_AMD64_NX:
626 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
627 pPgLvl->cLevels = 4;
628 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
629 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
630 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
631 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
632 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
633
634 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
635 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
636 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
637 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
638 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
639
640 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
641 pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT;
642 pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK;
643 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
644 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
645
646 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
647 pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT;
648 pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK;
649 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
650 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
651 break;
652
653 default:
654 AssertFailed();
655 pPgLvl->cLevels = 0;
656 break;
657 }
658
659 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
660 {
661 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
662 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
663 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
664 pPgLvl->a[i].u.pv = NULL;
665 }
666}
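/* A small sketch of how the fPtrShift / fPtrMask pairs initialized above are
 * consumed; the helper name pgmR0DynMapPgLvlIndexSketch is made up for
 * illustration. For each level, the table index of a virtual address is just
 * the address shifted right and masked, which is exactly what
 * pgmR0DynMapPagingArrayMapPte does below when walking from CR3 to the PT. */
#if 0 /* illustration only, never built */
static uint32_t pgmR0DynMapPgLvlIndexSketch(PPGMR0DYNMAPPGLVL pPgLvl, uint32_t iLevel, void *pvPage)
{
    /* E.g. the PAE PT level: ((uintptr_t)pvPage >> X86_PT_PAE_SHIFT) & X86_PT_PAE_MASK. */
    return (uint32_t)(((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[iLevel].fPtrShift) & pPgLvl->a[iLevel].fPtrMask);
}
#endif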
667
668
669/**
670 * Maps a PTE.
671 *
672 * This will update the segment structure when new PTs are mapped.
673 *
674 * It also assumes that we (for paranoid reasons) wish to establish a mapping
675 * chain from CR3 to the PT that corresponds entirely to the processor we're
676 * currently running on, and go about this by running with interrupts disabled
677 * and restarting from CR3 for every change.
678 *
679 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
680 * to re-enable interrupts.
681 * @param pThis The dynamic mapping cache instance.
682 * @param pPgLvl The paging level structure.
683 * @param pvPage The page.
684 * @param pSeg The segment.
685 * @param cMaxPTs The max number of PTs expected in the segment.
686 * @param ppvPTE Where to store the PTE address.
687 */
688static int pgmR0DynMapPagingArrayMapPte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
689 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
690{
691 Assert(!(ASMGetFlags() & X86_EFL_IF));
692 void *pvEntry = NULL;
693 X86PGPAEUINT uEntry = ASMGetCR3();
694 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
695 {
696 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
697 if (pPgLvl->a[i].HCPhys != HCPhys)
698 {
699 /*
700 * Need to remap this level.
701 * The final level, the PT, will not be freed since that is what it's all about.
702 */
703 ASMIntEnable();
704 if (i + 1 == pPgLvl->cLevels)
705 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_INTERNAL_ERROR);
706 else
707 {
708 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
709 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
710 }
711
712 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE);
713 if (RT_SUCCESS(rc))
714 {
715 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
716 (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
717 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
718 if (RT_SUCCESS(rc))
719 {
720 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
721 AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
722 pPgLvl->a[i].HCPhys = HCPhys;
723 if (i + 1 == pPgLvl->cLevels)
724 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
725 ASMIntDisable();
726 return VINF_TRY_AGAIN;
727 }
728
729 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
730 }
731 else
732 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
733 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
734 return rc;
735 }
736
737 /*
738 * The next level.
739 */
740 uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
741 if (pThis->fLegacyMode)
742 {
743 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
744 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
745 }
746 else
747 {
748 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
749 uEntry = pPgLvl->a[i].u.paPae[iEntry];
750 }
751
752 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
753 {
754 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n"
755 "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n",
756 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask,
757 pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode));
758 return VERR_INTERNAL_ERROR;
759 }
760 /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/
761 }
762
763 /* made it thru without needing to remap anything. */
764 *ppvPTE = pvEntry;
765 return VINF_SUCCESS;
766}
767
768
769/**
770 * Sets up a guard page.
771 *
772 * @param pThis The dynamic mapping cache instance.
773 * @param pPage The page.
774 */
775DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMR0DYNMAP pThis, PPGMR0DYNMAPENTRY pPage)
776{
777 memset(pPage->pvPage, 0xfd, PAGE_SIZE);
778 pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT;
779 pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
780#ifdef PGMR0DYNMAP_GUARD_NP
781 ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P);
782#else
783 if (pThis->fLegacyMode)
784 ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE);
785 else
786 ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE);
787#endif
788 pThis->cGuardPages++;
789}
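/* A sketch of how a guard page can be told apart later on, mirroring the
 * checks in PGMR0DynMapAssertIntegrity; the helper name
 * pgmR0DynMapIsGuardPageSketch is made up for illustration. The dummy
 * reference count and dummy physical address set above are what mark the
 * entry, so the normal mapping path will neither match nor reuse it. */
#if 0 /* illustration only, never built */
DECLINLINE(bool) pgmR0DynMapIsGuardPageSketch(PPGMR0DYNMAPENTRY pPage)
{
    return pPage->cRefs  == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
        && pPage->HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
}
#endif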
790
791
792/**
793 * Adds a new segment of the specified size.
794 *
795 * @returns VBox status code.
796 * @param pThis The dynamic mapping cache instance.
797 * @param cPages The size of the new segment, given as a page count.
798 */
799static int pgmR0DynMapAddSeg(PPGMR0DYNMAP pThis, uint32_t cPages)
800{
801 int rc2;
802 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
803
804 /*
805 * Do the array reallocations first.
806 * (The pages array has to be replaced behind the spinlock of course.)
807 */
808 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
809 if (!pvSavedPTEs)
810 return VERR_NO_MEMORY;
811 pThis->pvSavedPTEs = pvSavedPTEs;
812
813 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
814 if (!pvPages)
815 {
816 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
817 if (pvSavedPTEs)
818 pThis->pvSavedPTEs = pvSavedPTEs;
819 return VERR_NO_MEMORY;
820 }
821
822 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
823 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
824
825 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
826 void *pvToFree = pThis->paPages;
827 pThis->paPages = (PPGMR0DYNMAPENTRY)pvPages;
828
829 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
830 RTMemFree(pvToFree);
831
832 /*
833 * Allocate the segment structure and pages of memory, then touch all the pages (paranoia).
834 */
835 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
836 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
837 if (!pSeg)
838 return VERR_NO_MEMORY;
839 pSeg->pNext = NULL;
840 pSeg->cPages = cPages;
841 pSeg->iPage = pThis->cPages;
842 pSeg->cPTs = 0;
843 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
844 if (RT_SUCCESS(rc))
845 {
846 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
847 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
848 memset(pbPage, 0xfe, cPages << PAGE_SHIFT);
849
850 /*
851 * Walk thru the pages and set them up with a mapping of their PTE and everything.
852 */
853 ASMIntDisable();
854 PGMR0DYNMAPPGLVL PgLvl;
855 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
856 uint32_t const iEndPage = pThis->cPages + cPages;
857 for (uint32_t iPage = pThis->cPages;
858 iPage < iEndPage;
859 iPage++, pbPage += PAGE_SIZE)
860 {
861 /* Initialize the page data. */
862 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
863 pThis->paPages[iPage].pvPage = pbPage;
864 pThis->paPages[iPage].cRefs = 0;
865 pThis->paPages[iPage].uPte.pPae = 0;
866 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
867
868 /* Map its page table, retry until we've got a clean run (paranoia). */
869 do
870 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
871 &pThis->paPages[iPage].uPte.pv);
872 while (rc == VINF_TRY_AGAIN);
873 if (RT_FAILURE(rc))
874 break;
875
876 /* Save the PTE. */
877 if (pThis->fLegacyMode)
878 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
879 else
880 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
881
882#ifdef VBOX_STRICT
883 /* Check that we've got the right entry. */
884 RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage);
885 RTHCPHYS HCPhysPte = pThis->fLegacyMode
886 ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK
887 : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK;
888 if (HCPhysPage != HCPhysPte)
889 {
890 LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n",
891 iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv));
892 rc = VERR_INTERNAL_ERROR;
893 break;
894 }
895#endif
896 } /* for each page */
897 ASMIntEnable();
898
899 /* cleanup non-PT mappings */
900 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
901 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
902
903 if (RT_SUCCESS(rc))
904 {
905#if PGMR0DYNMAP_GUARD_PAGES > 0
906 /*
907 * Setup guard pages.
908 * (Note: TLBs will be shot down later on.)
909 */
910 uint32_t iPage = pThis->cPages;
911 while (iPage < iEndPage)
912 {
913 for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++)
914 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
915 iPage++; /* the guarded page */
916 }
917
918 /* Make sure the very last page is a guard page too. */
919 iPage = iEndPage - 1;
920 if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT)
921 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
922#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */
923
924 /*
925 * Commit it by adding the segment to the list and updating the page count.
926 */
927 pSeg->pNext = pThis->pSegHead;
928 pThis->pSegHead = pSeg;
929 pThis->cPages += cPages;
930 return VINF_SUCCESS;
931 }
932
933 /*
934 * Bail out.
935 */
936 while (pSeg->cPTs-- > 0)
937 {
938 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
939 AssertRC(rc2);
940 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
941 }
942
943 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
944 AssertRC(rc2);
945 pSeg->hMemObj = NIL_RTR0MEMOBJ;
946 }
947 RTMemFree(pSeg);
948
949 /* Don't bother resizing the arrays, but free them if we're the only user. */
950 if (!pThis->cPages)
951 {
952 RTMemFree(pThis->paPages);
953 pThis->paPages = NULL;
954 RTMemFree(pThis->pvSavedPTEs);
955 pThis->pvSavedPTEs = NULL;
956 }
957 return rc;
958}
959
960
961/**
962 * Called by PGMR0DynMapInitVM under the init lock.
963 *
964 * @returns VBox status code.
965 * @param pThis The dynamic mapping cache instance.
966 */
967static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis)
968{
969 /*
970 * Calc the size and add a segment of that size.
971 */
972 uint32_t cMinPages;
973 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
974 AssertReturn(cPages, VERR_INTERNAL_ERROR);
975 int rc = pgmR0DynMapAddSeg(pThis, cPages);
976 if (rc == VERR_NO_MEMORY)
977 {
978 /*
979 * Try adding smaller segments.
980 */
981 do
982 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
983 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
984 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
985 rc = VINF_SUCCESS;
986 if (rc == VERR_NO_MEMORY)
987 {
988 if (pThis->cPages)
989 pgmR0DynMapTearDown(pThis);
990 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
991 }
992 }
993 Assert(ASMGetFlags() & X86_EFL_IF);
994
995#if PGMR0DYNMAP_GUARD_PAGES > 0
996 /* paranoia */
997 if (RT_SUCCESS(rc))
998 pgmR0DynMapTlbShootDown(pThis);
999#endif
1000 return rc;
1001}
1002
1003
1004/**
1005 * Called by PGMR0DynMapInitVM under the init lock.
1006 *
1007 * @returns VBox status code.
1008 * @param pThis The dynamic mapping cache instance.
1009 */
1010static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis)
1011{
1012 /*
1013 * Calc the new target size and add a segment of the appropriate size.
1014 */
1015 uint32_t cMinPages;
1016 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1017 AssertReturn(cPages, VERR_INTERNAL_ERROR);
1018 if (pThis->cPages >= cPages)
1019 return VINF_SUCCESS;
1020
1021 uint32_t cAdd = cPages - pThis->cPages;
1022 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
1023 if (rc == VERR_NO_MEMORY)
1024 {
1025 /*
1026 * Try adding smaller segments.
1027 */
1028 do
1029 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1030 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1031 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1032 rc = VINF_SUCCESS;
1033 if (rc == VERR_NO_MEMORY)
1034 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
1035 }
1036 Assert(ASMGetFlags() & X86_EFL_IF);
1037
1038#if PGMR0DYNMAP_GUARD_PAGES > 0
1039 /* paranoia */
1040 if (RT_SUCCESS(rc))
1041 pgmR0DynMapTlbShootDown(pThis);
1042#endif
1043 return rc;
1044}
1045
1046
1047/**
1048 * Called by PGMR0DynMapTermVM under the init lock.
1049 *
1051 * @param pThis The dynamic mapping cache instance.
1052 */
1053static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis)
1054{
1055 /*
1056 * Restore the original page table entries
1057 */
1058 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1059 uint32_t iPage = pThis->cPages;
1060 if (pThis->fLegacyMode)
1061 {
1062 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
1063 while (iPage-- > 0)
1064 {
1065 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
1066 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1067 X86PGUINT uNew = paSavedPTEs[iPage];
1068 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
1069 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1070 Assert(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage]);
1071 }
1072 }
1073 else
1074 {
1075 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
1076 while (iPage-- > 0)
1077 {
1078 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
1079 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1080 X86PGPAEUINT uNew = paSavedPTEs[iPage];
1081 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
1082 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1083 Assert(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage]);
1084 }
1085 }
1086
1087 /*
1088 * Shoot down the TLBs on all CPUs before freeing them.
1089 */
1090 pgmR0DynMapTlbShootDown(pThis);
1091
1092 /*
1093 * Free the segments.
1094 */
1095 while (pThis->pSegHead)
1096 {
1097 int rc;
1098 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
1099 pThis->pSegHead = pSeg->pNext;
1100
1101 uint32_t iPT = pSeg->cPTs;
1102 while (iPT-- > 0)
1103 {
1104 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
1105 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
1106 }
1107 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
1108 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1109 pSeg->pNext = NULL;
1110 pSeg->iPage = UINT16_MAX;
1111 pSeg->cPages = 0;
1112 pSeg->cPTs = 0;
1113 RTMemFree(pSeg);
1114 }
1115
1116 /*
1117 * Free the arrays and restore the initial state.
1118 * The cMaxLoad value is left behind for the next setup.
1119 */
1120 RTMemFree(pThis->paPages);
1121 pThis->paPages = NULL;
1122 RTMemFree(pThis->pvSavedPTEs);
1123 pThis->pvSavedPTEs = NULL;
1124 pThis->cPages = 0;
1125 pThis->cLoad = 0;
1126 pThis->cGuardPages = 0;
1127}
1128
1129
1130/**
1131 * Release references to a page, caller owns the spin lock.
1132 *
1133 * @param pThis The dynamic mapping cache instance.
1134 * @param iPage The page.
1135 * @param cRefs The number of references to release.
1136 */
1137DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs)
1138{
1139 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs;
1140 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
1141 if (!cRefs)
1142 pThis->cLoad--;
1143}
1144
1145
1146/**
1147 * Release references to a page, caller does not own the spin lock.
1148 *
1149 * @param pThis The dynamic mapping cache instance.
1150 * @param iPage The page.
1151 * @param cRefs The number of references to release.
1152 */
1153static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1154{
1155 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1156 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1157 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1158 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1159}
1160
1161
1162/**
1163 * pgmR0DynMapPage worker that deals with the tedious bits.
1164 *
1165 * @returns The page index on success, UINT32_MAX on failure.
1166 * @param pThis The dynamic mapping cache instance.
1167 * @param HCPhys The address of the page to be mapped.
1168 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1169 * @param pVM The shared VM structure, for statistics only.
1170 */
1171static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage, PVM pVM)
1172{
1173 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageSlow);
1174
1175 /*
1176 * Check if any of the first 5 pages are unreferenced since the caller
1177 * already has made sure they aren't matching.
1178 */
1179 uint32_t const cPages = pThis->cPages;
1180 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1181 uint32_t iFreePage;
1182 if (!paPages[iPage].cRefs)
1183 iFreePage = iPage;
1184 else if (!paPages[(iPage + 1) % cPages].cRefs)
1185 iFreePage = (iPage + 1) % cPages;
1186 else if (!paPages[(iPage + 2) % cPages].cRefs)
1187 iFreePage = (iPage + 2) % cPages;
1188 else if (!paPages[(iPage + 3) % cPages].cRefs)
1189 iFreePage = (iPage + 3) % cPages;
1190 else if (!paPages[(iPage + 4) % cPages].cRefs)
1191 iFreePage = (iPage + 4) % cPages;
1192 else
1193 {
1194 /*
1195 * Search for an unused or matching entry.
1196 */
1197 iFreePage = (iPage + 5) % cPages;
1198 for (;;)
1199 {
1200 if (paPages[iFreePage].HCPhys == HCPhys)
1201 {
1202 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageSlowLoopHits);
1203 return iFreePage;
1204 }
1205 if (!paPages[iFreePage].cRefs)
1206 break;
1207
1208 /* advance */
1209 iFreePage = (iFreePage + 1) % cPages;
1210 if (RT_UNLIKELY(iFreePage == iPage))
1211 return UINT32_MAX;
1212 }
1213 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPageSlowLoopMisses);
1214 }
1215 Assert(iFreePage < cPages);
1216
1217 /*
1218 * Setup the new entry.
1219 */
1220 /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/
1221 paPages[iFreePage].HCPhys = HCPhys;
1222 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1223 if (pThis->fLegacyMode)
1224 {
1225 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1226 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1227 X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1228 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1229 | (HCPhys & X86_PTE_PG_MASK);
1230 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1231 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1232 Assert(paPages[iFreePage].uPte.pLegacy->u == uNew);
1233 }
1234 else
1235 {
1236 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1237 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1238 X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1239 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1240 | (HCPhys & X86_PTE_PAE_PG_MASK);
1241 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1242 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1243 Assert(paPages[iFreePage].uPte.pPae->u == uNew);
1244 /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/
1245 }
1246 return iFreePage;
1247}
1248
1249
1250/**
1251 * Maps a page into the pool.
1252 *
1253 * @returns Page index on success, UINT32_MAX on failure.
1254 * @param pThis The dynamic mapping cache instance.
1255 * @param HCPhys The address of the page to be mapped.
1256 * @param pVM The shared VM structure, for statistics only.
1257 * @param ppvPage Where to store the page address.
1258 */
1259DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, PVM pVM, void **ppvPage)
1260{
1261 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1262 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1263 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1264 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapPage);
1265
1266 /*
1267 * Find an entry, if possible a matching one. The HCPhys address is hashed
1268 * down to a page index, collisions are handled by linear searching. Optimize
1269 * for a hit in the first 5 pages.
1270 *
1271 * Do the cheap hits here and defer the tedious searching and inserting
1272 * to a helper function.
1273 */
1274 uint32_t const cPages = pThis->cPages;
1275 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1276 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1277 if (paPages[iPage].HCPhys != HCPhys)
1278 {
1279 uint32_t iPage2 = (iPage + 1) % cPages;
1280 if (paPages[iPage2].HCPhys != HCPhys)
1281 {
1282 iPage2 = (iPage + 2) % cPages;
1283 if (paPages[iPage2].HCPhys != HCPhys)
1284 {
1285 iPage2 = (iPage + 3) % cPages;
1286 if (paPages[iPage2].HCPhys != HCPhys)
1287 {
1288 iPage2 = (iPage + 4) % cPages;
1289 if (paPages[iPage2].HCPhys != HCPhys)
1290 {
1291 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage, pVM);
1292 if (RT_UNLIKELY(iPage == UINT32_MAX))
1293 {
1294 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1295 return iPage;
1296 }
1297 }
1298 else
1299 iPage = iPage2;
1300 }
1301 else
1302 iPage = iPage2;
1303 }
1304 else
1305 iPage = iPage2;
1306 }
1307 else
1308 iPage = iPage2;
1309 }
1310
1311 /*
1312 * Reference it, update statistics and get the return address.
1313 */
1314 int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs);
1315 if (cRefs == 1)
1316 {
1317 pThis->cLoad++;
1318 if (pThis->cLoad > pThis->cMaxLoad)
1319 pThis->cMaxLoad = pThis->cLoad;
1320 AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages));
1321 }
1322 else if (RT_UNLIKELY(cRefs <= 0))
1323 {
1324 ASMAtomicDecS32(&paPages[iPage].cRefs);
1325 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1326 AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%p HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX);
1327 }
1328 void *pvPage = paPages[iPage].pvPage;
1329
1330 /*
1331 * Invalidate the entry?
1332 */
1333 RTCPUID idRealCpu = RTMpCpuId();
1334 bool fInvalidateIt = RTCpuSetIsMember(&paPages[iPage].PendingSet, idRealCpu);
1335 if (fInvalidateIt)
1336 RTCpuSetDel(&paPages[iPage].PendingSet, idRealCpu);
1337
1338 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1339
1340 /*
1341 * Do the actual invalidation outside the spinlock.
1342 */
1343 ASMInvalidatePage(pvPage);
1344
1345 *ppvPage = pvPage;
1346 return iPage;
1347}
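/* A sketch of the hashing scheme used above; the helper name
 * pgmR0DynMapHashSketch and the 512 page cache size in the comment are
 * assumptions for illustration. The host physical address is reduced to a page
 * frame number and taken modulo the cache size, the next four slots are probed
 * on a collision, and only then does pgmR0DynMapPageSlow do a full linear
 * search for a free or matching entry. */
#if 0 /* illustration only, never built */
static uint32_t pgmR0DynMapHashSketch(RTHCPHYS HCPhys, uint32_t cPages)
{
    Assert(!(HCPhys & PAGE_OFFSET_MASK));
    return (uint32_t)((HCPhys >> PAGE_SHIFT) % cPages); /* e.g. 0x12345000 with cPages=512 gives slot 0x145. */
}
#endif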
1348
1349
1350/**
1351 * Asserts the integrity of the pool.
1352 *
1353 * @returns VBox status code.
1354 */
1355VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void)
1356{
1357 /*
1358 * Basic pool stuff that doesn't require any lock, just assumes we're a user.
1359 */
1360 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1361 if (!pThis)
1362 return VINF_SUCCESS;
1363 AssertPtrReturn(pThis, VERR_INVALID_POINTER);
1364 AssertReturn(pThis->u32Magic == PGMR0DYNMAP_MAGIC, VERR_INVALID_MAGIC);
1365 if (!pThis->cUsers)
1366 return VERR_INVALID_PARAMETER;
1367
1368
1369 int rc = VINF_SUCCESS;
1370 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1371 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1372
1373#define CHECK_RET(expr, a) \
1374 do { \
1375 if (RT_UNLIKELY(!(expr))) \
1376 { \
1377 RTSpinlockRelease(pThis->hSpinlock, &Tmp); \
1378 AssertMsg1(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \
1379 AssertMsg2 a; \
1380 return VERR_INTERNAL_ERROR; \
1381 } \
1382 } while (0)
1383
1384 /*
1385 * Check that the PTEs are correct.
1386 */
1387 uint32_t cGuard = 0;
1388 uint32_t cLoad = 0;
1389 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1390 uint32_t iPage = pThis->cPages;
1391 if (pThis->fLegacyMode)
1392 {
1393 PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1394 while (iPage-- > 0)
1395 {
1396 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1397 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1398 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1399 {
1400#ifdef PGMR0DYNMAP_GUARD_NP
1401 CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P),
1402 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1403#else
1404 CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE,
1405 ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u));
1406#endif
1407 cGuard++;
1408 }
1409 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1410 {
1411 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1412 X86PGUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1413 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1414 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1415 CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte,
1416 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1417 if (paPages[iPage].cRefs)
1418 cLoad++;
1419 }
1420 else
1421 CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage],
1422 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1423 }
1424 }
1425 else
1426 {
1427 PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1428 while (iPage-- > 0)
1429 {
1430 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1431 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1432 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1433 {
1434#ifdef PGMR0DYNMAP_GUARD_NP
1435 CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P),
1436 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1437#else
1438 CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE,
1439 ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u));
1440#endif
1441 cGuard++;
1442 }
1443 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1444 {
1445 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1446 X86PGPAEUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1447 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1448 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1449 CHECK_RET(paPages[iPage].uPte.pPae->u == uPte,
1450 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1451 if (paPages[iPage].cRefs)
1452 cLoad++;
1453 }
1454 else
1455 CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage],
1456 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1457 }
1458 }
1459
1460 CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad));
1461 CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages));
1462
1463#undef CHECK_RET
1464 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1465 return VINF_SUCCESS;
1466}
1467
1468
1469/**
1470 * Signals the start of a new set of mappings.
1471 *
1472 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1473 * API is called.
1474 *
1475 * @param pVCpu The shared data for the current virtual CPU.
1476 */
1477VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu)
1478{
1479 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1480 pVCpu->pgm.s.AutoSet.cEntries = 0;
1481}
1482
1483
1484/**
1485 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associated
1486 * APIs since the PGMDynMapStartAutoSet call.
1487 *
1488 * If the set is already closed, nothing will be done.
1489 *
1490 * @param pVCpu The shared data for the current virtual CPU.
1491 */
1492VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu)
1493{
1494 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1495
1496 /*
1497 * Is the set open?
1498 *
1499 * We might be closed before VM execution and not reopened again before
1500 * we leave for ring-3 or something.
1501 */
1502 uint32_t i = pSet->cEntries;
1503 if (i != PGMMAPSET_CLOSED)
1504 {
1505 /*
1506 * Close the set
1507 */
1508 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1509 pSet->cEntries = PGMMAPSET_CLOSED;
1510
1511 /*
1512 * Release any pages it's referencing.
1513 */
1514 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1515 {
1516 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1517 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1518 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1519
1520 while (i-- > 0)
1521 {
1522 uint32_t iPage = pSet->aEntries[i].iPage;
1523 Assert(iPage < pThis->cPages);
1524 int32_t cRefs = pSet->aEntries[i].cRefs;
1525 Assert(cRefs > 0);
1526 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1527
1528 pSet->aEntries[i].iPage = UINT16_MAX;
1529 pSet->aEntries[i].cRefs = 0;
1530 }
1531
1532 Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages);
1533 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1534 }
1535 }
1536}
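/* A rough usage sketch for the auto set API, assuming a caller on the EMT with
 * a valid pVM / pVCpu pair; the function name pgmR0DynMapUsageSketch and the
 * HCPhysPage parameter are made up for illustration. Mappings established
 * between the start and release calls are reference counted in the set and
 * dropped again as one batch when the set is closed. */
#if 0 /* illustration only, never built */
static int pgmR0DynMapUsageSketch(PVM pVM, PVMCPU pVCpu, RTHCPHYS HCPhysPage)
{
    PGMDynMapStartAutoSet(pVCpu);                       /* open the set */

    void *pvPage;
    int rc = PGMDynMapHCPage(pVM, HCPhysPage, &pvPage); /* map + add to the set */
    if (RT_SUCCESS(rc))
    {
        /* ... access the page thru pvPage ... */
    }

    PGMDynMapReleaseAutoSet(pVCpu);                     /* close the set, drop the references */
    return rc;
}
#endif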
1537
1538
1539/**
1540 * Migrates the automatic mapping set of the current vCPU if it's active and
1541 * necessary.
1542 *
1543 * This is called when re-entering the hardware assisted execution mode after a
1544 * nip down to ring-3. We run the risk that the CPU might have changed, and we
1545 * will therefore make sure all the cache entries currently in the auto set are
1546 * valid on the new CPU. If the CPU didn't change, nothing will happen as all
1547 * the entries will already have been flagged as invalidated.
1548 *
1549 * @param pVCpu The shared data for the current virtual CPU.
1550 * @thread EMT
1551 */
1552VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu)
1553{
1554 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1555 uint32_t i = pSet->cEntries;
1556 if (i != PGMMAPSET_CLOSED)
1557 {
1558 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
1559 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
1560 {
1561 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1562 RTCPUID idRealCpu = RTMpCpuId();
1563
1564 while (i-- > 0)
1565 {
1566 Assert(pSet->aEntries[i].cRefs > 0);
1567 uint32_t iPage = pSet->aEntries[i].iPage;
1568 Assert(iPage < pThis->cPages);
1569 if (RTCpuSetIsMember(&pThis->paPages[iPage].PendingSet, idRealCpu))
1570 {
1571 RTCpuSetDel(&pThis->paPages[iPage].PendingSet, idRealCpu);
1572 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
1573 }
1574 }
1575 }
1576 }
1577}
1578
1579
1580/**
1581 * As a final resort for a full auto set, try to merge duplicate entries.
1582 *
1583 * @param pSet The set.
1584 */
1585static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1586{
1587 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1588 {
1589 uint16_t const iPage = pSet->aEntries[i].iPage;
1590 uint32_t j = i + 1;
1591 while (j < pSet->cEntries)
1592 {
1593 if (pSet->aEntries[j].iPage != iPage)
1594 j++;
1595 else if ((uint32_t)pSet->aEntries[i].cRefs + (uint32_t)pSet->aEntries[j].cRefs < UINT16_MAX)
1596 {
1597 /* merge j into i removing j. */
1598 pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs;
1599 pSet->cEntries--;
1600 if (j < pSet->cEntries)
1601 {
1602 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1603 pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX;
1604 pSet->aEntries[pSet->cEntries].cRefs = 0;
1605 }
1606 else
1607 {
1608 pSet->aEntries[j].iPage = UINT16_MAX;
1609 pSet->aEntries[j].cRefs = 0;
1610 }
1611 }
1612 else
1613 {
1614 /* migrate the max number of refs from j into i and quit the inner loop. */
1615 uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs;
1616 Assert(pSet->aEntries[j].cRefs > cMigrate);
1617 pSet->aEntries[j].cRefs -= cMigrate;
1618 pSet->aEntries[i].cRefs = UINT16_MAX - 1;
1619 break;
1620 }
1621 }
1622 }
1623}
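/* A small worked example of the merging above, assuming a set that holds two
 * entries for the same iPage with cRefs 3 and 2; the helper name
 * pgmDynMapOptimizeSketch is made up for illustration. The optimizer folds the
 * duplicates into one entry with cRefs 5 and shrinks cEntries by one, freeing
 * a slot so PGMDynMapHCPage need not fail with a full set. */
#if 0 /* illustration only, never built */
static void pgmDynMapOptimizeSketch(PPGMMAPSET pSet)
{
    uint32_t const cBefore = pSet->cEntries;
    pgmDynMapOptimizeAutoSet(pSet);
    Assert(pSet->cEntries <= cBefore); /* never grows; shrinks when duplicates were merged. */
}
#endif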
1624
1625
1626/* documented elsewhere - a bit of a mess. */
1627VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv)
1628{
1629 /*
1630 * Validate state.
1631 */
1632 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapHCPage);
1633 AssertPtr(ppv);
1634 *ppv = NULL;
1635 AssertMsgReturn(pVM->pgm.s.pvR0DynMapUsed == g_pPGMR0DynMap,
1636 ("%p != %p\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap),
1637 VERR_ACCESS_DENIED);
1638 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1639 PVMCPU pVCpu = VMMGetCpu(pVM);
1640 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1641 AssertPtrReturn(pVCpu, VERR_INTERNAL_ERROR);
1642 AssertMsgReturn(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries),
1643 ("%#x (%u)\n", pSet->cEntries, pSet->cEntries), VERR_WRONG_ORDER);
1644
1645 /*
1646 * Map it.
1647 */
1648 uint32_t const iPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, pVM, ppv);
1649 if (RT_UNLIKELY(iPage == UINT32_MAX))
1650 {
1651 static uint32_t s_cBitched = 0;
1652 if (++s_cBitched < 10)
1653 LogRel(("PGMDynMapHCPage: cLoad=%u/%u cPages=%u cGuardPages=%u\n",
1654 g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages, g_pPGMR0DynMap->cGuardPages));
1655 return VERR_PGM_DYNMAP_FAILED;
1656 }
1657
1658 /*
1659 * Add the page to the auto reference set.
1660 *
1661 * The typical usage pattern means that the same pages will be mapped
1662 * several times in the same set. We can catch most of these
1663 * remappings by looking a few pages back into the set. (The searching
1664 * and set optimizing path will hardly ever be used when doing this.)
1665 */
1666 AssertCompile(RT_ELEMENTS(pSet->aEntries) >= 8);
1667 int32_t i = pSet->cEntries;
1668 if (i-- < 5)
1669 {
1670 pSet->aEntries[pSet->cEntries].cRefs = 1;
1671 pSet->aEntries[pSet->cEntries].iPage = iPage;
1672 pSet->cEntries++;
1673 }
1674 /* Any of the last 5 pages? */
1675 else if ( pSet->aEntries[i - 0].iPage == iPage
1676 && pSet->aEntries[i - 0].cRefs < UINT16_MAX - 1)
1677 pSet->aEntries[i - 0].cRefs++;
1678 else if ( pSet->aEntries[i - 1].iPage == iPage
1679 && pSet->aEntries[i - 1].cRefs < UINT16_MAX - 1)
1680 pSet->aEntries[i - 1].cRefs++;
1681 else if ( pSet->aEntries[i - 2].iPage == iPage
1682 && pSet->aEntries[i - 2].cRefs < UINT16_MAX - 1)
1683 pSet->aEntries[i - 2].cRefs++;
1684 else if ( pSet->aEntries[i - 3].iPage == iPage
1685 && pSet->aEntries[i - 3].cRefs < UINT16_MAX - 1)
1686 pSet->aEntries[i - 3].cRefs++;
1687 else if ( pSet->aEntries[i - 4].iPage == iPage
1688 && pSet->aEntries[i - 4].cRefs < UINT16_MAX - 1)
1689 pSet->aEntries[i - 4].cRefs++;
1690 /* Don't bother searching unless we're above a 75% load. */
1691 else if (i <= (int32_t)RT_ELEMENTS(pSet->aEntries) / 4 * 3)
1692 {
1693 pSet->aEntries[pSet->cEntries].cRefs = 1;
1694 pSet->aEntries[pSet->cEntries].iPage = iPage;
1695 pSet->cEntries++;
1696 }
1697 else
1698 {
1699 /* Search the rest of the set. */
1700 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
1701 i -= 4;
1702 while (i-- > 0)
1703 if ( pSet->aEntries[i].iPage == iPage
1704 && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
1705 {
1706 pSet->aEntries[i].cRefs++;
1707 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapHCPageSetSearchHits);
1708 break;
1709 }
1710 if (i < 0)
1711 {
1712 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapHCPageSetSearchMisses);
1713 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
1714 {
1715 STAM_COUNTER_INC(&pVM->pgm.s.StatR0DynMapHCPageSetOptimize);
1716 pgmDynMapOptimizeAutoSet(pSet);
1717 }
1718 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
1719 {
1720 pSet->aEntries[pSet->cEntries].cRefs = 1;
1721 pSet->aEntries[pSet->cEntries].iPage = iPage;
1722 pSet->cEntries++;
1723 }
1724 else
1725 {
1726 /* We're screwed. */
1727 pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1);
1728
1729 static uint32_t s_cBitched = 0;
1730 if (++s_cBitched < 10)
1731 LogRel(("PGMDynMapHCPage: set is full!\n"));
1732 *ppv = NULL;
1733 return VERR_PGM_DYNMAP_FULL_SET;
1734 }
1735 }
1736 }
1737
1738 return VINF_SUCCESS;
1739}
1740
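/* Usage sketch (modelled on the test code below, not normative): bracket the
 * mappings with PGMDynMapStartAutoSet / PGMDynMapReleaseAutoSet and keep
 * interrupts disabled while calling into the mapping code, e.g.:
 *
 *     ASMIntDisable();
 *     PGMDynMapStartAutoSet(pVCpu);
 *     void *pv;
 *     int rc = PGMDynMapHCPage(pVM, HCPhys, &pv);
 *     ASMIntEnable();
 *     if (RT_SUCCESS(rc))
 *     {
 *         // ... access the page via pv ...
 *     }
 *     ASMIntDisable();
 *     PGMDynMapReleaseAutoSet(pVCpu);
 *     ASMIntEnable();
 */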
1741
1742#ifdef DEBUG
1743/** For pgmR0DynMapTest3PerCpu. */
1744typedef struct PGMR0DYNMAPTEST
1745{
1746 uint32_t u32Expect;
1747 uint32_t *pu32;
1748 uint32_t volatile cFailures;
1749} PGMR0DYNMAPTEST;
1750typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST;
1751
1752/**
1753 * Checks that the content of the page is the same on all CPUs, i.e. that there
1754 * are no CPU-specific PTs or similar nasty stuff involved.
1755 *
1756 * @param idCpu The current CPU.
1757 * @param pvUser1 Pointer to a PGMR0DYNMAPTEST structure.
1758 * @param pvUser2 Unused, ignored.
1759 */
1760static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1761{
1762 PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1;
1763 ASMInvalidatePage(pTest->pu32);
1764 if (*pTest->pu32 != pTest->u32Expect)
1765 ASMAtomicIncU32(&pTest->cFailures);
1766 NOREF(pvUser2); NOREF(idCpu);
1767}
1768
1769
1770/**
1771 * Performs some basic tests in debug builds.
1772 *
 * @returns VBox status code (failures are also logged via LogRel).
 * @param pVM The VM handle.
 */
1773static int pgmR0DynMapTest(PVM pVM)
1774{
1775 LogRel(("pgmR0DynMapTest: ****** START ******\n"));
1776 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1777 PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet;
1778 uint32_t i;
1779
1780 /*
1781 * Assert internal integrity first.
1782 */
1783 LogRel(("Test #0\n"));
1784 int rc = PGMR0DynMapAssertIntegrity();
1785 if (RT_FAILURE(rc))
1786 return rc;
1787
1788 void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed;
1789 pVM->pgm.s.pvR0DynMapUsed = pThis;
1790
1791 /*
1792 * Simple test, map CR3 twice and check that we're getting the
1793 * same mapping address back.
1794 */
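 /* Both calls should succeed and return the identical address, since the second
    request finds the page already present in the mapping cache. */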
1795 LogRel(("Test #1\n"));
1796 ASMIntDisable();
1797 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
1798
1799 uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK;
1800 void *pv = (void *)(intptr_t)-1;
1801 void *pv2 = (void *)(intptr_t)-2;
1802 rc = PGMDynMapHCPage(pVM, cr3, &pv);
1803 int rc2 = PGMDynMapHCPage(pVM, cr3, &pv2);
1804 ASMIntEnable();
1805 if ( RT_SUCCESS(rc2)
1806 && RT_SUCCESS(rc)
1807 && pv == pv2)
1808 {
1809 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cGuardPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1810 rc = PGMR0DynMapAssertIntegrity();
1811
1812 /*
1813 * Check that the simple set overflow code works by filling it
1814 * with more CR3 mappings.
1815 */
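 /* Expected outcome: the same mapping address every time, cEntries still at 5,
    with the two newest entries (indices 4 and 3) saturated at UINT16_MAX - 1
    references each and the rest keeping a single reference (verified below). */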
1816 LogRel(("Test #2\n"));
1817 ASMIntDisable();
1818 for (i = 0 ; i < UINT16_MAX*2 - 1 && RT_SUCCESS(rc) && pv2 == pv; i++)
1819 {
1820 pv2 = (void *)(intptr_t)-4;
1821 rc = PGMDynMapHCPage(pVM, cr3, &pv2);
1822 }
1823 ASMIntEnable();
1824 if (RT_FAILURE(rc) || pv != pv2)
1825 {
1826 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
1827 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1828 }
1829 else if (pSet->cEntries != 5)
1830 {
1831 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, 5));
1832 rc = VERR_INTERNAL_ERROR;
1833 }
1834 else if ( pSet->aEntries[4].cRefs != UINT16_MAX - 1
1835 || pSet->aEntries[3].cRefs != UINT16_MAX - 1
1836 || pSet->aEntries[2].cRefs != 1
1837 || pSet->aEntries[1].cRefs != 1
1838 || pSet->aEntries[0].cRefs != 1)
1839 {
1840 LogRel(("failed(%d): bad set dist: ", __LINE__));
1841 for (i = 0; i < pSet->cEntries; i++)
1842 LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs));
1843 LogRel(("\n"));
1844 rc = VERR_INTERNAL_ERROR;
1845 }
1846 if (RT_SUCCESS(rc))
1847 rc = PGMR0DynMapAssertIntegrity();
1848 if (RT_SUCCESS(rc))
1849 {
1850 /*
1851 * Trigger a set optimization run (exactly).
1852 */
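 /* Map RT_ELEMENTS(pSet->aEntries) - 5 additional distinct pages so that the
    set ends up exactly full (checked below). */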
1853 LogRel(("Test #3\n"));
1854 ASMIntDisable();
1855 pv2 = NULL;
1856 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) - 5 && RT_SUCCESS(rc) && pv2 != pv; i++)
1857 {
1858 pv2 = (void *)(intptr_t)(-5 - i);
1859 rc = PGMDynMapHCPage(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2);
1860 }
1861 ASMIntEnable();
1862 if (RT_FAILURE(rc) || pv == pv2)
1863 {
1864 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
1865 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1866 }
1867 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries))
1868 {
1869 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1870 rc = VERR_INTERNAL_ERROR;
1871 }
1872 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cGuardPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1873 if (RT_SUCCESS(rc))
1874 rc = PGMR0DynMapAssertIntegrity();
1875 if (RT_SUCCESS(rc))
1876 {
1877 /*
1878 * Trigger an overflow error.
1879 */
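 /* With the set already full, mapping further distinct pages first forces
    pgmDynMapOptimizeAutoSet and is then expected to fail with
    VERR_PGM_DYNMAP_FULL_SET. */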
1880 LogRel(("Test #4\n"));
1881 ASMIntDisable();
1882 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) + 2; i++)
1883 {
1884 rc = PGMDynMapHCPage(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2);
1885 if (RT_SUCCESS(rc))
1886 rc = PGMR0DynMapAssertIntegrity();
1887 if (RT_FAILURE(rc))
1888 break;
1889 }
1890 ASMIntEnable();
1891 if (rc == VERR_PGM_DYNMAP_FULL_SET)
1892 {
1893 /* flush the set. */
1894 LogRel(("Test #5\n"));
1895 ASMIntDisable();
1896 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
1897 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
1898 PGMDynMapStartAutoSet(&pVM->aCpus[0]);
1899 ASMIntEnable();
1900
1901 rc = PGMR0DynMapAssertIntegrity();
1902 }
1903 else
1904 {
1905 LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__,
1906 rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i));
1907 if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR;
1908 }
1909 }
1910 }
1911 }
1912 else
1913 {
1914 LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2));
1915 if (RT_SUCCESS(rc))
1916 rc = rc2;
1917 }
1918
1919 /*
1920 * Check that everyone sees the same stuff.
1921 */
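 /* Maps the first page table of the first segment and has every CPU (via
    RTMpOnAll) compare the PTE value read through the new mapping against the
    value read directly; any mismatch would indicate per-CPU page tables. */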
1922 if (RT_SUCCESS(rc))
1923 {
1924 LogRel(("Test #6\n"));
1925 ASMIntDisable();
1926 RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0);
1927 rc = PGMDynMapHCPage(pVM, HCPhysPT, &pv);
1928 if (RT_SUCCESS(rc))
1929 {
1930 PGMR0DYNMAPTEST Test;
1931 uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u;
1932 Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK));
1933 Test.u32Expect = *pu32Real;
1934 ASMAtomicWriteU32(&Test.cFailures, 0);
1935 ASMIntEnable();
1936
1937 rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL);
1938 if (RT_FAILURE(rc))
1939 LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc));
1940 else if (Test.cFailures)
1941 {
1942 LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__,
1943 Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
1944 rc = VERR_INTERNAL_ERROR;
1945 }
1946 else
1947 LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n",
1948 pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
1949 }
1950 else
1951 {
1952 ASMIntEnable();
1953 LogRel(("failed(%d): rc=%Rrc\n", __LINE__, rc));
1954 }
1955 }
1956
1957 /*
1958 * Clean up.
1959 */
1960 LogRel(("Cleanup.\n"));
1961 ASMIntDisable();
1962 PGMDynMapMigrateAutoSet(&pVM->aCpus[0]);
1963 PGMDynMapReleaseAutoSet(&pVM->aCpus[0]);
1964 ASMIntEnable();
1965
1966 if (RT_SUCCESS(rc))
1967 rc = PGMR0DynMapAssertIntegrity();
1968 else
1969 PGMR0DynMapAssertIntegrity();
1970
1971 LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc,
1972 pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cGuardPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
1973 pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved;
1974 LogRel(("pgmR0DynMapTest: ****** END ******\n"));
1975 return rc;
1976}
1977#endif /* DEBUG */
1978