VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp@14517

Last change on this file since 14517 was 14517, checked in by vboxsync, 16 years ago

PGMR0DynMap: fixes

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 46.9 KB
1/* $Id: PGMR0DynMap.cpp 14517 2008-11-24 13:41:39Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#include <VBox/pgm.h>
26#include "../PGMInternal.h"
27#include <VBox/vm.h>
28#include <VBox/sup.h>
29#include <VBox/err.h>
30#include <iprt/asm.h>
31#include <iprt/alloc.h>
32#include <iprt/assert.h>
33#include <iprt/cpuset.h>
34#include <iprt/memobj.h>
35#include <iprt/mp.h>
36#include <iprt/semaphore.h>
37#include <iprt/spinlock.h>
38#include <iprt/string.h>
39
40
41/*******************************************************************************
42* Defined Constants And Macros *
43*******************************************************************************/
44/** The max size of the mapping cache (in pages). */
45#define PGMR0DYNMAP_MAX_PAGES ((8*_1M) >> PAGE_SHIFT)
46/** The segment size to fall back on when an out-of-memory condition prevents
47 * the allocation of a single big segment. */
48#define PGMR0DYNMAP_SMALL_SEG_PAGES 128
49/** The number of pages we reserve per CPU. */
50#define PGMR0DYNMAP_PAGES_PER_CPU 64
51/** Calculates the overload threshold. Currently set at 50%. */
52#define PGMR0DYNMAP_CALC_OVERLOAD(cPages) ((cPages) / 2)
53
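/** @remarks Illustrative sketch (not built): shows how the constants above
 * combine when sizing the cache. The real calculation lives in
 * pgmR0DynMapCalcNewSize(); the helper name and the CPU count used here are
 * hypothetical. */
#if 0 /* example only */
static uint32_t pgmR0DynMapExampleInitialSize(uint32_t cCpus)
{
    /* Start out with 64 pages per CPU, e.g. 4 CPUs -> 256 pages (1 MB). */
    uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
    /* Never exceed the 8 MB cap (2048 pages). */
    if (cPages > PGMR0DYNMAP_MAX_PAGES)
        cPages = PGMR0DYNMAP_MAX_PAGES;
    /* The cache is considered overloaded (and is grown) once cMaxLoad passes
       PGMR0DYNMAP_CALC_OVERLOAD(cPages), i.e. 50% of this value. */
    return cPages;
}
#endif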
54
55/*******************************************************************************
56* Structures and Typedefs *
57*******************************************************************************/
58/**
59 * Ring-0 dynamic mapping cache segment.
60 *
61 * The dynamic mapping cache can be extended with additional segments if the
62 * load is found to be too high. This is done the next time a VM is created,
63 * under the protection of the init mutex. The arrays are reallocated and the
64 * new segment is added to the end of them. Nothing is rehashed of course, as
65 * the indexes / addresses must remain unchanged.
66 *
67 * This structure is only modified while owning the init mutex or during module
68 * init / term.
69 */
70typedef struct PGMR0DYNMAPSEG
71{
72 /** Pointer to the next segment. */
73 struct PGMR0DYNMAPSEG *pNext;
74 /** The memory object for the virtual address range that we're abusing. */
75 RTR0MEMOBJ hMemObj;
76 /** The start page in the cache. (I.e. index into the arrays.) */
77 uint16_t iPage;
78 /** The number of pages this segment contributes. */
79 uint16_t cPages;
80 /** The number of page tables. */
81 uint16_t cPTs;
82 /** The memory objects for the page tables. */
83 RTR0MEMOBJ ahMemObjPTs[1];
84} PGMR0DYNMAPSEG;
85/** Pointer to a ring-0 dynamic mapping cache segment. */
86typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
87
88
89/**
90 * Ring-0 dynamic mapping cache entry.
91 *
92 * This structure tracks a single page in the mapping cache.
93 */
94typedef struct PGMR0DYNMAPENTRY
95{
96 /** The physical address of the currently mapped page.
97 * This is duplicated for three reasons: cache locality, cache policy of the PT
98 * mappings and sanity checks. */
99 RTHCPHYS HCPhys;
100 /** Pointer to the page. */
101 void *pvPage;
102 /** The number of references. */
103 int32_t volatile cRefs;
104 /** PTE pointer union. */
105 union PGMR0DYNMAPENTRY_PPTE
106 {
107 /** PTE pointer, 32-bit legacy version. */
108 PX86PTE pLegacy;
109 /** PTE pointer, PAE version. */
110 PX86PTEPAE pPae;
111 /** PTE pointer, the void version. */
112 void *pv;
113 } uPte;
114 /** CPUs that haven't invalidated this entry after its last update. */
115 RTCPUSET PendingSet;
116} PGMR0DYNMAPENTRY;
117/** Pointer to a ring-0 dynamic mapping cache entry. */
118typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
119
120
121/**
122 * Ring-0 dynamic mapping cache.
123 *
124 * This is initialized during VMMR0 module init but no segments are allocated at
125 * that time. Segments will be added when the first VM is started and removed
126 * again when the last VM shuts down, thus avoiding memory consumption while dormant.
127 * At module termination, the remaining bits will be freed up.
128 */
129typedef struct PGMR0DYNMAP
130{
131 /** The usual magic number / eye catcher (PGMR0DYNMAP_MAGIC). */
132 uint32_t u32Magic;
133 /** Spinlock serializing the normal operation of the cache. */
134 RTSPINLOCK hSpinlock;
135 /** Array for tracking and managing the pages. */
136 PPGMR0DYNMAPENTRY paPages;
137 /** The cache size given as a number of pages. */
138 uint32_t cPages;
139 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
140 bool fLegacyMode;
141 /** The current load. */
142 uint32_t cLoad;
143 /** The max load ever.
144 * This is maintained to trigger the addition of more mapping space. */
145 uint32_t cMaxLoad;
146 /** Initialization / termination lock. */
147 RTSEMFASTMUTEX hInitLock;
148 /** The number of users (protected by hInitLock). */
149 uint32_t cUsers;
150 /** Array containing a copy of the original page tables.
151 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
152 void *pvSavedPTEs;
153 /** List of segments. */
154 PPGMR0DYNMAPSEG pSegHead;
155 /** The paging mode. */
156 SUPPAGINGMODE enmPgMode;
157} PGMR0DYNMAP;
158/** Pointer to the ring-0 dynamic mapping cache */
159typedef PGMR0DYNMAP *PPGMR0DYNMAP;
160
161/** PGMR0DYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
162#define PGMR0DYNMAP_MAGIC 0x19640201
163
164
165/**
166 * Paging level data.
167 */
168typedef struct PGMR0DYNMAPPGLVL
169{
170 uint32_t cLevels; /**< The number of levels. */
171 struct
172 {
173 RTHCPHYS HCPhys; /**< The address of the page for the current level,
174 * i.e. what hMemObj/hMapObj is currently mapping. */
175 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
176 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
177 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
178 uint32_t fPtrShift; /**< The pointer shift count. */
179 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
180 uint64_t fAndMask; /**< And mask to check entry flags. */
181 uint64_t fResMask; /**< The result from applying fAndMask. */
182 union
183 {
184 void *pv; /**< hMapObj address. */
185 PX86PGUINT paLegacy; /**< Legacy table view. */
186 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
187 } u;
188 } a[4];
189} PGMR0DYNMAPPGLVL;
190/** Pointer to paging level data. */
191typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
192
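/** @remarks Illustrative sketch (not built): how one level of the page walk
 * uses the PGMR0DYNMAPPGLVL masks. The real walk is done by
 * pgmR0DynMapPagingArrayMapPte() below; the helper name and parameters here
 * are hypothetical. */
#if 0 /* example only */
static void pgmR0DynMapExamplePgLvlStep(PPGMR0DYNMAPPGLVL pPgLvl, uint32_t i,
                                        void *pvPage, X86PGPAEUINT uEntry)
{
    /* Physical address of the table at this level, taken from the previous
       level's entry (or from CR3 for level 0). */
    RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
    /* Index of pvPage's entry within that table. */
    uint32_t iEntry = ((uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift)
                    & pPgLvl->a[i].fPtrMask;
    /* An entry is accepted when (entry & fAndMask) == fResMask. */
    NOREF(HCPhys); NOREF(iEntry);
}
#endif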
193
194/*******************************************************************************
195* Global Variables *
196*******************************************************************************/
197/** Pointer to the ring-0 dynamic mapping cache. */
198static PPGMR0DYNMAP g_pPGMR0DynMap;
199
200
201/*******************************************************************************
202* Internal Functions *
203*******************************************************************************/
204static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs);
205static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis);
206static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis);
207static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis);
208
209
210/**
211 * Initializes the ring-0 dynamic mapping cache.
212 *
213 * @returns VBox status code.
214 */
215VMMR0DECL(int) PGMR0DynMapInit(void)
216{
217 Assert(!g_pPGMR0DynMap);
218
219 /*
220 * Create and initialize the cache instance.
221 */
222 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)RTMemAllocZ(sizeof(*pThis));
223 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
224 int rc = VINF_SUCCESS;
225 pThis->enmPgMode = SUPR0GetPagingMode();
226 switch (pThis->enmPgMode)
227 {
228 case SUPPAGINGMODE_32_BIT:
229 case SUPPAGINGMODE_32_BIT_GLOBAL:
230 pThis->fLegacyMode = true;
231 break;
232 case SUPPAGINGMODE_PAE:
233 case SUPPAGINGMODE_PAE_GLOBAL:
234 case SUPPAGINGMODE_PAE_NX:
235 case SUPPAGINGMODE_PAE_GLOBAL_NX:
236 case SUPPAGINGMODE_AMD64:
237 case SUPPAGINGMODE_AMD64_GLOBAL:
238 case SUPPAGINGMODE_AMD64_NX:
239 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
240 pThis->fLegacyMode = false;
241 break;
242 default:
243 rc = VERR_INTERNAL_ERROR;
244 break;
245 }
246 if (RT_SUCCESS(rc))
247 {
248 rc = RTSemFastMutexCreate(&pThis->hInitLock);
249 if (RT_SUCCESS(rc))
250 {
251 rc = RTSpinlockCreate(&pThis->hSpinlock);
252 if (RT_SUCCESS(rc))
253 {
254 pThis->u32Magic = PGMR0DYNMAP_MAGIC;
255 g_pPGMR0DynMap = pThis;
256 return VINF_SUCCESS;
257 }
258 RTSemFastMutexDestroy(pThis->hInitLock);
259 }
260 }
261 RTMemFree(pThis);
262 return rc;
263}
264
265
266/**
267 * Terminates the ring-0 dynamic mapping cache.
268 */
269VMMR0DECL(void) PGMR0DynMapTerm(void)
270{
271 /*
272 * Destroy the cache.
273 *
274 * There are not supposed to be any races here; the loader should
275 * make sure about that. So, don't bother locking anything.
276 *
277 * The VM objects should all be destroyed by now, so there are no
278 * dangling users or anything like that to clean up. This routine
279 * is just a mirror image of PGMR0DynMapInit.
280 */
281 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
282 if (pThis)
283 {
284 AssertPtr(pThis);
285 g_pPGMR0DynMap = NULL;
286
287 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->cPages,
288 ("cUsers=%d paPages=%p cPages=%#x\n",
289 pThis->cUsers, pThis->paPages, pThis->cPages));
290
291 /* Free the associated resources. */
292 RTSemFastMutexDestroy(pThis->hInitLock);
293 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
294 RTSpinlockDestroy(pThis->hSpinlock);
295 pThis->hSpinlock = NIL_RTSPINLOCK;
296 pThis->u32Magic = UINT32_MAX;
297 RTMemFree(pThis);
298 }
299}
300
301
302/**
303 * Initializes the dynamic mapping cache for a new VM.
304 *
305 * @returns VBox status code.
306 * @param pVM Pointer to the shared VM structure.
307 */
308VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
309{
310 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
311
312 /*
313 * Initialize the auto sets.
314 */
315 VMCPUID idCpu = pVM->cCPUs;
316 AssertReturn(idCpu > 0 && idCpu <= VMCPU_MAX_CPU_COUNT, VERR_INTERNAL_ERROR);
317 while (idCpu-- > 0)
318 {
319 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
320 uint32_t j = RT_ELEMENTS(pSet->aEntries);
321 while (j-- > 0)
322 {
323 pSet->aEntries[j].iPage = UINT16_MAX;
324 pSet->aEntries[j].cRefs = 0;
325 }
326 pSet->cEntries = PGMMAPSET_CLOSED;
327 }
328
329 /*
330 * Do we need the cache? Skip the last bit if we don't.
331 */
332 if (!HWACCMIsEnabled(pVM))
333 return VINF_SUCCESS;
334
335 /*
336 * Reference and if necessary setup or expand the cache.
337 */
338 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
339 AssertPtrReturn(pThis, VERR_INTERNAL_ERROR);
340 int rc = RTSemFastMutexRequest(pThis->hInitLock);
341 AssertLogRelRCReturn(rc, rc);
342
343 pThis->cUsers++;
344 if (pThis->cUsers == 1)
345 rc = pgmR0DynMapSetup(pThis);
346 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages))
347 rc = pgmR0DynMapExpand(pThis);
348 if (RT_SUCCESS(rc))
349 pVM->pgm.s.pvR0DynMapUsed = pThis;
350 else
351 pThis->cUsers--;
352
353 RTSemFastMutexRelease(pThis->hInitLock);
354
355 return rc;
356}
357
358
359/**
360 * Terminates the dynamic mapping cache usage for a VM.
361 *
362 * @param pVM Pointer to the shared VM structure.
363 */
364VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
365{
366 /*
367 * Return immediately if we're not using the cache.
368 */
369 if (!pVM->pgm.s.pvR0DynMapUsed)
370 return;
371
372 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
373 AssertPtrReturnVoid(pThis);
374
375 int rc = RTSemFastMutexRequest(pThis->hInitLock);
376 AssertLogRelRCReturnVoid(rc);
377
378 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
379 {
380 pVM->pgm.s.pvR0DynMapUsed = NULL;
381
382 /*
383 * Clean up and check the auto sets.
384 */
385 VMCPUID idCpu = pVM->cCPUs;
386 while (idCpu-- > 0)
387 {
388 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
389 uint32_t j = pSet->cEntries;
390 if (j <= RT_ELEMENTS(pSet->aEntries))
391 {
392 /*
393 * The set is open, close it.
394 */
395 while (j-- > 0)
396 {
397 int32_t cRefs = pSet->aEntries[j].cRefs;
398 uint32_t iPage = pSet->aEntries[j].iPage;
399 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
400 if (iPage < pThis->cPages && cRefs > 0)
401 pgmR0DynMapReleasePage(pThis, iPage, cRefs);
402 else
403 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
404
405 pSet->aEntries[j].iPage = UINT16_MAX;
406 pSet->aEntries[j].cRefs = 0;
407 }
408 pSet->cEntries = PGMMAPSET_CLOSED;
409 }
410 else
411 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
412
413 j = RT_ELEMENTS(pSet->aEntries);
414 while (j-- > 0)
415 {
416 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
417 Assert(!pSet->aEntries[j].cRefs);
418 }
419 }
420
421 /*
422 * Release our reference to the mapping cache.
423 */
424 Assert(pThis->cUsers > 0);
425 pThis->cUsers--;
426 if (!pThis->cUsers)
427 pgmR0DynMapTearDown(pThis);
428 }
429 else
430 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
431
432 RTSemFastMutexRelease(pThis->hInitLock);
433}
434
435
436/**
437 * Calculate the new cache size based on cMaxLoad statistics.
438 *
439 * @returns Number of pages.
440 * @param pThis The dynamic mapping cache instance.
441 * @param pcMinPages The minimal size in pages.
442 */
443static uint32_t pgmR0DynMapCalcNewSize(PPGMR0DYNMAP pThis, uint32_t *pcMinPages)
444{
445 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
446
447 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU (/2). */
448 RTCPUID cCpus = RTMpGetCount();
449 AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
450 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
451 uint32_t cMinPages = cCpus * (PGMR0DYNMAP_PAGES_PER_CPU / 2);
452
453 /* adjust against cMaxLoad. */
454 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
455 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
456 pThis->cMaxLoad = 0;
457
458 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
459 cPages += PGMR0DYNMAP_PAGES_PER_CPU;
460
461 if (pThis->cMaxLoad > cMinPages)
462 cMinPages = pThis->cMaxLoad;
463
464 /* adjust against max and current size. */
465 if (cPages < pThis->cPages)
466 cPages = pThis->cPages;
467 if (cPages > PGMR0DYNMAP_MAX_PAGES)
468 cPages = PGMR0DYNMAP_MAX_PAGES;
469
470 if (cMinPages < pThis->cPages)
471 cMinPages = pThis->cPages;
472 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
473 cMinPages = PGMR0DYNMAP_MAX_PAGES;
474
475 Assert(cMinPages);
476 *pcMinPages = cMinPages;
477 return cPages;
478}
479
480
481/**
482 * Initializes the paging level data.
483 *
484 * @param pThis The dynamic mapping cache instance.
485 * @param pPgLvl The paging level data.
486 */
487void pgmR0DynMapPagingArrayInit(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
488{
489 RTCCUINTREG cr4 = ASMGetCR4();
490 switch (pThis->enmPgMode)
491 {
492 case SUPPAGINGMODE_32_BIT:
493 case SUPPAGINGMODE_32_BIT_GLOBAL:
494 pPgLvl->cLevels = 2;
495 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
496 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
497 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
498 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
499 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
500
501 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
502 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
503 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
504 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
505 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
506 break;
507
508 case SUPPAGINGMODE_PAE:
509 case SUPPAGINGMODE_PAE_GLOBAL:
510 case SUPPAGINGMODE_PAE_NX:
511 case SUPPAGINGMODE_PAE_GLOBAL_NX:
512 pPgLvl->cLevels = 3;
513 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
514 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
515 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
516 pPgLvl->a[0].fAndMask = X86_PDPE_P;
517 pPgLvl->a[0].fResMask = X86_PDPE_P;
518
519 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
520 pPgLvl->a[1].fPtrMask = X86_PD_MASK;
521 pPgLvl->a[1].fPtrShift = X86_PD_SHIFT;
522 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
523 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
524
525 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
526 pPgLvl->a[2].fPtrMask = X86_PT_MASK;
527 pPgLvl->a[2].fPtrShift = X86_PT_SHIFT;
528 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
529 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
530 break;
531
532 case SUPPAGINGMODE_AMD64:
533 case SUPPAGINGMODE_AMD64_GLOBAL:
534 case SUPPAGINGMODE_AMD64_NX:
535 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
536 pPgLvl->cLevels = 3;
537 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
538 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
539 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
540 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
541 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
542
543 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
544 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
545 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
546 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
547 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
548
549 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
550 pPgLvl->a[2].fPtrMask = X86_PD_MASK;
551 pPgLvl->a[2].fPtrShift = X86_PD_SHIFT;
552 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
553 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
554
555 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
556 pPgLvl->a[3].fPtrMask = X86_PT_MASK;
557 pPgLvl->a[3].fPtrShift = X86_PT_SHIFT;
558 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
559 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
560 break;
561
562 default:
563 AssertFailed();
564 pPgLvl->cLevels = 0;
565 break;
566 }
567
568 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
569 {
570 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
571 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
572 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
573 pPgLvl->a[i].u.pv = NULL;
574 }
575}
576
577
578/**
579 * Maps a PTE.
580 *
581 * This will update the segment structure when new PTs are mapped.
582 *
583 * It also assumes that we (for paranoid reasons) wish to establish a mapping
584 * chain from CR3 to the PT that all corresponds to the processor we're
585 * currently running on, and go about this by running with interrupts disabled
586 * and restarting from CR3 for every change.
587 *
588 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
589 * to re-enable interrupts.
590 * @param pThis The dynamic mapping cache instance.
591 * @param pPgLvl The paging level structure.
592 * @param pvPage The page.
593 * @param pSeg The segment.
594 * @param cMaxPTs The max number of PTs expected in the segment.
595 * @param ppvPTE Where to store the PTE address.
596 */
597static int pgmR0DynMapPagingArrayMapPte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
598 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
599{
600 Assert(!(ASMGetFlags() & X86_EFL_IF));
601
602 void *pvEntry = NULL;
603 X86PGPAEUINT uEntry = ASMGetCR3();
604 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
605 {
606 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
607 if (pPgLvl->a[i].HCPhys != HCPhys)
608 {
609 /*
610 * Need to remap this level.
611 * The final level, the PT, will not be freed since that is what it's all about.
612 */
613 ASMIntEnable();
614 if (i + 1 == pPgLvl->cLevels)
615 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_INTERNAL_ERROR);
616 else
617 {
618 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
619 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
620 }
621
622 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE);
623 if (RT_SUCCESS(rc))
624 {
625 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
626 (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
627 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
628 if (RT_SUCCESS(rc))
629 {
630 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
631 AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
632 pPgLvl->a[i].HCPhys = HCPhys;
633 if (i + 1 == pPgLvl->cLevels)
634 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
635 ASMIntDisable();
636 return VINF_TRY_AGAIN;
637 }
638
639 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
640 }
641 else
642 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
643 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
644 return rc;
645 }
646
647 /*
648 * The next level.
649 */
650 uint32_t iEntry = ((uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
651 if (pThis->fLegacyMode)
652 {
653 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
654 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
655 }
656 else
657 {
658 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
659 uEntry = pPgLvl->a[i].u.paPae[iEntry];
660 }
661
662 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
663 {
664 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n",
665 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask));
666 return VERR_INTERNAL_ERROR;
667 }
668 }
669
670 /* made it thru without needing to remap anything. */
671 *ppvPTE = pvEntry;
672 return VINF_SUCCESS;
673}
674
675
676/**
677 * Adds a new segment of the specified size.
678 *
679 * @returns VBox status code.
680 * @param pThis The dynamic mapping cache instance.
681 * @param cPages The size of the new segment, given as a page count.
682 */
683static int pgmR0DynMapAddSeg(PPGMR0DYNMAP pThis, uint32_t cPages)
684{
685 int rc2;
686 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
687
688 /*
689 * Do the array reallocation first.
690 * (The pages array has to be replaced behind the spinlock of course.)
691 */
692 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
693 if (!pvSavedPTEs)
694 return VERR_NO_MEMORY;
695 pThis->pvSavedPTEs = pvSavedPTEs;
696
697 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
698 if (!pvPages)
699 {
700 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
701 if (pvSavedPTEs)
702 pThis->pvSavedPTEs = pvSavedPTEs;
703 return VERR_NO_MEMORY;
704 }
705
706 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
707 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
708
709 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
710 void *pvToFree = pThis->paPages;
711 pThis->paPages = (PPGMR0DYNMAPENTRY)pvPages;
712
713 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
714 RTMemFree(pvToFree);
715
716 /*
717 * Allocate the segment structure and pages memory.
718 */
719 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
720 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
721 if (!pSeg)
722 return VERR_NO_MEMORY;
723 pSeg->pNext = NULL;
724 pSeg->cPages = cPages;
725 pSeg->iPage = pThis->cPages;
726 pSeg->cPTs = 0;
727 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
728 if (RT_SUCCESS(rc))
729 {
730 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
731 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
732
733 /*
734 * Walk thru the pages and set them up with a mapping of their PTE and everything.
735 */
736 ASMIntDisable();
737 PGMR0DYNMAPPGLVL PgLvl;
738 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
739 uint32_t iEndPage = pThis->cPages + cPages;
740 for (uint32_t iPage = pThis->cPages;
741 iPage < iEndPage;
742 iPage++, pbPage += PAGE_SIZE)
743 {
744 /* Initialize the page data. */
745 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
746 pThis->paPages[iPage].pvPage = pbPage;
747 pThis->paPages[iPage].cRefs = 0;
748 pThis->paPages[iPage].uPte.pPae = 0;
749 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
750
751 /* Map its page table, retry until we've got a clean run (paranoia). */
752 do
753 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
754 &pThis->paPages[iPage].uPte.pv);
755 while (rc == VINF_TRY_AGAIN);
756 if (RT_FAILURE(rc))
757 break;
758
759 /* Save the PTE. */
760 if (pThis->fLegacyMode)
761 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
762 else
763 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
764 } /* for each page */
765 ASMIntEnable();
766
767 /* cleanup non-PT mappings */
768 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
769 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
770
771 if (RT_SUCCESS(rc))
772 {
773 /** @todo setup guard pages here later (strict builds should leave every
774 * second page and the start/end pages not present). */
775
776 /*
777 * Commit it by adding the segment to the list and updating the page count.
778 */
779 pSeg->pNext = pThis->pSegHead;
780 pThis->pSegHead = pSeg;
781 pThis->cPages += cPages;
782 return VINF_SUCCESS;
783 }
784
785 /*
786 * Bail out.
787 */
788 while (pSeg->cPTs-- > 0)
789 {
790 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
791 AssertRC(rc2);
792 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
793 }
794
795 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
796 AssertRC(rc2);
797 pSeg->hMemObj = NIL_RTR0MEMOBJ;
798 }
799 RTMemFree(pSeg);
800
801 /* Don't bother resizing the arrays, too lazy. */
802 return rc;
803}
804
805
806/**
807 * Called by PGMR0DynMapInitVM under the init lock.
808 *
809 * @returns VBox status code.
810 * @param pThis The dynamic mapping cache instance.
811 */
812static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis)
813{
814 /*
815 * Calc the size and add a segment of that size.
816 */
817 uint32_t cMinPages;
818 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
819 AssertReturn(cPages, VERR_INTERNAL_ERROR);
820 int rc = pgmR0DynMapAddSeg(pThis, cPages);
821 if (rc == VERR_NO_MEMORY)
822 {
823 /*
824 * Try adding smaller segments.
825 */
826 do
827 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
828 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
829 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
830 rc = VINF_SUCCESS;
831 if (rc == VERR_NO_MEMORY)
832 {
833 if (pThis->cPages)
834 pgmR0DynMapTearDown(pThis);
835 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
836 }
837 }
838 Assert(ASMGetFlags() & X86_EFL_IF);
839 return rc;
840}
841
842
843/**
844 * Called by PGMR0DynMapInitVM under the init lock.
845 *
846 * @returns VBox status code.
847 * @param pThis The dynamic mapping cache instance.
848 */
849static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis)
850{
851 /*
852 * Calc the new target size and add a segment of the appropriate size.
853 */
854 uint32_t cMinPages;
855 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
856 AssertReturn(cPages, VERR_INTERNAL_ERROR);
857 if (pThis->cPages >= cPages)
858 return VINF_SUCCESS;
859
860 uint32_t cAdd = cPages - pThis->cPages;
861 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
862 if (rc == VERR_NO_MEMORY)
863 {
864 /*
865 * Try adding smaller segments.
866 */
867 do
868 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
869 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
870 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
871 rc = VINF_SUCCESS;
872 if (rc == VERR_NO_MEMORY)
873 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
874 }
875 Assert(ASMGetFlags() & X86_EFL_IF);
876 return rc;
877}
878
879
880/**
881 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
882 *
883 * @param idCpu The current CPU.
884 * @param pvUser1 The dynamic mapping cache instance.
885 * @param pvUser2 Unused, NULL.
886 */
887static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
888{
889 Assert(!pvUser2);
890 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)pvUser1;
891 Assert(pThis == g_pPGMR0DynMap);
892 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
893 uint32_t iPage = pThis->cPages;
894 while (iPage-- > 0)
895 ASMInvalidatePage(paPages[iPage].pvPage);
896}
897
898
899/**
900 * Called by PGMR0DynMapTermVM under the init lock.
901 *
903 * @param pThis The dynamic mapping cache instance.
904 */
905static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis)
906{
907 /*
908 * Restore the original page table entries
909 */
910 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
911 uint32_t iPage = pThis->cPages;
912 if (pThis->fLegacyMode)
913 {
914 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
915 while (iPage-- > 0)
916 {
917 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
918 X86PGUINT uOld2 = uOld; NOREF(uOld2);
919 X86PGUINT uNew = paSavedPTEs[iPage];
920 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
921 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
922 }
923 }
924 else
925 {
926 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
927 while (iPage-- > 0)
928 {
929 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
930 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
931 X86PGPAEUINT uNew = paSavedPTEs[iPage];
932 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
933 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
934 }
935 }
936
937 /*
938 * Shoot down the TLBs on all CPUs before freeing them.
939 * If RTMpOnAll fails, make sure the TLBs are invalidated on the current CPU at least.
940 */
941 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
942 AssertRC(rc);
943 if (RT_FAILURE(rc))
944 {
945 iPage = pThis->cPages;
946 while (iPage-- > 0)
947 ASMInvalidatePage(paPages[iPage].pvPage);
948 }
949
950 /*
951 * Free the segments.
952 */
953 while (pThis->pSegHead)
954 {
955 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
956 pThis->pSegHead = pSeg->pNext;
957
958 uint32_t iPT = pSeg->cPTs;
959 while (iPT-- > 0)
960 {
961 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
962 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
963 }
964 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
965 pSeg->hMemObj = NIL_RTR0MEMOBJ;
966 pSeg->pNext = NULL;
967 pSeg->iPage = UINT16_MAX;
968 pSeg->cPages = 0;
969 pSeg->cPTs = 0;
970 RTMemFree(pSeg);
971 }
972
973 /*
974 * Free the arrays and restore the initial state.
975 * The cLoadMax value is left behind for the next setup.
976 */
977 RTMemFree(pThis->paPages);
978 pThis->paPages = NULL;
979 RTMemFree(pThis->pvSavedPTEs);
980 pThis->pvSavedPTEs = NULL;
981 pThis->cPages = 0;
982 pThis->cLoad = 0;
983}
984
985
986/**
987 * Release references to a page, caller owns the spin lock.
988 *
989 * @param pThis The dynamic mapping cache instance.
990 * @param iPage The page.
991 * @param cRefs The number of references to release.
992 */
993DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs)
994{
995 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs);
996 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
997 if (!cRefs)
998 pThis->cLoad--;
999}
1000
1001
1002/**
1003 * Release references to a page, caller does not own the spin lock.
1004 *
1005 * @param pThis The dynamic mapping cache instance.
1006 * @param iPage The page.
1007 * @param cRefs The number of references to release.
1008 */
1009static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1010{
1011 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1012 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1013 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1014 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1015}
1016
1017
1018/**
1019 * pgmR0DynMapPage worker that deals with the tedious bits.
1020 *
1021 * @returns The page index on success, UINT32_MAX on failure.
1022 * @param pThis The dynamic mapping cache instance.
1023 * @param HCPhys The address of the page to be mapped.
1024 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1025 */
1026static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage)
1027{
1028 /*
1029 * Check if any of the first 5 pages are unreferenced since the caller
1030 * already has made sure they aren't matching.
1031 */
1032 uint32_t const cPages = pThis->cPages;
1033 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1034 uint32_t iFreePage;
1035 if (!paPages[iPage].cRefs)
1036 iFreePage = iPage;
1037 else if (!paPages[(iPage + 1) % cPages].cRefs)
1038 iFreePage = (iPage + 1) % cPages;
1039 else if (!paPages[(iPage + 2) % cPages].cRefs)
1040 iFreePage = (iPage + 2) % cPages;
1041 else if (!paPages[(iPage + 3) % cPages].cRefs)
1042 iFreePage = (iPage + 3) % cPages;
1043 else if (!paPages[(iPage + 4) % cPages].cRefs)
1044 iFreePage = (iPage + 4) % cPages;
1045 else
1046 {
1047 /*
1048 * Search for an unused or matching entry.
1049 */
1050 iFreePage = (iPage + 5) % pThis->cPages;
1051 for (;;)
1052 {
1053 if (paPages[iFreePage].HCPhys == HCPhys)
1054 return iFreePage;
1055 if (!paPages[iFreePage].cRefs)
1056 break;
1057
1058 /* advance */
1059 iFreePage = (iFreePage + 1) % cPages;
1060 if (RT_UNLIKELY(iFreePage == iPage))
1061 return UINT32_MAX;
1062 }
1063 }
1064
1065 /*
1066 * Setup the new entry.
1067 */
1068 paPages[iFreePage].HCPhys = HCPhys;
1069 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1070 if (pThis->fLegacyMode)
1071 {
1072 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1073 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1074 X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1075 | X86_PTE_P | X86_PTE_A | X86_PTE_D
1076 | (HCPhys & X86_PTE_PG_MASK);
1077 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1078 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1079 }
1080 else
1081 {
1082 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1083 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1084 X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1085 | X86_PTE_P | X86_PTE_A | X86_PTE_D
1086 | (HCPhys & X86_PTE_PAE_PG_MASK);
1087 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1088 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1089 }
1090 return iFreePage;
1091}
1092
1093
1094/**
1095 * Maps a page into the pool.
1096 *
1097 * @returns Pointer to the mapping, NULL on failure.
1098 * @param pThis The dynamic mapping cache instance.
1099 * @param HCPhys The address of the page to be mapped.
1100 * @param piPage Where to store the page index.
1101 */
1102DECLINLINE(void *) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t *piPage)
1103{
1104 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1105 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1106 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1107
1108 /*
1109 * Find an entry, if possible a matching one. The HCPhys address is hashed
1110 * down to a page index, collisions are handled by linear searching. Optimize
1111 * for a hit in the first 5 pages.
1112 *
1113 * Do the cheap hits here and defer the tedious searching and inserting
1114 * to a helper function.
1115 */
1116 uint32_t const cPages = pThis->cPages;
1117 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1118 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1119 if (paPages[iPage].HCPhys != HCPhys)
1120 {
1121 uint32_t iPage2 = (iPage + 1) % cPages;
1122 if (paPages[iPage2].HCPhys != HCPhys)
1123 {
1124 iPage2 = (iPage + 2) % cPages;
1125 if (paPages[iPage2].HCPhys != HCPhys)
1126 {
1127 iPage2 = (iPage + 3) % cPages;
1128 if (paPages[iPage2].HCPhys != HCPhys)
1129 {
1130 iPage2 = (iPage + 4) % cPages;
1131 if (paPages[iPage2].HCPhys != HCPhys)
1132 {
1133 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage);
1134 if (RT_UNLIKELY(iPage == UINT32_MAX))
1135 {
1136 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1137 return NULL;
1138 }
1139 }
1140 else
1141 iPage = iPage2;
1142 }
1143 else
1144 iPage = iPage2;
1145 }
1146 else
1147 iPage = iPage2;
1148 }
1149 else
1150 iPage = iPage2;
1151 }
1152
1153 /*
1154 * Reference it, update statistics and get the return address.
1155 */
1156 if (ASMAtomicIncS32(&paPages[iPage].cRefs) == 1)
1157 {
1158 pThis->cLoad++;
1159 if (pThis->cLoad > pThis->cMaxLoad)
1160 pThis->cMaxLoad = pThis->cLoad;
1161 Assert(pThis->cLoad <= pThis->cPages);
1162 }
1163 void *pvPage = paPages[iPage].pvPage;
1164
1165 /*
1166 * Invalidate the entry?
1167 */
1168 RTCPUID idRealCpu = RTMpCpuId();
1169 bool fInvalidateIt = RTCpuSetIsMember(&paPages[iPage].PendingSet, idRealCpu);
1170 if (fInvalidateIt)
1171 RTCpuSetDel(&paPages[iPage].PendingSet, idRealCpu);
1172
1173 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1174
1175 /*
1176 * Do the actual invalidation outside the spinlock.
1177 */
1178 ASMInvalidatePage(pvPage);
1179
1180 *piPage = iPage;
1181 return pvPage;
1182}
1183
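/** @remarks Worked example (not built) of the hashing scheme used by
 * pgmR0DynMapPage above: the page frame number modulo the cache size picks
 * the slot, and collisions fall through to the +1..+4 probes and then the
 * slow path. The helper name and the concrete numbers are hypothetical. */
#if 0 /* example only */
static uint32_t pgmR0DynMapExampleHash(void)
{
    RTHCPHYS const HCPhys = UINT64_C(0x12345000); /* page aligned host address */
    uint32_t const cPages = 256;                  /* assume a 1 MB cache */
    /* (0x12345000 >> 12) % 256 = 0x12345 % 0x100 = 0x45, i.e. page index 69. */
    return (uint32_t)((HCPhys >> PAGE_SHIFT) % cPages);
}
#endif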
1184
1185/**
1186 * Signals the start of a new set of mappings.
1187 *
1188 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1189 * API is called.
1190 *
1191 * @param pVCpu The shared data for the current virtual CPU.
1192 */
1193VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu)
1194{
1195 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1196 pVCpu->pgm.s.AutoSet.cEntries = 0;
1197}
1198
1199
1200/**
1201 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associates
1202 * since the PGMDynMapStartAutoSet call.
1203 *
1204 * @param pVCpu The shared data for the current virtual CPU.
1205 */
1206VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu)
1207{
1208 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1209
1210 /* close the set */
1211 uint32_t i = pVCpu->pgm.s.AutoSet.cEntries;
1212 AssertMsg(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries), ("%#x (%u)\n", i, i));
1213 pVCpu->pgm.s.AutoSet.cEntries = PGMMAPSET_CLOSED;
1214
1215 /* release any pages we're referencing. */
1216 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries)))
1217 {
1218 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1219 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1220 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1221
1222 while (i-- > 0)
1223 {
1224 uint32_t iPage = pSet->aEntries[i].iPage;
1225 Assert(iPage < pThis->cPages);
1226 int32_t cRefs = pSet->aEntries[i].cRefs;
1227 Assert(cRefs > 0);
1228 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1229
1230 pSet->aEntries[i].iPage = UINT16_MAX;
1231 pSet->aEntries[i].cRefs = 0;
1232 }
1233
1234 Assert(pThis->cLoad <= pThis->cPages);
1235 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1236 }
1237}
1238
1239
1240/**
1241 * Migrates the automatic mapping set of the current vCPU if necessary.
1242 *
1243 * This is called when re-entering the hardware assisted execution mode after a
1244 * nip down to ring-3. We run the risk that the CPU might have changed, and we
1245 * will therefore make sure all the cache entries currently in the auto set are
1246 * valid on the new CPU. If the CPU didn't change, nothing will happen as all
1247 * the entries will already have been invalidated on it.
1248 *
1249 * @param pVCpu The shared data for the current virtual CPU.
1250 * @thread EMT
1251 */
1252VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu)
1253{
1254 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1255 uint32_t i = pVCpu->pgm.s.AutoSet.cEntries;
1256 AssertMsg(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries), ("%#x (%u)\n", i, i));
1257 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries)))
1258 {
1259 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1260 RTCPUID idRealCpu = RTMpCpuId();
1261
1262 while (i-- > 0)
1263 {
1264 Assert(pSet->aEntries[i].cRefs > 0);
1265 uint32_t iPage = pSet->aEntries[i].iPage;
1266 Assert(iPage < pThis->cPages);
1267 if (RTCpuSetIsMember(&pThis->paPages[iPage].PendingSet, idRealCpu))
1268 {
1269 RTCpuSetDel(&pThis->paPages[iPage].PendingSet, idRealCpu);
1270 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
1271 }
1272 }
1273 }
1274}
1275
1276
1277/**
1278 * As a final resort for a full auto set, try to merge duplicate entries.
1279 *
1280 * @param pSet The set.
1281 */
1282static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1283{
1284 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1285 {
1286 uint16_t const iPage = pSet->aEntries[i].iPage;
1287 uint32_t j = i + 1;
1288 while (j < pSet->cEntries)
1289 {
1290 if (pSet->aEntries[j].iPage != iPage)
1291 j++;
1292 else
1293 {
1294 /* merge j with i removing j. */
1295 pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs;
1296 pSet->cEntries--;
1297 if (j < pSet->cEntries)
1298 {
1299 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1300 pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX;
1301 pSet->aEntries[pSet->cEntries].cRefs = 0;
1302 }
1303 else
1304 {
1305 pSet->aEntries[j].iPage = UINT16_MAX;
1306 pSet->aEntries[j].cRefs = 0;
1307 }
1308 }
1309 }
1310 }
1311}
1312
1313
1314/* documented elsewhere - a bit of a mess. */
1315VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv)
1316{
1317 /*
1318 * Validate state.
1319 */
1320 AssertMsgReturn(pVM->pgm.s.pvR0DynMapUsed == g_pPGMR0DynMap,
1321 ("%p != %p\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap),
1322 VERR_ACCESS_DENIED);
1323 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1324 PVMCPU pVCpu = VMMGetCpu(pVM);
1325 AssertPtrReturn(pVCpu, VERR_INTERNAL_ERROR);
1326 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1327 AssertMsgReturn(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries),
1328 ("%#x (%u)\n", pSet->cEntries, pSet->cEntries), VERR_WRONG_ORDER);
1329
1330 /*
1331 * Map it.
1332 */
1333 uint32_t iPage;
1334 void *pvPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, &iPage);
1335 if (RT_UNLIKELY(!pvPage))
1336 {
1337 static uint32_t s_cBitched = 0;
1338 if (++s_cBitched < 10)
1339 LogRel(("PGMDynMapHCPage: cLoad=%u/%u cPages=%u\n",
1340 g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages));
1341 return VERR_PGM_DYNMAP_FAILED;
1342 }
1343
1344 /*
1345 * Add the page to the auto reference set.
1346 * If it's less than half full, don't bother looking for duplicates.
1347 */
1348 if (pSet->cEntries < RT_ELEMENTS(pSet->aEntries) / 2)
1349 {
1350 pSet->aEntries[pSet->cEntries].cRefs = 1;
1351 pSet->aEntries[pSet->cEntries].iPage = iPage;
1352 }
1353 else
1354 {
1355 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
1356 int32_t i = pSet->cEntries;
1357 while (i-- > 0)
1358 if (pSet->aEntries[i].iPage == iPage)
1359 {
1360 pSet->aEntries[i].cRefs++;
1361 break;
1362 }
1363 if (i < 0)
1364 {
1365 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
1366 pgmDynMapOptimizeAutoSet(pSet);
1367 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
1368 {
1369 pSet->aEntries[pSet->cEntries].cRefs = 1;
1370 pSet->aEntries[pSet->cEntries].iPage = iPage;
1371 }
1372 else
1373 {
1374 /* We're screwed. */
1375 pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1);
1376
1377 static uint32_t s_cBitched = 0;
1378 if (++s_cBitched < 10)
1379 LogRel(("PGMDynMapHCPage: set is full!\n"));
1380 return VERR_PGM_DYNMAP_FULL_SET;
1381 }
1382 }
1383 }
1384
1385 return VINF_SUCCESS;
1386}
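/** @remarks Illustrative sketch (not built) of the intended ring-0 calling
 * pattern for the auto set API above. Error handling is trimmed and the
 * helper name and its arguments are hypothetical. */
#if 0 /* example only */
static void pgmR0DynMapExampleUsage(PVM pVM, PVMCPU pVCpu, RTHCPHYS HCPhys)
{
    /* Open the auto set; PGMDynMapHCPage won't work without this. */
    PGMDynMapStartAutoSet(pVCpu);

    /* Map a host page; the reference is recorded in the auto set. */
    void *pv = NULL;
    int rc = PGMDynMapHCPage(pVM, HCPhys, &pv);
    if (RT_SUCCESS(rc))
    {
        /* ... access the page thru pv ... */
    }

    /* Close the set, dropping all references taken since StartAutoSet. */
    PGMDynMapReleaseAutoSet(pVCpu);
}
#endif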
1387