VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp@ 14504

Last change on this file since 14504 was 14504, checked in by vboxsync, 16 years ago

PGMR0DynMap: Re-enabled the code.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 46.6 KB
1/* $Id: PGMR0DynMap.cpp 14504 2008-11-24 03:10:33Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#include <VBox/pgm.h>
26#include "../PGMInternal.h"
27#include <VBox/vm.h>
28#include <VBox/sup.h>
29#include <VBox/err.h>
30#include <iprt/asm.h>
31#include <iprt/alloc.h>
32#include <iprt/assert.h>
33#include <iprt/cpuset.h>
34#include <iprt/memobj.h>
35#include <iprt/mp.h>
36#include <iprt/semaphore.h>
37#include <iprt/spinlock.h>
38#include <iprt/string.h>
39
40
41/*******************************************************************************
42* Defined Constants And Macros *
43*******************************************************************************/
44/** The max size of the mapping cache (in pages). */
45#define PGMR0DYNMAP_MAX_PAGES ((8*_1M) >> PAGE_SHIFT)
46/** The small segment size that we fall back on when an out-of-memory condition
47 * prevents allocating one big segment. */
48#define PGMR0DYNMAP_SMALL_SEG_PAGES 128
49/** The number of pages we reserve per CPU. */
50#define PGMR0DYNMAP_PAGES_PER_CPU 64
51/** Calculates the overload threshold. Currently set at 50%. */
52#define PGMR0DYNMAP_CALC_OVERLOAD(cPages) ((cPages) / 2)
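/* Illustration only (a sketch assuming a hypothetical 4-CPU host and 4 KB pages,
 * not part of the original file): how the constants above combine. The initial
 * target is 4 * PGMR0DYNMAP_PAGES_PER_CPU = 256 pages, whose 50% overload
 * threshold is 128, while the hard cap is 8 MB worth of pages. Kept under #if 0. */
#if 0
AssertCompile(PGMR0DYNMAP_CALC_OVERLOAD(4 * PGMR0DYNMAP_PAGES_PER_CPU) == 128);
AssertCompile(PGMR0DYNMAP_MAX_PAGES == (8 * _1M) / PAGE_SIZE);
#endif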
53
54
55/*******************************************************************************
56* Structures and Typedefs *
57*******************************************************************************/
58/**
59 * Ring-0 dynamic mapping cache segment.
60 *
61 * The dynamic mapping cache can be extended with additional segments if the
62 * load is found to be too high. This is done the next time a VM is created, under
63 * the protection of the init mutex. The arrays are reallocated and the new
64 * segment is added to the end of them. Nothing is rehashed, of course, as the
65 * indexes / addresses must remain unchanged.
66 *
67 * This structure is only modified while owning the init mutex or during module
68 * init / term.
69 */
70typedef struct PGMR0DYNMAPSEG
71{
72 /** Pointer to the next segment. */
73 struct PGMR0DYNMAPSEG *pNext;
74 /** The memory object for the virtual address range that we're abusing. */
75 RTR0MEMOBJ hMemObj;
76 /** The start page in the cache. (I.e. index into the arrays.) */
77 uint16_t iPage;
78 /** The number of pages this segment contributes. */
79 uint16_t cPages;
80 /** The number of page tables. */
81 uint16_t cPTs;
82 /** The memory objects for the page tables. */
83 RTR0MEMOBJ ahMemObjPTs[1];
84} PGMR0DYNMAPSEG;
85/** Pointer to a ring-0 dynamic mapping cache segment. */
86typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
87
88
89/**
90 * Ring-0 dynamic mapping cache entry.
91 *
92 * This structure tracks a single page in the mapping cache: the physical address it currently maps, the ring-0 virtual address of the mapping, its reference count, the backing PTE and which CPUs still need to invalidate their TLB entry for it.
93 */
94typedef struct PGMR0DYNMAPENTRY
95{
96 /** The physical address of the currently mapped page.
97 * This is duplicated here for three reasons: cache locality, cache policy of the PT
98 * mappings and sanity checks. */
99 RTHCPHYS HCPhys;
100 /** Pointer to the page. */
101 void *pvPage;
102 /** The number of references. */
103 int32_t volatile cRefs;
104 /** PTE pointer union. */
105 union PGMR0DYNMAPENTRY_PPTE
106 {
107 /** PTE pointer, 32-bit legacy version. */
108 PX86PTE pLegacy;
109 /** PTE pointer, PAE version. */
110 PX86PTEPAE pPae;
111 /** PTE pointer, the void version. */
112 void *pv;
113 } uPte;
114 /** CPUs that haven't invalidated this entry after its last update. */
115 RTCPUSET PendingSet;
116} PGMR0DYNMAPENTRY;
117/** Pointer to a ring-0 dynamic mapping cache entry. */
118typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
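/* Illustration only (hypothetical helper, not in the original file): the PendingSet
 * field implements lazy TLB invalidation. Whenever an entry is (re)mapped, every CPU
 * is put into the set; each CPU then removes itself and issues an INVLPG the first
 * time it touches the entry, as sketched below. */
#if 0
DECLINLINE(void) pgmR0DynMapFlushEntryOnThisCpuSketch(PPGMR0DYNMAPENTRY pEntry)
{
    RTCPUID idCpu = RTMpCpuId();
    if (RTCpuSetIsMember(&pEntry->PendingSet, idCpu))
    {
        RTCpuSetDel(&pEntry->PendingSet, idCpu);
        ASMInvalidatePage(pEntry->pvPage);
    }
}
#endif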
119
120
121/**
122 * Ring-0 dynamic mapping cache.
123 *
124 * This is initialized during VMMR0 module init but no segments are allocated at
125 * that time. Segments will be added when the first VM is started and removed
126 * again when the last VM shuts down, thus avoiding memory consumption while dormant.
127 * At module termination, the remaining bits will be freed up.
128 */
129typedef struct PGMR0DYNMAP
130{
131 /** The usual magic number / eye catcher (PGMR0DYNMAP_MAGIC). */
132 uint32_t u32Magic;
133 /** Spinlock serializing the normal operation of the cache. */
134 RTSPINLOCK hSpinlock;
135 /** Array for tracking and managing the pages. */
136 PPGMR0DYNMAPENTRY paPages;
137 /** The cache size given as a number of pages. */
138 uint32_t cPages;
139 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
140 bool fLegacyMode;
141 /** The current load. */
142 uint32_t cLoad;
143 /** The max load ever.
144 * This is maintained in order to trigger the addition of more mapping space. */
145 uint32_t cMaxLoad;
146 /** Initialization / termination lock. */
147 RTSEMFASTMUTEX hInitLock;
148 /** The number of users (protected by hInitLock). */
149 uint32_t cUsers;
150 /** Array containing a copy of the original page tables.
151 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
152 void *pvSavedPTEs;
153 /** List of segments. */
154 PPGMR0DYNMAPSEG pSegHead;
155 /** The paging mode. */
156 SUPPAGINGMODE enmPgMode;
157} PGMR0DYNMAP;
158/** Pointer to the ring-0 dynamic mapping cache */
159typedef PGMR0DYNMAP *PPGMR0DYNMAP;
160
161/** PGMR0DYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
162#define PGMR0DYNMAP_MAGIC 0x19640201
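/* Illustration only (hypothetical helper, not in the original file): how a cache
 * page index relates to the segment list. Each segment covers the contiguous index
 * range [iPage, iPage + cPages) of the shared paPages/pvSavedPTEs arrays, so a
 * lookup is a simple list walk. */
#if 0
DECLINLINE(PPGMR0DYNMAPSEG) pgmR0DynMapFindSegSketch(PPGMR0DYNMAP pThis, uint32_t iPage)
{
    for (PPGMR0DYNMAPSEG pSeg = pThis->pSegHead; pSeg; pSeg = pSeg->pNext)
        if (iPage - pSeg->iPage < pSeg->cPages)
            return pSeg;
    return NULL;
}
#endif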
163
164
165/**
166 * Paging level data.
167 */
168typedef struct PGMR0DYNMAPPGLVL
169{
170 uint32_t cLevels; /**< The number of levels. */
171 struct
172 {
173 RTHCPHYS HCPhys; /**< The address of the page for the current level,
174 * i.e. what hMemObj/hMapObj is currently mapping. */
175 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
176 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
177 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
178 uint32_t fPtrShift; /**< The pointer shift count. */
179 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
180 uint64_t fAndMask; /**< And mask to check entry flags. */
181 uint64_t fResMask; /**< The result from applying fAndMask. */
182 union
183 {
184 void *pv; /**< hMapObj address. */
185 PX86PGUINT paLegacy; /**< Legacy table view. */
186 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
187 } u;
188 } a[4];
189} PGMR0DYNMAPPGLVL;
190/** Pointer to paging level data. */
191typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
192
193
194/*******************************************************************************
195* Global Variables *
196*******************************************************************************/
197/** Pointer to the ring-0 dynamic mapping cache. */
198static PPGMR0DYNMAP g_pPGMR0DynMap;
199
200
201/*******************************************************************************
202* Internal Functions *
203*******************************************************************************/
204static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs);
205static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis);
206static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis);
207static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis);
208
209
210/**
211 * Initializes the ring-0 dynamic mapping cache.
212 *
213 * @returns VBox status code.
214 */
215VMMR0DECL(int) PGMR0DynMapInit(void)
216{
217#ifndef DEBUG_bird
218 return VINF_SUCCESS;
219#else
220 Assert(!g_pPGMR0DynMap);
221
222 /*
223 * Create and initialize the cache instance.
224 */
225 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)RTMemAllocZ(sizeof(*pThis));
226 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
227 int rc = VINF_SUCCESS;
228 pThis->enmPgMode = SUPR0GetPagingMode();
229 switch (pThis->enmPgMode)
230 {
231 case SUPPAGINGMODE_32_BIT:
232 case SUPPAGINGMODE_32_BIT_GLOBAL:
233 pThis->fLegacyMode = true; /* 32-bit legacy (non-PAE) paging. */
234 break;
235 case SUPPAGINGMODE_PAE:
236 case SUPPAGINGMODE_PAE_GLOBAL:
237 case SUPPAGINGMODE_PAE_NX:
238 case SUPPAGINGMODE_PAE_GLOBAL_NX:
239 case SUPPAGINGMODE_AMD64:
240 case SUPPAGINGMODE_AMD64_GLOBAL:
241 case SUPPAGINGMODE_AMD64_NX:
242 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
243 pThis->fLegacyMode = false;
244 break;
245 default:
246 rc = VERR_INTERNAL_ERROR;
247 break;
248 }
249 if (RT_SUCCESS(rc))
250 {
251 rc = RTSemFastMutexCreate(&pThis->hInitLock);
252 if (RT_SUCCESS(rc))
253 {
254 rc = RTSpinlockCreate(&pThis->hSpinlock);
255 if (RT_SUCCESS(rc))
256 {
257 pThis->u32Magic = PGMR0DYNMAP_MAGIC;
258 g_pPGMR0DynMap = pThis;
259 return VINF_SUCCESS;
260 }
261 RTSemFastMutexDestroy(pThis->hInitLock);
262 }
263 }
264 RTMemFree(pThis);
265 return rc;
266#endif
267}
268
269
270/**
271 * Terminates the ring-0 dynamic mapping cache.
272 */
273VMMR0DECL(void) PGMR0DynMapTerm(void)
274{
275#ifdef DEBUG_bird
276 /*
277 * Destroy the cache.
278 *
279 * There are not supposed to be any races here; the loader should
280 * make sure of that. So, don't bother locking anything.
281 *
282 * The VM objects should all be destroyed by now, so there are no
283 * dangling users or anything like that to clean up. This routine
284 * is just a mirror image of PGMR0DynMapInit.
285 */
286 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
287 if (pThis)
288 {
289 AssertPtr(pThis);
290 g_pPGMR0DynMap = NULL;
291
292 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->cPages,
293 ("cUsers=%d paPages=%p cPages=%#x\n",
294 pThis->cUsers, pThis->paPages, pThis->cPages));
295
296 /* Free the associated resources. */
297 RTSemFastMutexDestroy(pThis->hInitLock);
298 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
299 RTSpinlockDestroy(pThis->hSpinlock);
300 pThis->hSpinlock = NIL_RTSPINLOCK;
301 pThis->u32Magic = UINT32_MAX;
302 RTMemFree(pThis);
303 }
304#endif
305}
306
307
308/**
309 * Initializes the dynamic mapping cache for a new VM.
310 *
311 * @returns VBox status code.
312 * @param pVM Pointer to the shared VM structure.
313 */
314VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
315{
316 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
317
318 /*
319 * Initialize the auto sets.
320 */
321 VMCPUID idCpu = pVM->cCPUs;
322 AssertReturn(idCpu > 0 && idCpu <= VMCPU_MAX_CPU_COUNT, VERR_INTERNAL_ERROR);
323 while (idCpu-- > 0)
324 {
325 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
326 uint32_t j = RT_ELEMENTS(pSet->aEntries);
327 while (j-- > 0)
328 {
329 pSet->aEntries[j].iPage = UINT16_MAX;
330 pSet->aEntries[j].cRefs = 0;
331 }
332 pSet->cEntries = PGMMAPSET_CLOSED;
333 }
334
335 /*
336 * Do we need the cache? Skip the last bit if we don't.
337 */
338 if (!HWACCMIsEnabled(pVM))
339 return VINF_SUCCESS;
340
341 /*
342 * Reference and if necessary setup or expand the cache.
343 */
344 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
345 AssertPtrReturn(pThis, VERR_INTERNAL_ERROR);
346 int rc = RTSemFastMutexRequest(pThis->hInitLock);
347 AssertLogRelRCReturn(rc, rc);
348
349 pThis->cUsers++;
350 if (pThis->cUsers == 1)
351 rc = pgmR0DynMapSetup(pThis);
352 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages))
353 rc = pgmR0DynMapExpand(pThis);
354 if (RT_SUCCESS(rc))
355 pVM->pgm.s.pvR0DynMapUsed = pThis;
356 else
357 pThis->cUsers--;
358
359 RTSemFastMutexRelease(pThis->hInitLock);
360
361 return rc;
362}
363
364
365/**
366 * Terminates the dynamic mapping cache usage for a VM.
367 *
368 * @param pVM Pointer to the shared VM structure.
369 */
370VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
371{
372 /*
373 * Return immediately if we're not using the cache.
374 */
375 if (!pVM->pgm.s.pvR0DynMapUsed)
376 return;
377
378 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
379 AssertPtrReturnVoid(pThis);
380
381 int rc = RTSemFastMutexRequest(pThis->hInitLock);
382 AssertLogRelRCReturnVoid(rc);
383
384 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
385 {
386 pVM->pgm.s.pvR0DynMapUsed = NULL;
387
388 /*
389 * Clean up and check the auto sets.
390 */
391 VMCPUID idCpu = pVM->cCPUs;
392 while (idCpu-- > 0)
393 {
394 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
395 uint32_t j = pSet->cEntries;
396 if (j <= RT_ELEMENTS(pSet->aEntries))
397 {
398 /*
399 * The set is open, close it.
400 */
401 while (j-- > 0)
402 {
403 int32_t cRefs = pSet->aEntries[j].cRefs;
404 uint32_t iPage = pSet->aEntries[j].iPage;
405 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
406 if (iPage < pThis->cPages && cRefs > 0)
407 pgmR0DynMapReleasePage(pThis, iPage, cRefs);
408 else
409 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
410
411 pSet->aEntries[j].iPage = UINT16_MAX;
412 pSet->aEntries[j].cRefs = 0;
413 }
414 pSet->cEntries = PGMMAPSET_CLOSED;
415 }
416 else
417 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
418
419 j = RT_ELEMENTS(pSet->aEntries);
420 while (j-- > 0)
421 {
422 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
423 Assert(!pSet->aEntries[j].cRefs);
424 }
425 }
426
427 /*
428 * Release our reference to the mapping cache.
429 */
430 Assert(pThis->cUsers > 0);
431 pThis->cUsers--;
432 if (!pThis->cUsers)
433 pgmR0DynMapTearDown(pThis);
434 }
435 else
436 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
437
438 RTSemFastMutexRelease(pThis->hInitLock);
439}
440
441
442/**
443 * Calculate the new cache size based on cMaxLoad statistics.
444 *
445 * @returns Number of pages.
446 * @param pThis The dynamic mapping cache instance.
447 * @param pcMinPages Where to store the minimal cache size (in pages).
448 */
449static uint32_t pgmR0DynMapCalcNewSize(PPGMR0DYNMAP pThis, uint32_t *pcMinPages)
450{
451 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
452
453 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU (/2). */
454 RTCPUID cCpus = RTMpGetCount();
455 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
456 uint32_t cMinPages = cCpus * (PGMR0DYNMAP_PAGES_PER_CPU / 2);
457
458 /* adjust against cMaxLoad. */
459 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
460 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
461 pThis->cMaxLoad = 0;
462
463 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
464 cPages += PGMR0DYNMAP_PAGES_PER_CPU; /* grow until the overload threshold clears cMaxLoad */
465
466 if (pThis->cMaxLoad > cMinPages)
467 cMinPages = pThis->cMaxLoad;
468
469 /* adjust against max and current size. */
470 if (cPages < pThis->cPages)
471 cPages = pThis->cPages;
472 if (cPages > PGMR0DYNMAP_MAX_PAGES)
473 cPages = PGMR0DYNMAP_MAX_PAGES;
474
475 if (cMinPages < pThis->cPages)
476 cMinPages = pThis->cPages;
477 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
478 cMinPages = PGMR0DYNMAP_MAX_PAGES;
479
480 *pcMinPages = cMinPages;
481 return cPages;
482}
483
484
485/**
486 * Initializes the paging level data.
487 *
488 * @param pThis The dynamic mapping cache instance.
489 * @param pPgLvl The paging level data.
490 */
491void pgmR0DynMapPagingArrayInit(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
492{
493 RTCCUINTREG cr4 = ASMGetCR4();
494 switch (pThis->enmPgMode)
495 {
496 case SUPPAGINGMODE_32_BIT:
497 case SUPPAGINGMODE_32_BIT_GLOBAL:
498 pPgLvl->cLevels = 2;
499 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
500 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
501 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
502 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
503 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
504
505 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
506 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
507 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
508 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
509 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
510 break;
511
512 case SUPPAGINGMODE_PAE:
513 case SUPPAGINGMODE_PAE_GLOBAL:
514 case SUPPAGINGMODE_PAE_NX:
515 case SUPPAGINGMODE_PAE_GLOBAL_NX:
516 pPgLvl->cLevels = 3;
517 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
518 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
519 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
520 pPgLvl->a[0].fAndMask = X86_PDPE_P;
521 pPgLvl->a[0].fResMask = X86_PDPE_P;
522
523 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
524 pPgLvl->a[1].fPtrMask = X86_PD_MASK;
525 pPgLvl->a[1].fPtrShift = X86_PD_SHIFT;
526 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
527 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
528
529 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
530 pPgLvl->a[2].fPtrMask = X86_PT_MASK;
531 pPgLvl->a[2].fPtrShift = X86_PT_SHIFT;
532 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
533 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
534 break;
535
536 case SUPPAGINGMODE_AMD64:
537 case SUPPAGINGMODE_AMD64_GLOBAL:
538 case SUPPAGINGMODE_AMD64_NX:
539 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
540 pPgLvl->cLevels = 4; /* PML4, PDPT, PD and PT. */
541 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
542 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
543 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
544 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
545 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
546
547 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
548 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
549 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
550 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
551 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
552
553 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
554 pPgLvl->a[2].fPtrMask = X86_PD_MASK;
555 pPgLvl->a[2].fPtrShift = X86_PD_SHIFT;
556 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
557 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
558
559 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
560 pPgLvl->a[3].fPtrMask = X86_PT_MASK;
561 pPgLvl->a[3].fPtrShift = X86_PT_SHIFT;
562 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
563 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
564 break;
565
566 default:
567 AssertFailed();
568 pPgLvl->cLevels = 0;
569 break;
570 }
571
572 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
573 {
574 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
575 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
576 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
577 pPgLvl->a[i].u.pv = NULL;
578 }
579}
580
581
582/**
583 * Maps a PTE.
584 *
585 * This will update the segment structure when new PTs are mapped.
586 *
587 * It also assumes that we (for paranoia reasons) wish to establish a mapping
588 * chain from CR3 down to the PT that corresponds entirely to the processor we're
589 * currently running on, and goes about this by running with interrupts disabled
590 * and restarting from CR3 for every change.
591 *
592 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
593 * to re-enable interrupts.
594 * @param pThis The dynamic mapping cache instance.
595 * @param pPgLvl The paging level structure.
596 * @param pvPage The page (ring-0 virtual address) to locate the PTE for.
597 * @param pSeg The segment.
598 * @param cMaxPTs The max number of PTs expected in the segment.
599 * @param ppvPTE Where to store the PTE address.
600 */
601static int pgmR0DynMapPagingArrayMapPte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
602 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
603{
604 Assert(!(ASMGetFlags() & X86_EFL_IF));
605
606 void *pvEntry = NULL;
607 X86PGPAEUINT uEntry = ASMGetCR3();
608 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
609 {
610 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
611 if (pPgLvl->a[i].HCPhys != HCPhys)
612 {
613 /*
614 * Need to remap this level.
615 * The final level, the PT, will not be freed since that is what it's all about.
616 */
617 ASMIntEnable();
618 if (i + 1 == pPgLvl->cLevels)
619 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_INTERNAL_ERROR);
620 else
621 {
622 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
623 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
624 }
625
626 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE);
627 if (RT_SUCCESS(rc))
628 {
629 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj, (void *)-1 /* pvFixed */, 0 /* uAlignment */,
630 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
631 if (RT_SUCCESS(rc))
632 {
 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj); /* kernel address of the new mapping */
633 pPgLvl->a[i].HCPhys = HCPhys;
634 if (i + 1 == pPgLvl->cLevels)
635 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
636 ASMIntDisable();
637 return VINF_TRY_AGAIN;
638 }
639
640 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
641 }
642 else
643 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
644 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
645 return rc;
646 }
647
648 /*
649 * The next level.
650 */
651 uint32_t iEntry = ((uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
652 if (pThis->fLegacyMode)
653 {
654 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
655 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
656 }
657 else
658 {
659 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
660 uEntry = pPgLvl->a[i].u.paPae[iEntry];
661 }
662
663 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
664 {
665 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n",
666 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask));
667 return VERR_INTERNAL_ERROR;
668 }
669 }
670
671 /* made it thru without needing to remap anything. */
672 *ppvPTE = pvEntry;
673 return VINF_SUCCESS;
674}
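/* Illustration only (hypothetical, not in the original file): what a single step of
 * the walk above does with the masks prepared by pgmR0DynMapPagingArrayInit, shown
 * here for the PAE/AMD64 table layout. */
#if 0
DECLINLINE(bool) pgmR0DynMapWalkOneLevelSketch(X86PGPAEUINT const *paTable, void *pvPage,
                                               uint32_t fPtrShift, uint64_t fPtrMask,
                                               uint64_t fAndMask, uint64_t fResMask,
                                               X86PGPAEUINT *puEntry)
{
    uint32_t iEntry = (uint32_t)(((uintptr_t)pvPage >> fPtrShift) & fPtrMask);
    *puEntry = paTable[iEntry];               /* the entry covering pvPage at this level */
    return (*puEntry & fAndMask) == fResMask; /* present (and writable where required)? */
}
#endif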
675
676
677/**
678 * Adds a new segment of the specified size.
679 *
680 * @returns VBox status code.
681 * @param pThis The dynamic mapping cache instance.
682 * @param cPages The size of the new segment, given as a page count.
683 */
684static int pgmR0DynMapAddSeg(PPGMR0DYNMAP pThis, uint32_t cPages)
685{
686 int rc2;
687 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
688
689 /*
690 * Do the array reallocation first.
691 * (The pages array has to be replaced behind the spinlock of course.)
692 */
693 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
694 if (!pvSavedPTEs)
695 return VERR_NO_MEMORY;
696 pThis->pvSavedPTEs = pvSavedPTEs;
697
698 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
699 if (!pvPages)
700 {
701 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
702 if (pvSavedPTEs)
703 pThis->pvSavedPTEs = pvSavedPTEs;
704 return VERR_NO_MEMORY;
705 }
706
707 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
708 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
709
710 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
711 void *pvToFree = pThis->paPages;
712 pThis->paPages = (PPGMR0DYNMAPENTRY)pvPages;
713
714 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
715 RTMemFree(pvToFree);
716
717 /*
718 * Allocate the segment structure and pages memory.
719 */
720 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
721 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
722 if (!pSeg)
723 return VERR_NO_MEMORY;
724 pSeg->pNext = NULL;
725 pSeg->cPages = cPages;
726 pSeg->iPage = pThis->cPages;
727 pSeg->cPTs = 0;
728 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
729 if (RT_SUCCESS(rc))
730 {
731 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
732 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
733
734 /*
735 * Walk thru the pages and set them up with a mapping of their PTE and everything.
736 */
737 ASMIntDisable();
738 PGMR0DYNMAPPGLVL PgLvl;
739 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
740 uint32_t iEndPage = pThis->cPages + cPages;
741 for (uint32_t iPage = pThis->cPages;
742 iPage < iEndPage;
743 iPage++, pbPage += PAGE_SIZE)
744 {
745 /* Initialize the page data. */
746 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
747 pThis->paPages[iPage].pvPage = pbPage;
748 pThis->paPages[iPage].cRefs = 0;
749 pThis->paPages[iPage].uPte.pPae = 0;
750 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
751
752 /* Map its page table, retry until we've got a clean run (paranoia). */
753 do
754 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
755 &pThis->paPages[iPage].uPte.pv);
756 while (rc == VINF_TRY_AGAIN);
757 if (RT_FAILURE(rc))
758 break;
759
760 /* Save the PTE. */
761 if (pThis->fLegacyMode)
762 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
763 else
764 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
765 } /* for each page */
766 ASMIntEnable();
767
768 /* cleanup non-PT mappings */
769 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
770 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
771
772 if (RT_SUCCESS(rc))
773 {
774 /** @todo setup guard pages here later (strict builds should leave every
775 * second page and the start/end pages not present). */
776
777 /*
778 * Commit it by adding the segment to the list and updating the page count.
779 */
780 pSeg->pNext = pThis->pSegHead;
781 pThis->pSegHead = pSeg;
782 pThis->cPages += cPages;
783 return VINF_SUCCESS;
784 }
785
786 /*
787 * Bail out.
788 */
789 while (pSeg->cPTs-- > 0)
790 {
791 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
792 AssertRC(rc2);
793 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
794 }
795
796 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
797 AssertRC(rc2);
798 pSeg->hMemObj = NIL_RTR0MEMOBJ;
799 }
800 RTMemFree(pSeg);
801
802 /* Don't bother resizing the arrays, too lazy. */
803 return rc;
804}
805
806
807/**
808 * Called by PGMR0DynMapInitVM under the init lock.
809 *
810 * @returns VBox status code.
811 * @param pThis The dynamic mapping cache instance.
812 */
813static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis)
814{
815 /*
816 * Calc the size and add a segment of that size.
817 */
818 uint32_t cMinPages;
819 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
820 AssertReturn(cPages, VERR_INTERNAL_ERROR);
821 int rc = pgmR0DynMapAddSeg(pThis, cPages);
822 if (rc == VERR_NO_MEMORY)
823 {
824 /*
825 * Try adding smaller segments.
826 */
827 do
828 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
829 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
830 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
831 rc = VINF_SUCCESS;
832 if (rc == VERR_NO_MEMORY)
833 {
834 if (pThis->cPages)
835 pgmR0DynMapTearDown(pThis);
836 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
837 }
838 }
839 Assert(ASMGetFlags() & X86_EFL_IF);
840 return rc;
841}
842
843
844/**
845 * Called by PGMR0DynMapInitVM under the init lock.
846 *
847 * @returns VBox status code.
848 * @param pThis The dynamic mapping cache instance.
849 */
850static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis)
851{
852 /*
853 * Calc the new target size and add a segment of the appropriate size.
854 */
855 uint32_t cMinPages;
856 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
857 if (pThis->cPages >= cPages)
858 return VINF_SUCCESS;
859
860 uint32_t cAdd = cPages - pThis->cPages;
861 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
862 if (rc == VERR_NO_MEMORY)
863 {
864 /*
865 * Try adding smaller segments.
866 */
867 do
868 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
869 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
870 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
871 rc = VINF_SUCCESS;
872 if (rc == VERR_NO_MEMORY)
873 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
874 }
875 Assert(ASMGetFlags() & X86_EFL_IF);
876 return rc;
877}
878
879
880/**
881 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
882 *
883 * @param idCpu The current CPU.
884 * @param pvUser1 The dynamic mapping cache instance.
885 * @param pvUser2 Unused, NULL.
886 */
887static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
888{
889 Assert(!pvUser2);
890 PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)pvUser1;
891 Assert(pThis == g_pPGMR0DynMap);
892 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
893 uint32_t iPage = pThis->cPages;
894 while (iPage-- > 0)
895 ASMInvalidatePage(paPages[iPage].pvPage);
896}
897
898
899/**
900 * Called by PGMR0DynMapTermVM under the init lock.
901 *
902 * @returns VBox status code.
903 * @param pThis The dynamic mapping cache instance.
904 */
905static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis)
906{
907 /*
908 * Restore the original page table entries
909 */
910 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
911 uint32_t iPage = pThis->cPages;
912 if (pThis->fLegacyMode)
913 {
914 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
915 while (iPage-- > 0)
916 {
917 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
918 X86PGUINT uOld2 = uOld; NOREF(uOld2);
919 X86PGUINT uNew = paSavedPTEs[iPage];
920 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
921 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
922 }
923 }
924 else
925 {
926 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
927 while (iPage-- > 0)
928 {
929 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
930 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
931 X86PGPAEUINT uNew = paSavedPTEs[iPage];
932 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
933 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
934 }
935 }
936
937 /*
938 * Shoot down the TLBs on all CPUs before freeing them.
939 * If RTMpOnAll fails, make sure the TLBs are invalidated on the current CPU at least.
940 */
941 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
942 AssertRC(rc);
943 if (RT_FAILURE(rc))
944 {
945 iPage = pThis->cPages;
946 while (iPage-- > 0)
947 ASMInvalidatePage(paPages[iPage].pvPage);
948 }
949
950 /*
951 * Free the segments.
952 */
953 while (pThis->pSegHead)
954 {
955 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
956 pThis->pSegHead = pSeg->pNext;
957
958 uint32_t iPT = pSeg->cPTs;
959 while (iPT-- > 0)
960 {
961 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
962 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
963 }
964 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
965 pSeg->hMemObj = NIL_RTR0MEMOBJ;
966 pSeg->pNext = NULL;
967 pSeg->iPage = UINT16_MAX;
968 pSeg->cPages = 0;
969 pSeg->cPTs = 0;
970 RTMemFree(pSeg);
971 }
972
973 /*
974 * Free the arrays and restore the initial state.
975 * The cLoadMax value is left behind for the next setup.
976 */
977 RTMemFree(pThis->paPages);
978 pThis->paPages = NULL;
979 RTMemFree(pThis->pvSavedPTEs);
980 pThis->pvSavedPTEs = NULL;
981 pThis->cPages = 0;
982 pThis->cLoad = 0;
983}
984
985
986/**
987 * Release references to a page, caller owns the spin lock.
988 *
989 * @param pThis The dynamic mapping cache instance.
990 * @param iPage The page.
991 * @param cRefs The number of references to release.
992 */
993DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs)
994{
995 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs);
996 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
997 if (!cRefs)
998 pThis->cLoad--;
999}
1000
1001
1002/**
1003 * Release references to a page, caller does not own the spin lock.
1004 *
1005 * @param pThis The dynamic mapping cache instance.
1006 * @param iPage The page.
1007 * @param cRefs The number of references to release.
1008 */
1009static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1010{
1011 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1012 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1013 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1014 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1015}
1016
1017
1018/**
1019 * pgmR0DynMapPage worker that deals with the tedious bits.
1020 *
1021 * @returns The page index on success, UINT32_MAX on failure.
1022 * @param pThis The dynamic mapping cache instance.
1023 * @param HCPhys The address of the page to be mapped.
1024 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1025 */
1026static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage)
1027{
1028 /*
1029 * Check if any of the first 5 pages are unreferenced since the caller
1030 * has already made sure they aren't matching.
1031 */
1032 uint32_t const cPages = pThis->cPages;
1033 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1034 uint32_t iFreePage;
1035 if (!paPages[iPage].cRefs)
1036 iFreePage = iPage;
1037 else if (!paPages[(iPage + 1) % cPages].cRefs)
1038 iFreePage = (iPage + 1) % cPages;
1039 else if (!paPages[(iPage + 2) % cPages].cRefs)
1040 iFreePage = (iPage + 2) % cPages;
1041 else if (!paPages[(iPage + 3) % cPages].cRefs)
1042 iFreePage = (iPage + 3) % cPages;
1043 else if (!paPages[(iPage + 4) % cPages].cRefs)
1044 iFreePage = (iPage + 4) % cPages;
1045 else
1046 {
1047 /*
1048 * Search for an unused or matching entry.
1049 */
1050 iFreePage = (iPage + 5) % pThis->cPages;
1051 for (;;)
1052 {
1053 if (paPages[iFreePage].HCPhys == HCPhys)
1054 return iFreePage;
1055 if (!paPages[iFreePage].cRefs)
1056 break;
1057
1058 /* advance */
1059 iFreePage = (iFreePage + 1) % cPages;
1060 if (RT_UNLIKELY(iFreePage == iPage)) /* wrapped around without finding a free entry */
1061 return UINT32_MAX;
1062 }
1063 }
1064
1065 /*
1066 * Setup the new entry.
1067 */
1068 paPages[iFreePage].HCPhys = HCPhys;
1069 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1070 if (pThis->fLegacyMode)
1071 {
1072 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1073 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1074 X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1075 | X86_PTE_P | X86_PTE_A | X86_PTE_D
1076 | (HCPhys & X86_PTE_PG_MASK);
1077 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1078 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1079 }
1080 else
1081 {
1082 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1083 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1084 X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1085 | X86_PTE_P | X86_PTE_A | X86_PTE_D
1086 | (HCPhys & X86_PTE_PAE_PG_MASK);
1087 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1088 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1089 }
1090 return iFreePage;
1091}
1092
1093
1094/**
1095 * Maps a page into the pool.
1096 *
1097 * @returns Pointer to the mapping.
1098 * @param pThis The dynamic mapping cache instance.
1099 * @param HCPhys The address of the page to be mapped.
1100 * @param piPage Where to store the page index.
1101 */
1102DECLINLINE(void *) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t *piPage)
1103{
1104 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1105 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1106 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1107
1108 /*
1109 * Find an entry, if possible a matching one. The HCPhys address is hashed
1110 * down to a page index, collisions are handled by linear searching. Optimize
1111 * for a hit in the first 5 pages.
1112 *
1113 * Do the cheap hits here and defer the tedious searching and inserting
1114 * to a helper function.
1115 */
1116 uint32_t const cPages = pThis->cPages;
1117 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1118 PPGMR0DYNMAPENTRY paPages = pThis->paPages;
1119 if (paPages[iPage].HCPhys != HCPhys)
1120 {
1121 uint32_t iPage2 = (iPage + 1) % cPages;
1122 if (paPages[iPage2].HCPhys != HCPhys)
1123 {
1124 iPage2 = (iPage + 2) % cPages;
1125 if (paPages[iPage2].HCPhys != HCPhys)
1126 {
1127 iPage2 = (iPage + 3) % cPages;
1128 if (paPages[iPage2].HCPhys != HCPhys)
1129 {
1130 iPage2 = (iPage + 4) % cPages;
1131 if (paPages[iPage2].HCPhys != HCPhys)
1132 {
1133 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage);
1134 if (RT_UNLIKELY(iPage == UINT32_MAX))
1135 {
1136 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1137 return NULL;
1138 }
1139 }
1140 else
1141 iPage = iPage2;
1142 }
1143 else
1144 iPage = iPage2;
1145 }
1146 else
1147 iPage = iPage2;
1148 }
1149 else
1150 iPage = iPage2;
1151 }
1152
1153 /*
1154 * Reference it, update statistics and get the return address.
1155 */
1156 if (ASMAtomicIncS32(&paPages[iPage].cRefs) == 1)
1157 {
1158 pThis->cLoad++;
1159 if (pThis->cLoad > pThis->cMaxLoad)
1160 pThis->cMaxLoad = pThis->cLoad;
1161 Assert(pThis->cLoad <= pThis->cPages);
1162 }
1163 void *pvPage = paPages[iPage].pvPage;
1164
1165 /*
1166 * Invalidate the entry?
1167 */
1168 RTCPUID idRealCpu = RTMpCpuId();
1169 bool fInvalidateIt = RTCpuSetIsMember(&paPages[iPage].PendingSet, idRealCpu);
1170 if (fInvalidateIt)
1171 RTCpuSetDel(&paPages[iPage].PendingSet, idRealCpu);
1172
1173 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1174
1175 /*
1176 * Do the actual invalidation outside the spinlock.
1177 */
1178 ASMInvalidatePage(pvPage);
1179
1180 *piPage = iPage;
1181 return pvPage;
1182}
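/* Illustration only (hypothetical helper, not in the original file): the hash used
 * above is simply the host page frame number modulo the cache size. With a 256 page
 * cache, HCPhys 0x12345000 therefore probes slot 0x45 first, then the four slots
 * after it, before pgmR0DynMapPageSlow takes over. */
#if 0
DECLINLINE(uint32_t) pgmR0DynMapHashSketch(RTHCPHYS HCPhys, uint32_t cPages)
{
    return (uint32_t)((HCPhys >> PAGE_SHIFT) % cPages);
}
#endif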
1183
1184
1185/**
1186 * Signals the start of a new set of mappings.
1187 *
1188 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1189 * API is called.
1190 *
1191 * @param pVCpu The shared data for the current virtual CPU.
1192 */
1193VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu)
1194{
1195 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1196 pVCpu->pgm.s.AutoSet.cEntries = 0;
1197}
1198
1199
1200/**
1201 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associated APIs
1202 * since the PGMDynMapStartAutoSet call.
1203 *
1204 * @param pVCpu The shared data for the current virtual CPU.
1205 */
1206VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu)
1207{
1208 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1209
1210 /* close the set */
1211 uint32_t i = pVCpu->pgm.s.AutoSet.cEntries;
1212 AssertMsg(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries), ("%#x (%u)\n", i, i));
1213 pVCpu->pgm.s.AutoSet.cEntries = PGMMAPSET_CLOSED;
1214
1215 /* release any pages we're referencing. */
1216 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries)))
1217 {
1218 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1219 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
1220 RTSpinlockAcquire(pThis->hSpinlock, &Tmp);
1221
1222 while (i-- > 0)
1223 {
1224 uint32_t iPage = pSet->aEntries[i].iPage;
1225 Assert(iPage < pThis->cPages);
1226 int32_t cRefs = pSet->aEntries[i].cRefs;
1227 Assert(cRefs > 0);
1228 pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs);
1229
1230 pSet->aEntries[i].iPage = UINT16_MAX;
1231 pSet->aEntries[i].cRefs = 0;
1232 }
1233
1234 Assert(pThis->cLoad <= pThis->cPages);
1235 RTSpinlockRelease(pThis->hSpinlock, &Tmp);
1236 }
1237}
1238
1239
1240/**
1241 * Migrates the automatic mapping set of the current vCPU if necessary.
1242 *
1243 * This is called when re-entering the hardware assisted execution mode after a
1244 * nip down to ring-3. We run the risk that the CPU might have changed and we
1245 * therefore make sure all the cache entries currently in the auto set will
1246 * be valid on the new CPU. If the CPU didn't change, nothing will happen since
1247 * the entries will already have been invalidated on it.
1248 *
1249 * @param pVCpu The shared data for the current virtual CPU.
1250 * @thread EMT
1251 */
1252VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu)
1253{
1254 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1255 uint32_t i = pVCpu->pgm.s.AutoSet.cEntries;
1256 AssertMsg(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries), ("%#x (%u)\n", i, i));
1257 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pVCpu->pgm.s.AutoSet.aEntries)))
1258 {
1259 PPGMR0DYNMAP pThis = g_pPGMR0DynMap;
1260 RTCPUID idRealCpu = RTMpCpuId();
1261
1262 while (i-- > 0)
1263 {
1264 Assert(pSet->aEntries[i].cRefs > 0);
1265 uint32_t iPage = pSet->aEntries[i].iPage;
1266 Assert(iPage < pThis->cPages);
1267 if (RTCpuSetIsMember(&pThis->paPages[iPage].PendingSet, idRealCpu))
1268 {
1269 RTCpuSetDel(&pThis->paPages[iPage].PendingSet, idRealCpu);
1270 ASMInvalidatePage(pThis->paPages[iPage].pvPage);
1271 }
1272 }
1273 }
1274}
1275
1276
1277/**
1278 * As a final resort for a full auto set, try to merge duplicate entries.
1279 *
1280 * @param pSet The set.
1281 */
1282static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1283{
1284 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1285 {
1286 uint16_t const iPage = pSet->aEntries[i].iPage;
1287 uint32_t j = i + 1;
1288 while (j < pSet->cEntries)
1289 {
1290 if (pSet->aEntries[j].iPage != iPage)
1291 j++;
1292 else
1293 {
1294 /* merge j with i removing j. */
1295 pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs;
1296 pSet->cEntries--;
1297 if (j < pSet->cEntries)
1298 {
1299 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1300 pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX;
1301 pSet->aEntries[pSet->cEntries].cRefs = 0;
1302 }
1303 else
1304 {
1305 pSet->aEntries[j].iPage = UINT16_MAX;
1306 pSet->aEntries[j].cRefs = 0;
1307 }
1308 }
1309 }
1310 }
1311}
1312
1313
1314/* documented elsewhere - a bit of a mess. */
1315VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv)
1316{
1317 /*
1318 * Validate state.
1319 */
1320 AssertMsgReturn(pVM->pgm.s.pvR0DynMapUsed == g_pPGMR0DynMap,
1321 ("%p != %p\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap),
1322 VERR_ACCESS_DENIED);
1323 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1324 PVMCPU pVCpu = VMMGetCpu(pVM);
1325 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1326 AssertPtrReturn(pVCpu, VERR_INTERNAL_ERROR);
1327 AssertMsgReturn(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries), /* the set must be open */
1328 ("%#x (%u)\n", pSet->cEntries, pSet->cEntries), VERR_WRONG_ORDER);
1329
1330 /*
1331 * Map it.
1332 */
1333 uint32_t iPage;
1334 void *pvPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, &iPage);
1335 if (RT_UNLIKELY(!pvPage))
1336 {
1337 static uint32_t s_cBitched = 0;
1338 if (++s_cBitched < 10)
1339 LogRel(("PGMDynMapHCPage: cLoad=%u/%u cPages=%u\n",
1340 g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages));
1341 return VERR_PGM_DYNMAP_FAILED;
1342 }
1343
1344 /*
1345 * Add the page to the auto reference set.
1346 * If it's less than half full, don't bother looking for duplicates.
1347 */
1348 if (pSet->cEntries < RT_ELEMENTS(pSet->aEntries) / 2)
1349 {
1350 pSet->aEntries[pSet->cEntries].cRefs = 1;
1351 pSet->aEntries[pSet->cEntries++].iPage = iPage; /* post-increment commits the entry */
1352 }
1353 else
1354 {
1355 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
1356 int32_t i = pSet->cEntries;
1357 while (i-- > 0)
1358 if (pSet->aEntries[i].iPage == iPage)
1359 {
1360 pSet->aEntries[i].cRefs++;
1361 break;
1362 }
1363 if (i < 0)
1364 {
1365 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
1366 pgmDynMapOptimizeAutoSet(pSet);
1367 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
1368 {
1369 pSet->aEntries[pSet->cEntries].cRefs = 1;
1370 pSet->aEntries[pSet->cEntries++].iPage = iPage; /* post-increment commits the entry */
1371 }
1372 else
1373 {
1374 /* We're screwed. */
1375 pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1);
1376
1377 static uint32_t s_cBitched = 0;
1378 if (++s_cBitched < 10)
1379 LogRel(("PGMDynMapHCPage: set is full!\n"));
1380 return VERR_PGM_DYNMAP_FULL_SET;
1381 }
1382 }
1383 }
1384
1385 return VINF_SUCCESS;
1386}
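/* Illustration only (not in the original file): the intended calling pattern for the
 * auto-set APIs above, as seen from ring-0 code that already has pVM, pVCpu and a
 * page-aligned HCPhys in hand (those names are assumed from the caller's context). */
#if 0
{
    PGMDynMapStartAutoSet(pVCpu);                   /* open the per-VCPU auto set */

    void *pvPage;
    int rc = PGMDynMapHCPage(pVM, HCPhys, &pvPage);
    if (RT_SUCCESS(rc))
    {
        /* ... read or write the page through pvPage ... */
    }

    PGMDynMapReleaseAutoSet(pVCpu);                 /* drops every reference taken above */
}
#endif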
1387