VirtualBox

Changeset 41622 in vbox


Timestamp: Jun 8, 2012 1:01:44 PM
Author: vboxsync
svn:sync-xref-src-repo-rev: 78419
Message:

Runtime/r0drv/Solaris: Implemented large page support. Fixed possible freeing of wrong pages with regular 4K pages.
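
To make the fix concrete: the commit message says the old 4K path could free the wrong page. The old code pulled anonymous pages off the freelist/cachelist and returned them with page_free(), so once the page lock was dropped there was no reliable way to re-acquire exactly the same page. The new code names every page against a private vnode at a unique, ever-increasing offset (PG_NORELOC, so it cannot be relocated) and frees it by looking it up again under that same (vnode, offset) pair. Below is a minimal sketch of that scheme, simplified from the diff that follows; the identifiers g_DemoVnode, g_DemoMtx, g_offDemo, demoPageAlloc and demoPageFree are illustrative only, and the snippet assumes the usual Solaris kernel headers plus IPRT's RT_ALIGN_64 and PAGE_SIZE.

    /*
     * Sketch only -- not the committed code.  Each page is named against a
     * private vnode at a unique offset so the free path can re-find exactly
     * the page that was allocated.
     */
    static vnode_t    g_DemoVnode;
    static kmutex_t   g_DemoMtx;
    static u_offset_t g_offDemo;

    static page_t *demoPageAlloc(caddr_t virtAddr)
    {
        u_offset_t offPage;
        seg_t      KernelSeg;

        mutex_enter(&g_DemoMtx);
        g_offDemo = RT_ALIGN_64(g_offDemo, PAGE_SIZE) + PAGE_SIZE;   /* unique, never reused */
        offPage   = g_offDemo;
        mutex_exit(&g_DemoMtx);

        KernelSeg.s_as = &kas;
        /* PG_NORELOC keeps the page from being relocated behind our back. */
        page_t *pPage = page_create_va(&g_DemoVnode, offPage, PAGE_SIZE, PG_WAIT | PG_NORELOC,
                                       &KernelSeg, virtAddr);
        if (pPage)
        {
            /* Drop the i/o lock and downgrade to a shared lock; shared-locked pages stay put. */
            page_io_unlock(pPage);
            page_downgrade(pPage);
        }
        return pPage;
    }

    static void demoPageFree(page_t *pPage)
    {
        u_offset_t offPage = pPage->p_offset;
        if (!page_tryupgrade(pPage))
        {
            /* Cannot touch page_t members after page_unlock(); re-find the page by name. */
            page_unlock(pPage);
            pPage = page_lookup(&g_DemoVnode, offPage, SE_EXCL);
            /* With PG_NORELOC the lookup must return the very page we allocated. */
        }
        page_destroy(pPage, 0 /* move it to the free list */);
    }
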

Location: trunk/src/VBox/Runtime/r0drv/solaris
Files: 2 edited

  • trunk/src/VBox/Runtime/r0drv/solaris/memobj-r0drv-solaris.c

    r41146 r41622  
    55
    66/*
    7  * Copyright (C) 2006-2007 Oracle Corporation
     7 * Copyright (C) 2006-2012 Oracle Corporation
    88 *
    99 * This file is part of VirtualBox Open Source Edition (OSE), as
     
    7575*******************************************************************************/
    7676static vnode_t                  g_PageVnode;
     77static kmutex_t                 g_OffsetMtx;
     78static u_offset_t               g_offPage;
     79
     80static vnode_t                  g_LargePageVnode;
     81static kmutex_t                 g_LargePageOffsetMtx;
     82static u_offset_t               g_offLargePage;
    7783
    7884
     
    101107    PageFrameNum = hat_getpfnum(pHat, (caddr_t)(uVirtAddr & PAGEMASK));
    102108    AssertReleaseMsg(PageFrameNum != PFN_INVALID, ("rtR0MemObjSolVirtToPhys failed. pv=%p\n", pv));
    103     return (((uint64_t)PageFrameNum << PAGESHIFT) | (uVirtAddr & PAGEOFFSET));
     109    return (((uint64_t)PageFrameNum << PAGE_SHIFT) | (uVirtAddr & PAGE_OFFSET_MASK));
    104110}
    105111
     
    117123    pfn_t PageFrameNum = page_pptonum(pPage);
    118124    AssertReleaseMsg(PageFrameNum != PFN_INVALID, ("rtR0MemObjSolPagePhys failed pPage=%p\n"));
    119     return (uint64_t)PageFrameNum << PAGESHIFT;
     125    return (uint64_t)PageFrameNum << PAGE_SHIFT;
    120126}
    121127
    122128
    123129/**
    124  * Retreives a free page from the kernel freelist.
     130 * Allocates one page.
    125131 *
    126132 * @param virtAddr       The virtual address to which this page maybe mapped in
     
    130136 * @returns Pointer to the allocated page, NULL on failure.
    131137 */
    132 static page_t *rtR0MemObjSolPageFromFreelist(caddr_t virtAddr, size_t cbPage)
    133 {
    134     seg_t KernelSeg;
     138static page_t *rtR0MemObjSolPageAlloc(caddr_t virtAddr, size_t cbPage)
     139{
     140    Assert(cbPage == PAGE_SIZE);
     141
     142    u_offset_t offPage;
     143    seg_t      KernelSeg;
     144
     145    mutex_enter(&g_OffsetMtx);
     146    AssertCompileSize(u_offset_t, sizeof(uint64_t)); NOREF(RTASSERTVAR);
     147    g_offPage = RT_ALIGN_64(g_offPage, cbPage) + cbPage;
     148    offPage   = g_offPage;
     149    mutex_exit(&g_OffsetMtx);
     150
    135151    KernelSeg.s_as = &kas;
    136     page_t *pPage = page_get_freelist(&g_PageVnode, 0 /* offset */, &KernelSeg, virtAddr,
    137                                       cbPage, 0 /* flags */, NULL /* NUMA group */);
    138     if (   !pPage
    139         && g_frtSolUseKflt)
    140     {
    141         pPage = page_get_freelist(&g_PageVnode, 0 /* offset */, &KernelSeg, virtAddr,
    142                                   cbPage, PG_KFLT, NULL /* NUMA group */);
    143     }
     152    page_t *pPage = page_create_va(&g_PageVnode, offPage, cbPage, PG_WAIT | PG_NORELOC, &KernelSeg, virtAddr);
    144153    return pPage;
    145154}
     
    147156
    148157/**
    149  * Retrieves a free page from the kernel cachelist.
    150  *
    151  * @param virtAddr      The virtual address to which this page maybe mapped in
    152  *                      the future.
    153  * @param cbPage        The size of the page.
    154  *
    155  * @return Pointer to the allocated page, NULL on failure.
    156  */
    157 static page_t *rtR0MemObjSolPageFromCachelist(caddr_t virtAddr, size_t cbPage)
    158 {
    159     seg_t KernelSeg;
    160     KernelSeg.s_as = &kas;
    161     page_t *pPage = page_get_cachelist(&g_PageVnode, 0 /* offset */, &KernelSeg, virtAddr,
    162                                        0 /* flags */, NULL /* NUMA group */);
    163     if (   !pPage
    164         && g_frtSolUseKflt)
    165     {
    166         pPage = page_get_cachelist(&g_PageVnode, 0 /* offset */, &KernelSeg, virtAddr,
    167                                    PG_KFLT, NULL /* NUMA group */);
    168     }
    169 
    170     /*
    171      * Remove association with the vnode for pages from the cachelist.
    172      */
    173     if (!PP_ISAGED(pPage))
    174         page_hashout(pPage, NULL /* mutex */);
    175 
    176     return pPage;
    177 }
    178 
    179 
    180 /**
    181  * Allocates physical non-contiguous memory.
     158 * Allocates physical, non-contiguous memory of pages.
    182159 *
    183160 * @param uPhysHi   The upper physical address limit (inclusive).
     
    191168{
    192169    /*
     170     * VM1:
    193171     * The page freelist and cachelist both hold pages that are not mapped into any address space.
    194172     * The cachelist is not really free pages but when memory is exhausted they'll be moved to the
    195173     * free lists, it's the total of the free+cache list that we see on the 'free' column in vmstat.
    196174     *
    197      * Reserve available memory for pages and create the pages.
     175     * VM2:
     176     * @todo Document what happens behind the scenes in VM2 regarding the free and cachelist.
    198177     */
    199     pgcnt_t cPages = (cb + PAGESIZE - 1) >> PAGESHIFT;
     178
     179    /*
     180     * Non-pageable memory reservation request for _4K pages, don't sleep.
     181     */
     182    pgcnt_t cPages = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
    200183    int rc = page_resv(cPages, KM_NOSLEEP);
    201184    if (rc)
    202185    {
    203         rc = page_create_wait(cPages, 0 /* flags */);
    204         if (rc)
    205         {
    206             size_t   cbPages = cPages * sizeof(page_t *);
    207             page_t **ppPages = kmem_zalloc(cbPages, KM_SLEEP);
    208             if (RT_LIKELY(ppPages))
     186        size_t   cbPages = cPages * sizeof(page_t *);
     187        page_t **ppPages = kmem_zalloc(cbPages, KM_SLEEP);
     188        if (RT_LIKELY(ppPages))
     189        {
     190            /*
     191             * Get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
     192             * we don't yet have the 'virtAddr' to which this memory may be mapped.
     193             */
     194            caddr_t virtAddr = NULL;
     195            for (size_t i = 0; i < cPages; i++, virtAddr += PAGE_SIZE)
    209196            {
    210                 /*
    211                  * Get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
    212                  * we don't yet have the 'virtAddr' to which this memory may be mapped.
    213                  */
    214                 caddr_t virtAddr = NULL;
    215                 for (size_t i = 0; i < cPages; i++, virtAddr += PAGESIZE)
     197                uint32_t cTries = 3;
     198                page_t *pPage   = NULL;
     199                while (cTries > 0)
    216200                {
    217                     uint32_t cTries = 3;
    218                     page_t *pPage   = NULL;
    219                     while (cTries > 0)
     201                    /*
     202                     * Get a page from the free list locked exclusively. The page will be named (hashed in).
      203                     * Hashing out the page has no real benefits. Downgrade the page to a shared lock to
     204                     * prevent the page from being relocated.
     205                     */
     206                    pPage = rtR0MemObjSolPageAlloc(virtAddr, PAGE_SIZE);
     207                    if (!pPage)
     208                        break;
     209
     210                    page_io_unlock(pPage);
     211                    page_downgrade(pPage);
     212                    Assert(PAGE_LOCKED_SE(pPage, SE_SHARED));
     213
     214                    /*
     215                     * Check if the physical address backing the page is within the requested range if any.
     216                     * If it isn't, discard the page and try again.
     217                     */
     218                    if (uPhysHi != NIL_RTHCPHYS)
    220219                    {
    221                         /*
    222                          * Get a page from the freelist or cachelist & verify if it's within our
    223                          * requested range.
    224                          */
    225                         pPage = rtR0MemObjSolPageFromFreelist(virtAddr, PAGESIZE);
    226                         if (!pPage)
     220                        uint64_t uPhys = rtR0MemObjSolPagePhys(pPage);
     221                        if (uPhys > uPhysHi)
    227222                        {
    228                             pPage = rtR0MemObjSolPageFromCachelist(virtAddr, PAGESIZE);
    229                             if (RT_UNLIKELY(!pPage))
    230                                 break;
     223                            page_destroy(pPage, 0 /* move it to the free list */);
     224                            pPage = NULL;
     225                            --cTries;
     226                            continue;
    231227                        }
    232                         if (uPhysHi != NIL_RTHCPHYS)
    233                         {
    234                             uint64_t uPhys = rtR0MemObjSolPagePhys(pPage);
    235                             if (uPhys > uPhysHi)
    236                             {
    237                                 page_free(pPage, 0 /* don't need page, move to tail of pagelist */);
    238                                 pPage = NULL;
    239                                 --cTries;
    240                                 continue;
    241                             }
    242                         }
    243 
    244                         PP_CLRFREE(pPage);      /* Page is no longer free */
    245                         PP_CLRAGED(pPage);      /* Page is not hashed in */
    246                         ppPages[i] = pPage;
    247                         break;
    248228                    }
    249229
    250                     if (RT_UNLIKELY(!pPage))
    251                     {
    252                         /*
    253                          * No pages found or found pages didn't meet requirements, release what was grabbed so far.
    254                          */
    255                         page_create_putback(cPages - i);
    256                         while (--i >= 0)
    257                             page_free(ppPages[i], 0 /* don't need page, move to tail of pagelist */);
    258                         kmem_free(ppPages, cbPages);
    259                         page_unresv(cPages);
    260                         return NULL;
    261                     }
     230                    ppPages[i] = pPage;
     231                    break;
    262232                }
    263233
    264                 /*
    265                  * We now have the pages locked exclusively, before they are mapped in
    266                  * we must downgrade the lock.
    267                  */
    268                 if (puPhys)
    269                     *puPhys = rtR0MemObjSolPagePhys(ppPages[0]);
    270                 return ppPages;
     234                if (RT_UNLIKELY(!pPage))
     235                {
     236                    /*
     237                     * No pages found or found pages didn't meet requirements, release what was grabbed so far.
     238                     */
     239                    while (--i >= 0)
     240                        page_destroy(ppPages[i], 0 /* move it to the free list */);
     241                    kmem_free(ppPages, cbPages);
     242                    page_unresv(cPages);
     243                    return NULL;
     244                }
    271245            }
    272246
    273             page_create_putback(cPages);
     247            if (puPhys)
     248                *puPhys = rtR0MemObjSolPagePhys(ppPages[0]);
     249            return ppPages;
    274250        }
    275251
     
    282258
    283259/**
    284  * Prepares pages allocated by rtR0MemObjSolPagesAlloc for mapping.
    285  *
    286  * @param    ppPages    Pointer to the page list.
    287  * @param    cb         Size of the allocation.
    288  * @param    auPhys     Where to store the physical address of the premapped
    289  *                      pages.
    290  * @param    cPages     The number of pages (entries) in @a auPhys.
    291  *
    292  * @returns IPRT status code.
    293  */
    294 static int rtR0MemObjSolPagesPreMap(page_t **ppPages, size_t cb, uint64_t auPhys[], size_t cPages)
    295 {
    296     AssertPtrReturn(ppPages, VERR_INVALID_PARAMETER);
    297     AssertPtrReturn(auPhys, VERR_INVALID_PARAMETER);
    298 
    299     for (size_t iPage = 0; iPage < cPages; iPage++)
    300     {
    301         /*
    302          * Prepare pages for mapping into kernel/user-space. Downgrade the
    303          * exclusive page lock to a shared lock to prevent page relocation.
    304          */
    305         if (page_tryupgrade(ppPages[iPage]) == 1)
    306             page_downgrade(ppPages[iPage]);
    307 
    308         auPhys[iPage] = rtR0MemObjSolPagePhys(ppPages[iPage]);
    309     }
    310 
    311     return VINF_SUCCESS;
    312 }
    313 
    314 
    315 /**
    316  * Frees pages allocated by rtR0MemObjSolPagesAlloc.
      260 * Frees the allocated pages.
    317261 *
    318262 * @param ppPages       Pointer to the page list.
     
    321265static void rtR0MemObjSolPagesFree(page_t **ppPages, size_t cb)
    322266{
    323     size_t cPages  = (cb + PAGESIZE - 1) >> PAGESHIFT;
     267    size_t cPages  = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
    324268    size_t cbPages = cPages * sizeof(page_t *);
    325269    for (size_t iPage = 0; iPage < cPages; iPage++)
     
    328272         *  We need to exclusive lock the pages before freeing them.
    329273         */
     274        page_t     *pPage  = ppPages[iPage];
     275        u_offset_t offPage = pPage->p_offset;
     276
    330277        int rc = page_tryupgrade(ppPages[iPage]);
    331278        if (!rc)
    332279        {
    333             page_unlock(ppPages[iPage]);
    334             while (!page_lock(ppPages[iPage], SE_EXCL, NULL /* mutex */, P_RECLAIM))
    335             {
    336                 /* nothing */;
    337             }
    338         }
    339         page_free(ppPages[iPage], 0 /* don't need page, move to tail of pagelist */);
     280            page_unlock(pPage);
     281            page_t *pFoundPage = page_lookup(&g_PageVnode, offPage, SE_EXCL);
     282
     283            /*
     284             * Since we allocated the pages as PG_NORELOC we should only get back the exact page always.
     285             */
     286            AssertReleaseMsg(pFoundPage == pPage, ("Page lookup failed %p:%llx returned %p, expected %p\n",
     287                                                   &g_PageVnode, offPage, pFoundPage, pPage));
     288        }
     289        Assert(PAGE_LOCKED_SE(pPage, SE_EXCL));
     290        page_destroy(pPage, 0 /* move it to the free list */);
    340291    }
    341292    kmem_free(ppPages, cbPages);
     
    345296
    346297/**
    347  * Allocates a large page to cover the required allocation size.
     298 * Allocates one large page. There is currently no way on Solaris to request
     299 * a block larger than one page backed with physically contiguous memory, i.e.
     300 * PG_PHYSCONTIG is not yet supported.
    348301 *
    349302 * @param puPhys        Where to store the physical address of the allocated
    350303 *                      page. Optional, can be NULL.
    351  * @param cb            Size of the allocation.
    352  *
    353  * @returns Pointer to the allocated large page, NULL on failure.
    354  */
    355 static page_t *rtR0MemObjSolLargePageAlloc(uint64_t *puPhys, size_t cb)
     304 * @param cbLargePage   Size of the large page.
     305 *
     306 * @returns Pointer to a list of pages that cover the large page, NULL on
     307 *        failure.
     308 */
     309static page_t **rtR0MemObjSolLargePageAlloc(uint64_t *puPhys, size_t cbLargePage)
    356310{
    357311    /*
    358      * Reserve available memory and create the sub-pages.
     312     * Non-pageable memory reservation request for _4K pages, don't sleep.
    359313     */
    360     const pgcnt_t cPages = cb >> PAGESHIFT;
     314    size_t cPages       = (cbLargePage + PAGE_SIZE - 1) >> PAGE_SHIFT;
     315    size_t cbPages      = cPages * sizeof(page_t *);
     316    u_offset_t offPage  = 0;
    361317    int rc = page_resv(cPages, KM_NOSLEEP);
    362318    if (rc)
    363319    {
    364         rc = page_create_wait(cPages, 0 /* flags */);
    365         if (rc)
    366         {
    367             /*
    368              * Get a page off the free list. We set virtAddr to 0 since we don't know where
    369              * the memory is going to be mapped.
    370              */
     320        page_t **ppPages = kmem_zalloc(cbPages, KM_SLEEP);
     321        if (RT_LIKELY(ppPages))
     322        {
     323            mutex_enter(&g_LargePageOffsetMtx);
     324            AssertCompileSize(u_offset_t, sizeof(uint64_t)); NOREF(RTASSERTVAR);
     325            g_offLargePage = RT_ALIGN_64(g_offLargePage, cbLargePage) + cbLargePage;
     326            offPage        = g_offLargePage;
     327            mutex_exit(&g_LargePageOffsetMtx);
     328
    371329            seg_t KernelSeg;
    372             caddr_t virtAddr  = NULL;
    373             KernelSeg.s_as    = &kas;
    374             page_t *pRootPage = rtR0MemObjSolPageFromFreelist(virtAddr, cb);
     330            KernelSeg.s_as = &kas;
     331            page_t *pRootPage = page_create_va_large(&g_LargePageVnode, offPage, cbLargePage,
     332                                                     PG_EXCL, &KernelSeg, 0 /* vaddr */, NULL /* locality group */);
    375333            if (pRootPage)
    376334            {
    377                 AssertMsg(!(page_pptonum(pRootPage) & (cPages - 1)), ("%p:%lx cPages=%lx\n", pRootPage, page_pptonum(pRootPage), cPages));
    378 
    379335                /*
    380                  * Mark all the sub-pages as non-free and not-hashed-in.
    381                  * It is paramount that we destroy the list (before freeing it).
     336                 * Split it into sub-pages, downgrade each page to a shared lock to prevent page relocation.
    382337                 */
    383338                page_t *pPageList = pRootPage;
     
    388343                    AssertMsg(page_pptonum(pPage) == iPage + page_pptonum(pRootPage),
    389344                        ("%p:%lx %lx+%lx\n", pPage, page_pptonum(pPage), iPage, page_pptonum(pRootPage)));
     345                    AssertMsg(pPage->p_szc == pRootPage->p_szc, ("Size code mismatch %p %d %d\n", pPage,
     346                                                                 (int)pPage->p_szc, (int)pRootPage->p_szc));
     347
     348                    /*
     349                     * Lock the page into memory "long term". This prevents the pageout scanner (page_try_demote_pages()) from
     350                     * demoting the large page into smaller pages while we temporarily release the exclusive lock (during free).
     351                     */
     352                    page_pp_lock(pPage, 0 /* COW */, 1 /* Kernel */);
     353
    390354                    page_sub(&pPageList, pPage);
    391 
    392                     /*
    393                      * Ensure page is now be free and the page size-code must match that of the root page.
    394                      */
    395                     AssertMsg(PP_ISFREE(pPage), ("%p\n", pPage));
    396                     AssertMsg(pPage->p_szc == pRootPage->p_szc, ("%p - %d expected %d \n", pPage, pPage->p_szc, pRootPage->p_szc));
    397 
    398                     PP_CLRFREE(pPage);      /* Page no longer free */
    399                     PP_CLRAGED(pPage);      /* Page no longer hashed-in */
     355                    page_io_unlock(pPage);
     356                    page_downgrade(pPage);
     357                    Assert(PAGE_LOCKED_SE(pPage, SE_SHARED));
     358
     359                    ppPages[iPage] = pPage;
    400360                }
     361                Assert(pPageList == NULL);
     362                Assert(ppPages[0] == pRootPage);
    401363
    402364                uint64_t uPhys = rtR0MemObjSolPagePhys(pRootPage);
    403                 AssertMsg(!(uPhys & (cb - 1)), ("%llx %zx\n", uPhys, cb));
     365                AssertMsg(!(uPhys & (cbLargePage - 1)), ("%llx %zx\n", uPhys, cbLargePage));
    404366                if (puPhys)
    405367                    *puPhys = uPhys;
    406 
    407                 return pRootPage;
     368                return ppPages;
    408369            }
    409370
    410             page_create_putback(cPages);
     371            /*
     372             * Don't restore offPrev in case of failure (race condition), we have plenty of offset space.
     373             * The offset must be unique (for the same vnode) or we'll encounter panics on page_create_va_large().
     374             */
     375            kmem_free(ppPages, cbPages);
    411376        }
    412377
    413378        page_unresv(cPages);
    414379    }
    415 
    416380    return NULL;
    417381}
     
    419383
    420384/**
    421  * Prepares the large page allocated by rtR0MemObjSolLargePageAlloc to be mapped.
    422  *
    423  * @param    pRootPage      Pointer to the root page.
    424  * @param    cb             Size of the allocation.
    425  *
    426  * @returns IPRT status code.
    427  */
    428 static int rtR0MemObjSolLargePagePreMap(page_t *pRootPage, size_t cb)
    429 {
    430     const pgcnt_t cPages = cb >> PAGESHIFT;
    431 
    432     Assert(page_get_pagecnt(pRootPage->p_szc) == cPages);
    433     AssertMsg(!(page_pptonum(pRootPage) & (cPages - 1)), ("%p:%lx npages=%lx\n", pRootPage, page_pptonum(pRootPage), cPages));
    434 
    435     /*
    436      * We need to downgrade the sub-pages from exclusive to shared locking
    437      * to prevent page relocation.
    438      */
    439     for (pgcnt_t iPage = 0; iPage < cPages; iPage++)
    440     {
    441         page_t *pPage = page_nextn(pRootPage, iPage);
    442         AssertMsg(page_pptonum(pPage) == iPage + page_pptonum(pRootPage),
    443             ("%p:%lx %lx+%lx\n", pPage, page_pptonum(pPage), iPage, page_pptonum(pRootPage)));
    444         AssertMsg(!PP_ISFREE(pPage), ("%p\n", pPage));
    445 
    446         if (page_tryupgrade(pPage) == 1)
    447             page_downgrade(pPage);
    448         AssertMsg(!PP_ISFREE(pPage), ("%p\n", pPage));
    449     }
    450 
    451     return VINF_SUCCESS;
    452 }
    453 
    454 
    455 /**
    456  * Frees the page allocated by rtR0MemObjSolLargePageAlloc.
    457  *
    458  * @param    pRootPage      Pointer to the root page.
    459  * @param    cb             Allocated size.
    460  */
    461 static void rtR0MemObjSolLargePageFree(page_t *pRootPage, size_t cb)
    462 {
    463     pgcnt_t cPages = cb >> PAGESHIFT;
    464 
    465     Assert(page_get_pagecnt(pRootPage->p_szc) == cPages);
    466     AssertMsg(!(page_pptonum(pRootPage) & (cPages - 1)), ("%p:%lx cPages=%lx\n", pRootPage, page_pptonum(pRootPage), cPages));
    467 
    468     /*
    469      * We need to exclusively lock the sub-pages before freeing the large one.
    470      */
    471     for (pgcnt_t iPage = 0; iPage < cPages; iPage++)
    472     {
    473         page_t *pPage = page_nextn(pRootPage, iPage);
    474         AssertMsg(page_pptonum(pPage) == iPage + page_pptonum(pRootPage),
    475                   ("%p:%lx %lx+%lx\n", pPage, page_pptonum(pPage), iPage, page_pptonum(pRootPage)));
    476         AssertMsg(!PP_ISFREE(pPage), ("%p\n", pPage));
    477 
     385 * Frees the large page.
     386 *
     387 * @param    ppPages        Pointer to the list of small pages that cover the
     388 *                          large page.
     389 * @param    cbLargePage    Size of the allocation (i.e. size of the large
     390 *                          page).
     391 */
     392static void rtR0MemObjSolLargePageFree(page_t **ppPages, size_t cbLargePage)
     393{
     394    Assert(ppPages);
     395    Assert(cbLargePage > PAGE_SIZE);
     396
     397    bool   fDemoted   = false;
     398    size_t cPages     = (cbLargePage + PAGE_SIZE - 1) >> PAGE_SHIFT;
     399    size_t cbPages    = cPages * sizeof(page_t *);
     400    page_t *pPageList = ppPages[0];
     401
     402    for (size_t iPage = 0; iPage < cPages; iPage++)
     403    {
     404        /*
     405         * We need the pages exclusively locked, try upgrading the shared lock.
     406         * If it fails, drop the shared page lock (cannot access any page_t members once this is done)
     407         * and lookup the page from the page hash locking it exclusively.
     408         */
     409        page_t    *pPage    = ppPages[iPage];
     410        u_offset_t offPage  = pPage->p_offset;
    478411        int rc = page_tryupgrade(pPage);
    479412        if (!rc)
    480413        {
    481414            page_unlock(pPage);
    482             while (!page_lock(pPage, SE_EXCL, NULL /* mutex */, P_RECLAIM))
    483             {
    484                 /* nothing */;
    485             }
    486         }
    487     }
    488 
    489     /*
    490      * Free the large page and unreserve the memory.
    491      */
    492     page_free_pages(pRootPage);
     415            page_t *pFoundPage = page_lookup(&g_LargePageVnode, offPage, SE_EXCL);
     416            AssertRelease(pFoundPage);
     417#if 0
     418            /*
     419             * This can only be guaranteed if PG_NORELOC is used while allocating the pages.
     420             */
     421            AssertReleaseMsg(pFoundPage == pPage,
     422                             ("lookup failed %p:%llu returned %p, expected %p\n", &g_LargePageVnode, offPage,
     423                              pFoundPage, pPage));
     424#endif
     425
     426            /*
     427             * Check for page demotion (regardless of relocation). In VM1, the uncorrectable memory error scanner
     428             * does -not- respect the long-term page lock we have, so it might have demoted the page to _4K pages
     429             * while the page lock was dropped.
     430             */
     431            if (page_get_pagecnt(pFoundPage->p_szc) == 1)   /* Base size of only _4K associated with this page. */
     432                fDemoted = true;
     433
     434            ppPages[iPage] = pFoundPage;
     435            pPage          = pFoundPage;
     436        }
     437        Assert(PAGE_LOCKED_SE(pPage, SE_EXCL));
     438        page_pp_unlock(pPage, 0 /* COW */, 1 /* Kernel */);
     439    }
     440
     441    if (fDemoted)
     442    {
     443        for (size_t iPage = 0; iPage < cPages; iPage++)
     444        {
     445            Assert(page_get_pagecnt(ppPages[iPage]->p_szc) == 1);
     446            page_destroy(ppPages[iPage], 0 /* move it to the free list */);
     447        }
     448    }
     449    else
     450    {
     451        /*
     452         * Although we shred the adjacent pages in the linked list, page_destroy_pages works on
     453         * adjacent pages via array increments. So this does indeed free all the pages.
     454         */
     455        AssertPtr(pPageList);
     456        page_destroy_pages(pPageList);
     457    }
     458    kmem_free(ppPages, cbPages);
    493459    page_unresv(cPages);
    494 
    495460}
    496461
     
    578543 * @returns IPRT status code.
    579544 */
    580 static int rtR0MemObjSolUserMap(caddr_t *pVirtAddr, unsigned fPageAccess, uint64_t *paPhysAddrs, size_t cb)
     545static int rtR0MemObjSolUserMap(caddr_t *pVirtAddr, unsigned fPageAccess, uint64_t *paPhysAddrs, size_t cb, size_t cbPageSize)
    581546{
    582547    struct as *pAddrSpace = ((proc_t *)RTR0ProcHandleSelf())->p_as;
     
    586551    Args.paPhysAddrs = paPhysAddrs;
    587552    Args.fPageAccess = fPageAccess;
     553    Args.cbPageSize  = cbPageSize;
    588554
    589555    as_rangelock(pAddrSpace);
     
    686652    /* Allocate physically low page-aligned memory. */
    687653    uint64_t uPhysHi = _4G - 1;
    688     void *pvMem = rtR0SolMemAlloc(uPhysHi, NULL /* puPhys */, cb, PAGESIZE, false /* fContig */);
     654    void *pvMem = rtR0SolMemAlloc(uPhysHi, NULL /* puPhys */, cb, PAGE_SIZE, false /* fContig */);
    689655    if (RT_UNLIKELY(!pvMem))
    690656    {
     
    749715    if (s_cbLargePage == UINT32_MAX)
    750716    {
    751 #if 0 /* currently not entirely stable, so disabled. */
    752717        if (page_num_pagesizes() > 1)
    753             ASMAtomicWriteU32(&s_cbLargePage, page_get_pagesize(1));
     718            ASMAtomicWriteU32(&s_cbLargePage, page_get_pagesize(1)); /* Page-size code 1 maps to _2M on Solaris x86/amd64. */
    754719        else
    755 #endif
    756720            ASMAtomicWriteU32(&s_cbLargePage, 0);
    757721    }
     
    762726    {
    763727        /*
    764          * Allocate one large page.
     728         * Allocate one large page (backed by physically contiguous memory).
    765729         */
    766730        void *pvPages = rtR0MemObjSolLargePageAlloc(&PhysAddr, cb);
     
    782746        /*
    783747         * Allocate physically contiguous memory aligned as specified.
     748         * Note: contig_alloc() can be agonizingly slow for large (e.g. >= _2M) contiguous allocations.
      749         *      So we shouldn't ideally be in this path for large-page allocations.
    784750         */
    785         AssertCompile(NIL_RTHCPHYS == UINT64_MAX);
     751        AssertCompile(NIL_RTHCPHYS == UINT64_MAX); NOREF(RTASSERTVAR);
    786752        PhysAddr = PhysHighest;
    787753        void *pvMem = rtR0SolMemAlloc(PhysHighest, &PhysAddr, cb, uAlignment, true /* fContig */);
     
    917883
    918884
    919 DECLHIDDEN(int) rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, RTR0PROCESS R0Process)
     885DECLHIDDEN(int) rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment,
     886                                            RTR0PROCESS R0Process)
    920887{
    921888    return VERR_NOT_SUPPORTED;
     
    959926        RTHCPHYS HCPhys = rtR0MemObjNativeGetPagePhysAddr(pMemToMap, (offSub + offSub) >> PAGE_SHIFT);
    960927        AssertBreakStmt(HCPhys != NIL_RTHCPHYS, rc = VERR_INTERNAL_ERROR_2);
    961         pfn_t pfn = HCPhys >> PAGESHIFT;
    962         AssertBreakStmt(((RTHCPHYS)pfn << PAGESHIFT) == HCPhys, rc = VERR_INTERNAL_ERROR_3);
     928        pfn_t pfn = HCPhys >> PAGE_SHIFT;
     929        AssertBreakStmt(((RTHCPHYS)pfn << PAGE_SHIFT) == HCPhys, rc = VERR_INTERNAL_ERROR_3);
    963930
    964931        hat_devload(kas.a_hat, (uint8_t *)pv + off, PAGE_SIZE, pfn, fAttr, HAT_LOAD_LOCK);
     
    1008975    void           *pv                   = pMemToMapSolaris->Core.pv;
    1009976    size_t          cb                   = pMemToMapSolaris->Core.cb;
    1010     size_t          cPages               = cb >> PAGE_SHIFT;
     977    size_t          cPages               = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
    1011978
    1012979    /*
     
    1023990    {
    1024991        /*
    1025          * Prepare the pages according to type.
     992         * Prepare the pages for mapping according to type.
    1026993         */
    1027994        if (pMemToMapSolaris->Core.enmType == RTR0MEMOBJTYPE_PHYS_NC)
    1028             rc = rtR0MemObjSolPagesPreMap(pMemToMapSolaris->pvHandle, cb, paPhysAddrs, cPages);
     995        {
     996            page_t **ppPages = pMemToMapSolaris->pvHandle;
     997            for (size_t iPage = 0; iPage < cPages; iPage++)
     998                paPhysAddrs[iPage] = rtR0MemObjSolPagePhys(ppPages[iPage]);
     999        }
    10291000        else if (   pMemToMapSolaris->Core.enmType == RTR0MEMOBJTYPE_PHYS
    10301001                 && pMemToMapSolaris->fLargePage)
    10311002        {
    10321003            RTHCPHYS Phys = pMemToMapSolaris->Core.u.Phys.PhysBase;
    1033             for (pgcnt_t iPage = 0; iPage < cPages; iPage++, Phys += PAGE_SIZE)
     1004            for (size_t iPage = 0; iPage < cPages; iPage++, Phys += PAGE_SIZE)
    10341005                paPhysAddrs[iPage] = Phys;
    1035             rc = rtR0MemObjSolLargePagePreMap(pMemToMapSolaris->pvHandle, cb);
    10361006        }
    10371007        else
     
    10661036             */
    10671037            caddr_t UserAddr = NULL;
    1068             rc = rtR0MemObjSolUserMap(&UserAddr, fPageAccess, paPhysAddrs, cb);
     1038            rc = rtR0MemObjSolUserMap(&UserAddr, fPageAccess, paPhysAddrs, cb, PAGE_SIZE);
    10691039            if (RT_SUCCESS(rc))
    10701040            {
  • trunk/src/VBox/Runtime/r0drv/solaris/memobj-r0drv-solaris.h

    r41146 r41622  
    4141{
    4242    uint64_t *paPhysAddrs;
     43    size_t    cbPageSize;
    4344    uint_t    fPageAccess;
    4445} SEGVBOX_CRARGS;
     
    4849{
    4950    uint_t    fPageAccess;
     51    size_t    cbPageSize;
    5052} SEGVBOX_DATA;
    5153typedef SEGVBOX_DATA *PSEGVBOX_DATA;
     
    6567    AssertPtr(pData);
    6668
     69    /*
     70     * Currently we only map _4K pages but this segment driver can handle any size
     71     * supported by the Solaris HAT layer.
     72     */
     73    size_t cbPageSize  = pArgs->cbPageSize;
     74    size_t uPageShift  = 0;
     75    switch (cbPageSize)
     76    {
     77        case _4K: uPageShift = 12; break;
     78        case _2M: uPageShift = 21; break;
     79        default:  AssertReleaseMsgFailed(("Unsupported page size for mapping cbPageSize=%llx\n", cbPageSize)); break;
     80    }
     81
    6782    hat_map(pAddrSpace->a_hat, pSeg->s_base, pSeg->s_size, HAT_MAP);
    6883    pData->fPageAccess = pArgs->fPageAccess | PROT_USER;
     84    pData->cbPageSize  = cbPageSize;
    6985
    7086    pSeg->s_ops  = &s_SegVBoxOps;
     
    7591     */
    7692    caddr_t virtAddr = pSeg->s_base;
    77     pgcnt_t cPages   = (pSeg->s_size + PAGESIZE - 1) >> PAGESHIFT;
    78     for (pgcnt_t iPage = 0; iPage < cPages; ++iPage, virtAddr += PAGESIZE)
     93    pgcnt_t cPages   = (pSeg->s_size + cbPageSize - 1) >> uPageShift;
     94    for (pgcnt_t iPage = 0; iPage < cPages; ++iPage, virtAddr += cbPageSize)
    7995    {
    80         hat_devload(pAddrSpace->a_hat, virtAddr, PAGESIZE, pArgs->paPhysAddrs[iPage] >> PAGESHIFT,
    81                     pData->fPageAccess | HAT_UNORDERED_OK, HAT_LOAD | HAT_LOAD_LOCK);
     96        hat_devload(pAddrSpace->a_hat, virtAddr, cbPageSize, pArgs->paPhysAddrs[iPage] >> uPageShift,
     97                    pData->fPageAccess | HAT_UNORDERED_OK, HAT_LOAD_LOCK);
    8298    }
    8399
     
    98114
    99115    pDstData->fPageAccess  = pSrcData->fPageAccess;
     116    pDstData->cbPageSize   = pSrcData->cbPageSize;
    100117    pDstSeg->s_ops         = &s_SegVBoxOps;
    101118    pDstSeg->s_data        = pDstData;
     
    107124static int rtR0SegVBoxSolUnmap(seg_t *pSeg, caddr_t virtAddr, size_t cb)
    108125{
    109     /** @todo make these into release assertions. */
    110     if (   virtAddr < pSeg->s_base
    111         || virtAddr + cb > pSeg->s_base + pSeg->s_size
    112         || (cb & PAGEOFFSET) || ((uintptr_t)virtAddr & PAGEOFFSET))
     126    PSEGVBOX_DATA pData = pSeg->s_data;
     127
     128    AssertRelease(pData);
     129    AssertReleaseMsg(virtAddr >= pSeg->s_base, ("virtAddr=%p s_base=%p\n", virtAddr, pSeg->s_base));
     130    AssertReleaseMsg(virtAddr + cb <= pSeg->s_base + pSeg->s_size, ("virtAddr=%p cb=%llu s_base=%p s_size=%llu\n", virtAddr,
     131                                                                    cb, pSeg->s_base, pSeg->s_size));
     132    size_t cbPageOffset = pData->cbPageSize - 1;
     133    AssertRelease(!(cb & cbPageOffset));
     134    AssertRelease(!((uintptr_t)virtAddr & cbPageOffset));
     135
     136    if (   virtAddr != pSeg->s_base
     137        || cb       != pSeg->s_size)
    113138    {
    114         panic("rtRt0SegVBoxSolUnmap");
     139        return ENOTSUP;
    115140    }
    116 
    117     if (virtAddr != pSeg->s_base || cb != pSeg->s_size)
    118         return ENOTSUP;
    119141
    120142    hat_unload(pSeg->s_as->a_hat, virtAddr, cb, HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
     
    138160     * We would demand fault if the (u)read() path would SEGOP_FAULT() on buffers mapped in via our
    139161     * segment driver i.e. prefaults before DMA. Don't fail in such case where we're called directly,
    140      * see #5047.
     162     * see @bugref{5047}.
    141163     */
    142164    return 0;
     
    176198static size_t rtR0SegVBoxSolInCore(seg_t *pSeg, caddr_t virtAddr, size_t cb, char *pVec)
    177199{
    178     size_t cbLen = (cb + PAGEOFFSET) & PAGEMASK;
    179     for (virtAddr = 0; cbLen != 0; cbLen -= PAGESIZE, virtAddr += PAGESIZE)
     200    PSEGVBOX_DATA pData = pSeg->s_data;
     201    AssertRelease(pData);
     202    size_t uPageOffset  = pData->cbPageSize - 1;
     203    size_t uPageMask    = ~uPageOffset;
     204    size_t cbLen        = (cb + uPageOffset) & uPageMask;
     205    for (virtAddr = 0; cbLen != 0; cbLen -= pData->cbPageSize, virtAddr += pData->cbPageSize)
    180206        *pVec++ = 1;
    181207    return cbLen;
     
    295321#endif /* !___r0drv_solaris_memobj_r0drv_solaris_h */
    296322
    297 
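
The header changes above plumb the mapping page size (cbPageSize) into the VBox segment driver, so a mapping backed by large pages needs only one HAT translation per large page rather than one per 4K page. A minimal sketch of that idea follows, using only calls that appear in the diff (hat_devload with HAT_UNORDERED_OK attributes and HAT_LOAD_LOCK) and the IPRT size constants _4K/_2M; demoSegMap and its parameters are illustrative, not the committed rtR0SegVBoxSolCreate.

    /*
     * Sketch only -- map a range at a caller-supplied page size: derive the
     * shift from the page size, then install one PFN per (large) page.
     */
    static void demoSegMap(struct as *pAddrSpace, caddr_t virtAddr, size_t cbSeg,
                           uint64_t *paPhysAddrs, size_t cbPageSize, uint_t fPageAccess)
    {
        size_t uPageShift = (cbPageSize == _2M) ? 21 : 12;   /* only _4K and _2M are expected */
        pgcnt_t cPages    = (cbSeg + cbPageSize - 1) >> uPageShift;

        for (pgcnt_t iPage = 0; iPage < cPages; ++iPage, virtAddr += cbPageSize)
        {
            /* One locked translation per page; paPhysAddrs[] holds page-size aligned addresses. */
            hat_devload(pAddrSpace->a_hat, virtAddr, cbPageSize, paPhysAddrs[iPage] >> uPageShift,
                        fPageAccess | PROT_USER | HAT_UNORDERED_OK, HAT_LOAD_LOCK);
        }
    }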