VirtualBox

Changeset 41622 in vbox


Timestamp: Jun 8, 2012 1:01:44 PM
Author: vboxsync
svn:sync-xref-src-repo-rev: 78419
Message:

Runtime/r0drv/Solaris: Implemented large page support. Fixed possible freeing of wrong pages with regular 4K pages.
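
To make the fix concrete: the commit message says the old 4K path could free the wrong page. The old code pulled anonymous pages off the freelist/cachelist and returned them with page_free(), so once the page lock was dropped there was no reliable way to re-acquire exactly the same page. The new code names every page against a private vnode at a unique, ever-increasing offset (PG_NORELOC, so it cannot be relocated) and frees it by looking it up again under that same (vnode, offset) pair. Below is a minimal sketch of that scheme, simplified from the diff that follows; the identifiers g_DemoVnode, g_DemoMtx, g_offDemo, demoPageAlloc and demoPageFree are illustrative only, and the snippet assumes the usual Solaris kernel headers plus IPRT's RT_ALIGN_64 and PAGE_SIZE.

    /*
     * Sketch only -- not the committed code.  Each page is named against a
     * private vnode at a unique offset so the free path can re-find exactly
     * the page that was allocated.
     */
    static vnode_t    g_DemoVnode;
    static kmutex_t   g_DemoMtx;
    static u_offset_t g_offDemo;

    static page_t *demoPageAlloc(caddr_t virtAddr)
    {
        u_offset_t offPage;
        seg_t      KernelSeg;

        mutex_enter(&g_DemoMtx);
        g_offDemo = RT_ALIGN_64(g_offDemo, PAGE_SIZE) + PAGE_SIZE;   /* unique, never reused */
        offPage   = g_offDemo;
        mutex_exit(&g_DemoMtx);

        KernelSeg.s_as = &kas;
        /* PG_NORELOC keeps the page from being relocated behind our back. */
        page_t *pPage = page_create_va(&g_DemoVnode, offPage, PAGE_SIZE, PG_WAIT | PG_NORELOC,
                                       &KernelSeg, virtAddr);
        if (pPage)
        {
            /* Drop the i/o lock and downgrade to a shared lock; shared-locked pages stay put. */
            page_io_unlock(pPage);
            page_downgrade(pPage);
        }
        return pPage;
    }

    static void demoPageFree(page_t *pPage)
    {
        u_offset_t offPage = pPage->p_offset;
        if (!page_tryupgrade(pPage))
        {
            /* Cannot touch page_t members after page_unlock(); re-find the page by name. */
            page_unlock(pPage);
            pPage = page_lookup(&g_DemoVnode, offPage, SE_EXCL);
            /* With PG_NORELOC the lookup must return the very page we allocated. */
        }
        page_destroy(pPage, 0 /* move it to the free list */);
    }
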

Location: trunk/src/VBox/Runtime/r0drv/solaris
Files: 2 edited

  • trunk/src/VBox/Runtime/r0drv/solaris/memobj-r0drv-solaris.c

    r41146 r41622  
    55
    66/*
    7  * Copyright (C) 2006-2007 Oracle Corporation
     7 * Copyright (C) 2006-2012 Oracle Corporation
    88 *
    99 * This file is part of VirtualBox Open Source Edition (OSE), as
     
    7575*******************************************************************************/
    7676static vnode_t                  g_PageVnode;
     77static kmutex_t                 g_OffsetMtx;
     78static u_offset_t               g_offPage;
     79
     80static vnode_t                  g_LargePageVnode;
     81static kmutex_t                 g_LargePageOffsetMtx;
     82static u_offset_t               g_offLargePage;
    7783
    7884
     
    101107    PageFrameNum = hat_getpfnum(pHat, (caddr_t)(uVirtAddr & PAGEMASK));
    102108    AssertReleaseMsg(PageFrameNum != PFN_INVALID, ("rtR0MemObjSolVirtToPhys failed. pv=%p\n", pv));
    103     return (((uint64_t)PageFrameNum << PAGESHIFT) | (uVirtAddr & PAGEOFFSET));
     109    return (((uint64_t)PageFrameNum << PAGE_SHIFT) | (uVirtAddr & PAGE_OFFSET_MASK));
    104110}
    105111
     
    117123    pfn_t PageFrameNum = page_pptonum(pPage);
    118124    AssertReleaseMsg(PageFrameNum != PFN_INVALID, ("rtR0MemObjSolPagePhys failed pPage=%p\n"));
    119     return (uint64_t)PageFrameNum << PAGESHIFT;
     125    return (uint64_t)PageFrameNum << PAGE_SHIFT;
    120126}
    121127
    122128
    123129/**
    124  * Retreives a free page from the kernel freelist.
     130 * Allocates one page.
    125131 *
    126132 * @param virtAddr       The virtual address to which this page maybe mapped in
     
    130136 * @returns Pointer to the allocated page, NULL on failure.
    131137 */
    132 static page_t *rtR0MemObjSolPageFromFreelist(caddr_t virtAddr, size_t cbPage)
    133 {
    134     seg_t KernelSeg;
     138static page_t *rtR0MemObjSolPageAlloc(caddr_t virtAddr, size_t cbPage)
     139{
     140    Assert(cbPage == PAGE_SIZE);
     141
     142    u_offset_t offPage;
     143    seg_t      KernelSeg;
     144
     145    mutex_enter(&g_OffsetMtx);
     146    AssertCompileSize(u_offset_t, sizeof(uint64_t)); NOREF(RTASSERTVAR);
     147    g_offPage = RT_ALIGN_64(g_offPage, cbPage) + cbPage;
     148    offPage   = g_offPage;
     149    mutex_exit(&g_OffsetMtx);
     150
    135151    KernelSeg.s_as = &kas;
    136     page_t *pPage = page_get_freelist(&g_PageVnode, 0 /* offset */, &KernelSeg, virtAddr,
    137                                       cbPage, 0 /* flags */, NULL /* NUMA group */);
    138     if (   !pPage
    139         && g_frtSolUseKflt)
    140     {
    141         pPage = page_get_freelist(&g_PageVnode, 0 /* offset */, &KernelSeg, virtAddr,
    142                                   cbPage, PG_KFLT, NULL /* NUMA group */);
    143     }
     152    page_t *pPage = page_create_va(&g_PageVnode, offPage, cbPage, PG_WAIT | PG_NORELOC, &KernelSeg, virtAddr);
    144153    return pPage;
    145154}
     
    147156
    148157/**
    149  * Retrieves a free page from the kernel cachelist.
    150  *
    151  * @param virtAddr      The virtual address to which this page maybe mapped in
    152  *                      the future.
    153  * @param cbPage        The size of the page.
    154  *
    155  * @return Pointer to the allocated page, NULL on failure.
    156  */
    157 static page_t *rtR0MemObjSolPageFromCachelist(caddr_t virtAddr, size_t cbPage)
    158 {
    159     seg_t KernelSeg;
    160     KernelSeg.s_as = &kas;
    161     page_t *pPage = page_get_cachelist(&g_PageVnode, 0 /* offset */, &KernelSeg, virtAddr,
    162                                        0 /* flags */, NULL /* NUMA group */);
    163     if (   !pPage
    164         && g_frtSolUseKflt)
    165     {
    166         pPage = page_get_cachelist(&g_PageVnode, 0 /* offset */, &KernelSeg, virtAddr,
    167                                    PG_KFLT, NULL /* NUMA group */);
    168     }
    169 
    170     /*
    171      * Remove association with the vnode for pages from the cachelist.
    172      */
    173     if (!PP_ISAGED(pPage))
    174         page_hashout(pPage, NULL /* mutex */);
    175 
    176     return pPage;
    177 }
    178 
    179 
    180 /**
    181  * Allocates physical non-contiguous memory.
     158 * Allocates physical, non-contiguous memory of pages.
    182159 *
    183160 * @param uPhysHi   The upper physical address limit (inclusive).
     
    191168{
    192169    /*
     170     * VM1:
    193171     * The page freelist and cachelist both hold pages that are not mapped into any address space.
    194172     * The cachelist is not really free pages but when memory is exhausted they'll be moved to the
    195173     * free lists, it's the total of the free+cache list that we see on the 'free' column in vmstat.
    196174     *
    197      * Reserve available memory for pages and create the pages.
     175     * VM2:
     176     * @todo Document what happens behind the scenes in VM2 regarding the free and cachelist.
    198177     */
    199     pgcnt_t cPages = (cb + PAGESIZE - 1) >> PAGESHIFT;
     178
     179    /*
     180     * Non-pageable memory reservation request for _4K pages, don't sleep.
     181     */
     182    pgcnt_t cPages = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
    200183    int rc = page_resv(cPages, KM_NOSLEEP);
    201184    if (rc)
    202185    {
    203         rc = page_create_wait(cPages, 0 /* flags */);
    204         if (rc)
    205         {
    206             size_t   cbPages = cPages * sizeof(page_t *);
    207             page_t **ppPages = kmem_zalloc(cbPages, KM_SLEEP);
    208             if (RT_LIKELY(ppPages))
     186        size_t   cbPages = cPages * sizeof(page_t *);
     187        page_t **ppPages = kmem_zalloc(cbPages, KM_SLEEP);
     188        if (RT_LIKELY(ppPages))
     189        {
     190            /*
     191             * Get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
     192             * we don't yet have the 'virtAddr' to which this memory may be mapped.
     193             */
     194            caddr_t virtAddr = NULL;
     195            for (size_t i = 0; i < cPages; i++, virtAddr += PAGE_SIZE)
    209196            {
    210                 /*
    211                  * Get pages from kseg, the 'virtAddr' here is only for colouring but unfortunately
    212                  * we don't yet have the 'virtAddr' to which this memory may be mapped.
    213                  */
    214                 caddr_t virtAddr = NULL;
    215                 for (size_t i = 0; i < cPages; i++, virtAddr += PAGESIZE)
     197                uint32_t cTries = 3;
     198                page_t *pPage   = NULL;
     199                while (cTries > 0)
    216200                {
    217                     uint32_t cTries = 3;
    218                     page_t *pPage   = NULL;
    219                     while (cTries > 0)
     201                    /*
     202                     * Get a page from the free list locked exclusively. The page will be named (hashed in).
      203                     * Hashing out the page has no real benefits. Downgrade the page to a shared lock to
     204                     * prevent the page from being relocated.
     205                     */
     206                    pPage = rtR0MemObjSolPageAlloc(virtAddr, PAGE_SIZE);
     207                    if (!pPage)
     208                        break;
     209
     210                    page_io_unlock(pPage);
     211                    page_downgrade(pPage);
     212                    Assert(PAGE_LOCKED_SE(pPage, SE_SHARED));
     213
     214                    /*
     215                     * Check if the physical address backing the page is within the requested range if any.
     216                     * If it isn't, discard the page and try again.
     217                     */
     218                    if (uPhysHi != NIL_RTHCPHYS)
    220219                    {
    221                         /*
    222                          * Get a page from the freelist or cachelist & verify if it's within our
    223                          * requested range.
    224                          */
    225                         pPage = rtR0MemObjSolPageFromFreelist(virtAddr, PAGESIZE);
    226                         if (!pPage)
     220                        uint64_t uPhys = rtR0MemObjSolPagePhys(pPage);
     221                        if (uPhys > uPhysHi)
    227222                        {
    228                             pPage = rtR0MemObjSolPageFromCachelist(virtAddr, PAGESIZE);
    229                             if (RT_UNLIKELY(!pPage))
    230                                 break;
     223                            page_destroy(pPage, 0 /* move it to the free list */);
     224                            pPage = NULL;
     225                            --cTries;
     226                            continue;
    231227                        }
    232                         if (uPhysHi != NIL_RTHCPHYS)
    233                         {
    234                             uint64_t uPhys = rtR0MemObjSolPagePhys(pPage);
    235                             if (uPhys > uPhysHi)
    236                             {
    237                                 page_free(pPage, 0 /* don't need page, move to tail of pagelist */);
    238                                 pPage = NULL;
    239                                 --cTries;
    240                                 continue;
    241                             }
    242                         }
    243 
    244                         PP_CLRFREE(pPage);      /* Page is no longer free */
    245                         PP_CLRAGED(pPage);      /* Page is not hashed in */
    246                         ppPages[i] = pPage;
    247                         break;
    248228                    }
    249229
    250                     if (RT_UNLIKELY(!pPage))
    251                     {
    252                         /*
    253                          * No pages found or found pages didn't meet requirements, release what was grabbed so far.
    254                          */
    255                         page_create_putback(cPages - i);
    256                         while (--i >= 0)
    257                             page_free(ppPages[i], 0 /* don't need page, move to tail of pagelist */);
    258                         kmem_free(ppPages, cbPages);
    259                         page_unresv(cPages);
    260                         return NULL;
    261                     }
     230                    ppPages[i] = pPage;
     231                    break;
    262232                }
    263233
    264                 /*
    265                  * We now have the pages locked exclusively, before they are mapped in
    266                  * we must downgrade the lock.
    267                  */
    268                 if (puPhys)
    269                     *puPhys = rtR0MemObjSolPagePhys(ppPages[0]);
    270                 return ppPages;
     234                if (RT_UNLIKELY(!pPage))
     235                {
     236                    /*
     237                     * No pages found or found pages didn't meet requirements, release what was grabbed so far.
     238                     */
     239                    while (--i >= 0)
     240                        page_destroy(ppPages[i], 0 /* move it to the free list */);
     241                    kmem_free(ppPages, cbPages);
     242                    page_unresv(cPages);
     243                    return NULL;
     244                }
    271245            }
    272246
    273             page_create_putback(cPages);
     247            if (puPhys)
     248                *puPhys = rtR0MemObjSolPagePhys(ppPages[0]);
     249            return ppPages;
    274250        }
    275251
     
    282258
    283259/**
    284  * Prepares pages allocated by rtR0MemObjSolPagesAlloc for mapping.
    285  *
    286  * @param    ppPages    Pointer to the page list.
    287  * @param    cb         Size of the allocation.
    288  * @param    auPhys     Where to store the physical address of the premapped
    289  *                      pages.
    290  * @param    cPages     The number of pages (entries) in @a auPhys.
    291  *
    292  * @returns IPRT status code.
    293  */
    294 static int rtR0MemObjSolPagesPreMap(page_t **ppPages, size_t cb, uint64_t auPhys[], size_t cPages)
    295 {
    296     AssertPtrReturn(ppPages, VERR_INVALID_PARAMETER);
    297     AssertPtrReturn(auPhys, VERR_INVALID_PARAMETER);
    298 
    299     for (size_t iPage = 0; iPage < cPages; iPage++)
    300     {
    301         /*
    302          * Prepare pages for mapping into kernel/user-space. Downgrade the
    303          * exclusive page lock to a shared lock to prevent page relocation.
    304          */
    305         if (page_tryupgrade(ppPages[iPage]) == 1)
    306             page_downgrade(ppPages[iPage]);
    307 
    308         auPhys[iPage] = rtR0MemObjSolPagePhys(ppPages[iPage]);
    309     }
    310 
    311     return VINF_SUCCESS;
    312 }
    313 
    314 
    315 /**
    316  * Frees pages allocated by rtR0MemObjSolPagesAlloc.
      260 * Frees the allocated pages.
    317261 *
    318262 * @param ppPages       Pointer to the page list.
     
    321265static void rtR0MemObjSolPagesFree(page_t **ppPages, size_t cb)
    322266{
    323     size_t cPages  = (cb + PAGESIZE - 1) >> PAGESHIFT;
     267    size_t cPages  = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
    324268    size_t cbPages = cPages * sizeof(page_t *);
    325269    for (size_t iPage = 0; iPage < cPages; iPage++)
     
    328272         *  We need to exclusive lock the pages before freeing them.
    329273         */
     274        page_t     *pPage  = ppPages[iPage];
     275        u_offset_t offPage = pPage->p_offset;
     276
    330277        int rc = page_tryupgrade(ppPages[iPage]);
    331278        if (!rc)
    332279        {
    333             page_unlock(ppPages[iPage]);
    334             while (!page_lock(ppPages[iPage], SE_EXCL, NULL /* mutex */, P_RECLAIM))
    335             {
    336                 /* nothing */;
    337             }
    338         }
    339         page_free(ppPages[iPage], 0 /* don't need page, move to tail of pagelist */);
     280            page_unlock(pPage);
     281            page_t *pFoundPage = page_lookup(&g_PageVnode, offPage, SE_EXCL);
     282
     283            /*
     284             * Since we allocated the pages as PG_NORELOC we should only get back the exact page always.
     285             */
     286            AssertReleaseMsg(pFoundPage == pPage, ("Page lookup failed %p:%llx returned %p, expected %p\n",
     287                                                   &g_PageVnode, offPage, pFoundPage, pPage));
     288        }
     289        Assert(PAGE_LOCKED_SE(pPage, SE_EXCL));
     290        page_destroy(pPage, 0 /* move it to the free list */);
    340291    }
    341292    kmem_free(ppPages, cbPages);
     
    345296
    346297/**
    347  * Allocates a large page to cover the required allocation size.
     298 * Allocates one large page. There is currently no way on Solaris to request
     299 * a block larger than one page backed with physically contiguous memory, i.e.
     300 * PG_PHYSCONTIG is not yet supported.
    348301 *
    349302 * @param puPhys        Where to store the physical address of the allocated
    350303 *                      page. Optional, can be NULL.
    351  * @param cb            Size of the allocation.
    352  *
    353  * @returns Pointer to the allocated large page, NULL on failure.
    354  */
    355 static page_t *rtR0MemObjSolLargePageAlloc(uint64_t *puPhys, size_t cb)
     304 * @param cbLargePage   Size of the large page.
     305 *
     306 * @returns Pointer to a list of pages that cover the large page, NULL on
     307 *        failure.
     308 */
     309static page_t **rtR0MemObjSolLargePageAlloc(uint64_t *puPhys, size_t cbLargePage)
    356310{
    357311    /*
    358      * Reserve available memory and create the sub-pages.
     312     * Non-pageable memory reservation request for _4K pages, don't sleep.
    359313     */
    360     const pgcnt_t cPages = cb >> PAGESHIFT;
     314    size_t cPages       = (cbLargePage + PAGE_SIZE - 1) >> PAGE_SHIFT;
     315    size_t cbPages      = cPages * sizeof(page_t *);
     316    u_offset_t offPage  = 0;
    361317    int rc = page_resv(cPages, KM_NOSLEEP);
    362318    if (rc)
    363319    {
    364         rc = page_create_wait(cPages, 0 /* flags */);
    365         if (rc)
    366         {
    367             /*
    368              * Get a page off the free list. We set virtAddr to 0 since we don't know where
    369              * the memory is going to be mapped.
    370              */
     320        page_t **ppPages = kmem_zalloc(cbPages, KM_SLEEP);
     321        if (RT_LIKELY(ppPages))
     322        {
     323            mutex_enter(&g_LargePageOffsetMtx);
     324            AssertCompileSize(u_offset_t, sizeof(uint64_t)); NOREF(RTASSERTVAR);
     325            g_offLargePage = RT_ALIGN_64(g_offLargePage, cbLargePage) + cbLargePage;
     326            offPage        = g_offLargePage;
     327            mutex_exit(&g_LargePageOffsetMtx);
     328
    371329            seg_t KernelSeg;
    372             caddr_t virtAddr  = NULL;
    373             KernelSeg.s_as    = &kas;
    374             page_t *pRootPage = rtR0MemObjSolPageFromFreelist(virtAddr, cb);
     330            KernelSeg.s_as = &kas;
     331            page_t *pRootPage = page_create_va_large(&g_LargePageVnode, offPage, cbLargePage,
     332                                                     PG_EXCL, &KernelSeg, 0 /* vaddr */, NULL /* locality group */);
    375333            if (pRootPage)
    376334            {
    377                 AssertMsg(!(page_pptonum(pRootPage) & (cPages - 1)), ("%p:%lx cPages=%lx\n", pRootPage, page_pptonum(pRootPage), cPages));
    378 
    379335                /*
    380                  * Mark all the sub-pages as non-free and not-hashed-in.
    381                  * It is paramount that we destroy the list (before freeing it).
     336                 * Split it into sub-pages, downgrade each page to a shared lock to prevent page relocation.
    382337                 */
    383338                page_t *pPageList = pRootPage;
     
    388343                    AssertMsg(page_pptonum(pPage) == iPage + page_pptonum(pRootPage),
    389344                        ("%p:%lx %lx+%lx\n", pPage, page_pptonum(pPage), iPage, page_pptonum(pRootPage)));
     345                    AssertMsg(pPage->p_szc == pRootPage->p_szc, ("Size code mismatch %p %d %d\n", pPage,
     346                                                                 (int)pPage->p_szc, (int)pRootPage->p_szc));
     347
     348                    /*
     349                     * Lock the page into memory "long term". This prevents the pageout scanner (page_try_demote_pages()) from
     350                     * demoting the large page into smaller pages while we temporarily release the exclusive lock (during free).
     351                     */
     352                    page_pp_lock(pPage, 0 /* COW */, 1 /* Kernel */);
     353
    390354                    page_sub(&pPageList, pPage);
    391 
    392                     /*
    393                      * Ensure page is now be free and the page size-code must match that of the root page.
    394                      */
    395                     AssertMsg(PP_ISFREE(pPage), ("%p\n", pPage));
    396                     AssertMsg(pPage->p_szc == pRootPage->p_szc, ("%p - %d expected %d \n", pPage, pPage->p_szc, pRootPage->p_szc));
    397 
    398                     PP_CLRFREE(pPage);      /* Page no longer free */
    399                     PP_CLRAGED(pPage);      /* Page no longer hashed-in */
     355                    page_io_unlock(pPage);
     356                    page_downgrade(pPage);
     357                    Assert(PAGE_LOCKED_SE(pPage, SE_SHARED));
     358
     359                    ppPages[iPage] = pPage;
    400360                }
     361                Assert(pPageList == NULL);
     362                Assert(ppPages[0] == pRootPage);
    401363
    402364                uint64_t uPhys = rtR0MemObjSolPagePhys(pRootPage);
    403                 AssertMsg(!(uPhys & (cb - 1)), ("%llx %zx\n", uPhys, cb));
     365                AssertMsg(!(uPhys & (cbLargePage - 1)), ("%llx %zx\n", uPhys, cbLargePage));
    404366                if (puPhys)
    405367                    *puPhys = uPhys;
    406 
    407                 return pRootPage;
     368                return ppPages;
    408369            }
    409370
    410             page_create_putback(cPages);
     371            /*
     372             * Don't restore offPrev in case of failure (race condition), we have plenty of offset space.
     373             * The offset must be unique (for the same vnode) or we'll encounter panics on page_create_va_large().
     374             */
     375            kmem_free(ppPages, cbPages);
    411376        }
    412377
    413378        page_unresv(cPages);
    414379    }
    415 
    416380    return NULL;
    417381}
     
    419383
    420384/**
    421  * Prepares the large page allocated by rtR0MemObjSolLargePageAlloc to be mapped.
    422  *
    423  * @param    pRootPage      Pointer to the root page.
    424  * @param    cb             Size of the allocation.
    425  *
    426  * @returns IPRT status code.
    427  */
    428 static int rtR0MemObjSolLargePagePreMap(page_t *pRootPage, size_t cb)
    429 {
    430     const pgcnt_t cPages = cb >> PAGESHIFT;
    431 
    432     Assert(page_get_pagecnt(pRootPage->p_szc) == cPages);
    433     AssertMsg(!(page_pptonum(pRootPage) & (cPages - 1)), ("%p:%lx npages=%lx\n", pRootPage, page_pptonum(pRootPage), cPages));
    434 
    435     /*
    436      * We need to downgrade the sub-pages from exclusive to shared locking
    437      * to prevent page relocation.
    438      */
    439     for (pgcnt_t iPage = 0; iPage < cPages; iPage++)
    440     {
    441         page_t *pPage = page_nextn(pRootPage, iPage);
    442         AssertMsg(page_pptonum(pPage) == iPage + page_pptonum(pRootPage),
    443             ("%p:%lx %lx+%lx\n", pPage, page_pptonum(pPage), iPage, page_pptonum(pRootPage)));
    444         AssertMsg(!PP_ISFREE(pPage), ("%p\n", pPage));
    445 
    446         if (page_tryupgrade(pPage) == 1)
    447             page_downgrade(pPage);
    448         AssertMsg(!PP_ISFREE(pPage), ("%p\n", pPage));
    449     }
    450 
    451     return VINF_SUCCESS;
    452 }
    453 
    454 
    455 /**
    456  * Frees the page allocated by rtR0MemObjSolLargePageAlloc.
    457  *
    458  * @param    pRootPage      Pointer to the root page.
    459  * @param    cb             Allocated size.
    460  */
    461 static void rtR0MemObjSolLargePageFree(page_t *pRootPage, size_t cb)
    462 {
    463     pgcnt_t cPages = cb >> PAGESHIFT;
    464 
    465     Assert(page_get_pagecnt(pRootPage->p_szc) == cPages);
    466     AssertMsg(!(page_pptonum(pRootPage) & (cPages - 1)), ("%p:%lx cPages=%lx\n", pRootPage, page_pptonum(pRootPage), cPages));
    467 
    468     /*
    469      * We need to exclusively lock the sub-pages before freeing the large one.
    470      */
    471     for (pgcnt_t iPage = 0; iPage < cPages; iPage++)
    472     {
    473         page_t *pPage = page_nextn(pRootPage, iPage);
    474         AssertMsg(page_pptonum(pPage) == iPage + page_pptonum(pRootPage),
    475                   ("%p:%lx %lx+%lx\n", pPage, page_pptonum(pPage), iPage, page_pptonum(pRootPage)));
    476         AssertMsg(!PP_ISFREE(pPage), ("%p\n", pPage));
    477 
     385 * Frees the large page.
     386 *
     387 * @param    ppPages        Pointer to the list of small pages that cover the
     388 *                          large page.
     389 * @param    cbLargePage    Size of the allocation (i.e. size of the large
     390 *                          page).
     391 */
     392static void rtR0MemObjSolLargePageFree(page_t **ppPages, size_t cbLargePage)
     393{
     394    Assert(ppPages);
     395    Assert(cbLargePage > PAGE_SIZE);
     396
     397    bool   fDemoted   = false;
     398    size_t cPages     = (cbLargePage + PAGE_SIZE - 1) >> PAGE_SHIFT;
     399    size_t cbPages    = cPages * sizeof(page_t *);
     400    page_t *pPageList = ppPages[0];
     401
     402    for (size_t iPage = 0; iPage < cPages; iPage++)
     403    {
     404        /*
     405         * We need the pages exclusively locked, try upgrading the shared lock.
     406         * If it fails, drop the shared page lock (cannot access any page_t members once this is done)
     407         * and lookup the page from the page hash locking it exclusively.
     408         */
     409        page_t    *pPage    = ppPages[iPage];
     410        u_offset_t offPage  = pPage->p_offset;
    478411        int rc = page_tryupgrade(pPage);
    479412        if (!rc)
    480413        {
    481414            page_unlock(pPage);
    482             while (!page_lock(pPage, SE_EXCL, NULL /* mutex */, P_RECLAIM))
    483             {
    484                 /* nothing */;
    485             }
    486         }
    487     }
    488 
    489     /*
    490      * Free the large page and unreserve the memory.
    491      */
    492     page_free_pages(pRootPage);
     415            page_t *pFoundPage = page_lookup(&g_LargePageVnode, offPage, SE_EXCL);
     416            AssertRelease(pFoundPage);
     417#if 0
     418            /*
     419             * This can only be guaranteed if PG_NORELOC is used while allocating the pages.
     420             */
     421            AssertReleaseMsg(pFoundPage == pPage,
     422                             ("lookup failed %p:%llu returned %p, expected %p\n", &g_LargePageVnode, offPage,
     423                              pFoundPage, pPage));
     424#endif
     425
     426            /*
     427             * Check for page demotion (regardless of relocation). In VM1, the uncorrectable memory error scanner
     428             * does -not- respect the long-term page lock we have, so it might have demoted the page to _4K pages
     429             * while the page lock was dropped.
     430             */
     431            if (page_get_pagecnt(pFoundPage->p_szc) == 1)   /* Base size of only _4K associated with this page. */
     432                fDemoted = true;
     433
     434            ppPages[iPage] = pFoundPage;
     435            pPage          = pFoundPage;
     436        }
     437        Assert(PAGE_LOCKED_SE(pPage, SE_EXCL));
     438        page_pp_unlock(pPage, 0 /* COW */, 1 /* Kernel */);
     439    }
     440
     441    if (fDemoted)
     442    {
     443        for (size_t iPage = 0; iPage < cPages; iPage++)
     444        {
     445            Assert(page_get_pagecnt(ppPages[iPage]->p_szc) == 1);
     446            page_destroy(ppPages[iPage], 0 /* move it to the free list */);
     447        }
     448    }
     449    else
     450    {
     451        /*
     452         * Although we shred the adjacent pages in the linked list, page_destroy_pages works on
     453         * adjacent pages via array increments. So this does indeed free all the pages.
     454         */
     455        AssertPtr(pPageList);
     456        page_destroy_pages(pPageList);
     457    }
     458    kmem_free(ppPages, cbPages);
    493459    page_unresv(cPages);
    494 
    495460}
    496461
     
    578543 * @returns IPRT status code.
    579544 */
    580 static int rtR0MemObjSolUserMap(caddr_t *pVirtAddr, unsigned fPageAccess, uint64_t *paPhysAddrs, size_t cb)
     545static int rtR0MemObjSolUserMap(caddr_t *pVirtAddr, unsigned fPageAccess, uint64_t *paPhysAddrs, size_t cb, size_t cbPageSize)
    581546{
    582547    struct as *pAddrSpace = ((proc_t *)RTR0ProcHandleSelf())->p_as;
     
    586551    Args.paPhysAddrs = paPhysAddrs;
    587552    Args.fPageAccess = fPageAccess;
     553    Args.cbPageSize  = cbPageSize;
    588554
    589555    as_rangelock(pAddrSpace);
     
    686652    /* Allocate physically low page-aligned memory. */
    687653    uint64_t uPhysHi = _4G - 1;
    688     void *pvMem = rtR0SolMemAlloc(uPhysHi, NULL /* puPhys */, cb, PAGESIZE, false /* fContig */);
     654    void *pvMem = rtR0SolMemAlloc(uPhysHi, NULL /* puPhys */, cb, PAGE_SIZE, false /* fContig */);
    689655    if (RT_UNLIKELY(!pvMem))
    690656    {
     
    749715    if (s_cbLargePage == UINT32_MAX)
    750716    {
    751 #if 0 /* currently not entirely stable, so disabled. */
    752717        if (page_num_pagesizes() > 1)
    753             ASMAtomicWriteU32(&s_cbLargePage, page_get_pagesize(1));
     718            ASMAtomicWriteU32(&s_cbLargePage, page_get_pagesize(1)); /* Page-size code 1 maps to _2M on Solaris x86/amd64. */
    754719        else
    755 #endif
    756720            ASMAtomicWriteU32(&s_cbLargePage, 0);
    757721    }
     
    762726    {
    763727        /*
    764          * Allocate one large page.
     728         * Allocate one large page (backed by physically contiguous memory).
    765729         */
    766730        void *pvPages = rtR0MemObjSolLargePageAlloc(&PhysAddr, cb);
     
    782746        /*
    783747         * Allocate physically contiguous memory aligned as specified.
     748         * Note: contig_alloc() can be agonizingly slow for large (e.g. >= _2M) contiguous allocations.
      749         *      So we shouldn't ideally be in this path for large-page allocations.
    784750         */
    785         AssertCompile(NIL_RTHCPHYS == UINT64_MAX);
     751        AssertCompile(NIL_RTHCPHYS == UINT64_MAX); NOREF(RTASSERTVAR);
    786752        PhysAddr = PhysHighest;
    787753        void *pvMem = rtR0SolMemAlloc(PhysHighest, &PhysAddr, cb, uAlignment, true /* fContig */);
     
    917883
    918884
    919 DECLHIDDEN(int) rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, RTR0PROCESS R0Process)
     885DECLHIDDEN(int) rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment,
     886                                            RTR0PROCESS R0Process)
    920887{
    921888    return VERR_NOT_SUPPORTED;
     
    959926        RTHCPHYS HCPhys = rtR0MemObjNativeGetPagePhysAddr(pMemToMap, (offSub + offSub) >> PAGE_SHIFT);
    960927        AssertBreakStmt(HCPhys != NIL_RTHCPHYS, rc = VERR_INTERNAL_ERROR_2);
    961         pfn_t pfn = HCPhys >> PAGESHIFT;
    962         AssertBreakStmt(((RTHCPHYS)pfn << PAGESHIFT) == HCPhys, rc = VERR_INTERNAL_ERROR_3);
     928        pfn_t pfn = HCPhys >> PAGE_SHIFT;
     929        AssertBreakStmt(((RTHCPHYS)pfn << PAGE_SHIFT) == HCPhys, rc = VERR_INTERNAL_ERROR_3);
    963930
    964931        hat_devload(kas.a_hat, (uint8_t *)pv + off, PAGE_SIZE, pfn, fAttr, HAT_LOAD_LOCK);
     
    1008975    void           *pv                   = pMemToMapSolaris->Core.pv;
    1009976    size_t          cb                   = pMemToMapSolaris->Core.cb;
    1010     size_t          cPages               = cb >> PAGE_SHIFT;
     977    size_t          cPages               = (cb + PAGE_SIZE - 1) >> PAGE_SHIFT;
    1011978
    1012979    /*
     
    1023990    {
    1024991        /*
    1025          * Prepare the pages according to type.
     992         * Prepare the pages for mapping according to type.
    1026993         */
    1027994        if (pMemToMapSolaris->Core.enmType == RTR0MEMOBJTYPE_PHYS_NC)
    1028             rc = rtR0MemObjSolPagesPreMap(pMemToMapSolaris->pvHandle, cb, paPhysAddrs, cPages);
     995        {
     996            page_t **ppPages = pMemToMapSolaris->pvHandle;
     997            for (size_t iPage = 0; iPage < cPages; iPage++)
     998                paPhysAddrs[iPage] = rtR0MemObjSolPagePhys(ppPages[iPage]);
     999        }
    10291000        else if (   pMemToMapSolaris->Core.enmType == RTR0MEMOBJTYPE_PHYS
    10301001                 && pMemToMapSolaris->fLargePage)
    10311002        {
    10321003            RTHCPHYS Phys = pMemToMapSolaris->Core.u.Phys.PhysBase;
    1033             for (pgcnt_t iPage = 0; iPage < cPages; iPage++, Phys += PAGE_SIZE)
     1004            for (size_t iPage = 0; iPage < cPages; iPage++, Phys += PAGE_SIZE)
    10341005                paPhysAddrs[iPage] = Phys;
    1035             rc = rtR0MemObjSolLargePagePreMap(pMemToMapSolaris->pvHandle, cb);
    10361006        }
    10371007        else
     
    10661036             */
    10671037            caddr_t UserAddr = NULL;
    1068             rc = rtR0MemObjSolUserMap(&UserAddr, fPageAccess, paPhysAddrs, cb);
     1038            rc = rtR0MemObjSolUserMap(&UserAddr, fPageAccess, paPhysAddrs, cb, PAGE_SIZE);
    10691039            if (RT_SUCCESS(rc))
    10701040            {
  • trunk/src/VBox/Runtime/r0drv/solaris/memobj-r0drv-solaris.h

    r41146 r41622  
    4141{
    4242    uint64_t *paPhysAddrs;
     43    size_t    cbPageSize;
    4344    uint_t    fPageAccess;
    4445} SEGVBOX_CRARGS;
     
    4849{
    4950    uint_t    fPageAccess;
     51    size_t    cbPageSize;
    5052} SEGVBOX_DATA;
    5153typedef SEGVBOX_DATA *PSEGVBOX_DATA;
     
    6567    AssertPtr(pData);
    6668
     69    /*
     70     * Currently we only map _4K pages but this segment driver can handle any size
     71     * supported by the Solaris HAT layer.
     72     */
     73    size_t cbPageSize  = pArgs->cbPageSize;
     74    size_t uPageShift  = 0;
     75    switch (cbPageSize)
     76    {
     77        case _4K: uPageShift = 12; break;
     78        case _2M: uPageShift = 21; break;
     79        default:  AssertReleaseMsgFailed(("Unsupported page size for mapping cbPageSize=%llx\n", cbPageSize)); break;
     80    }
     81
    6782    hat_map(pAddrSpace->a_hat, pSeg->s_base, pSeg->s_size, HAT_MAP);
    6883    pData->fPageAccess = pArgs->fPageAccess | PROT_USER;
     84    pData->cbPageSize  = cbPageSize;
    6985
    7086    pSeg->s_ops  = &s_SegVBoxOps;
     
    7591     */
    7692    caddr_t virtAddr = pSeg->s_base;
    77     pgcnt_t cPages   = (pSeg->s_size + PAGESIZE - 1) >> PAGESHIFT;
    78     for (pgcnt_t iPage = 0; iPage < cPages; ++iPage, virtAddr += PAGESIZE)
     93    pgcnt_t cPages   = (pSeg->s_size + cbPageSize - 1) >> uPageShift;
     94    for (pgcnt_t iPage = 0; iPage < cPages; ++iPage, virtAddr += cbPageSize)
    7995    {
    80         hat_devload(pAddrSpace->a_hat, virtAddr, PAGESIZE, pArgs->paPhysAddrs[iPage] >> PAGESHIFT,
    81                     pData->fPageAccess | HAT_UNORDERED_OK, HAT_LOAD | HAT_LOAD_LOCK);
     96        hat_devload(pAddrSpace->a_hat, virtAddr, cbPageSize, pArgs->paPhysAddrs[iPage] >> uPageShift,
     97                    pData->fPageAccess | HAT_UNORDERED_OK, HAT_LOAD_LOCK);
    8298    }
    8399
     
    98114
    99115    pDstData->fPageAccess  = pSrcData->fPageAccess;
     116    pDstData->cbPageSize   = pSrcData->cbPageSize;
    100117    pDstSeg->s_ops         = &s_SegVBoxOps;
    101118    pDstSeg->s_data        = pDstData;
     
    107124static int rtR0SegVBoxSolUnmap(seg_t *pSeg, caddr_t virtAddr, size_t cb)
    108125{
    109     /** @todo make these into release assertions. */
    110     if (   virtAddr < pSeg->s_base
    111         || virtAddr + cb > pSeg->s_base + pSeg->s_size
    112         || (cb & PAGEOFFSET) || ((uintptr_t)virtAddr & PAGEOFFSET))
     126    PSEGVBOX_DATA pData = pSeg->s_data;
     127
     128    AssertRelease(pData);
     129    AssertReleaseMsg(virtAddr >= pSeg->s_base, ("virtAddr=%p s_base=%p\n", virtAddr, pSeg->s_base));
     130    AssertReleaseMsg(virtAddr + cb <= pSeg->s_base + pSeg->s_size, ("virtAddr=%p cb=%llu s_base=%p s_size=%llu\n", virtAddr,
     131                                                                    cb, pSeg->s_base, pSeg->s_size));
     132    size_t cbPageOffset = pData->cbPageSize - 1;
     133    AssertRelease(!(cb & cbPageOffset));
     134    AssertRelease(!((uintptr_t)virtAddr & cbPageOffset));
     135
     136    if (   virtAddr != pSeg->s_base
     137        || cb       != pSeg->s_size)
    113138    {
    114         panic("rtRt0SegVBoxSolUnmap");
     139        return ENOTSUP;
    115140    }
    116 
    117     if (virtAddr != pSeg->s_base || cb != pSeg->s_size)
    118         return ENOTSUP;
    119141
    120142    hat_unload(pSeg->s_as->a_hat, virtAddr, cb, HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
     
    138160     * We would demand fault if the (u)read() path would SEGOP_FAULT() on buffers mapped in via our
    139161     * segment driver i.e. prefaults before DMA. Don't fail in such case where we're called directly,
    140      * see #5047.
     162     * see @bugref{5047}.
    141163     */
    142164    return 0;
     
    176198static size_t rtR0SegVBoxSolInCore(seg_t *pSeg, caddr_t virtAddr, size_t cb, char *pVec)
    177199{
    178     size_t cbLen = (cb + PAGEOFFSET) & PAGEMASK;
    179     for (virtAddr = 0; cbLen != 0; cbLen -= PAGESIZE, virtAddr += PAGESIZE)
     200    PSEGVBOX_DATA pData = pSeg->s_data;
     201    AssertRelease(pData);
     202    size_t uPageOffset  = pData->cbPageSize - 1;
     203    size_t uPageMask    = ~uPageOffset;
     204    size_t cbLen        = (cb + uPageOffset) & uPageMask;
     205    for (virtAddr = 0; cbLen != 0; cbLen -= pData->cbPageSize, virtAddr += pData->cbPageSize)
    180206        *pVec++ = 1;
    181207    return cbLen;
     
    295321#endif /* !___r0drv_solaris_memobj_r0drv_solaris_h */
    296322
    297 
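
The header changes above plumb the mapping page size (cbPageSize) into the VBox segment driver, so a mapping backed by large pages needs only one HAT translation per large page rather than one per 4K page. A minimal sketch of that idea follows, using only calls that appear in the diff (hat_devload with HAT_UNORDERED_OK attributes and HAT_LOAD_LOCK) and the IPRT size constants _4K/_2M; demoSegMap and its parameters are illustrative, not the committed rtR0SegVBoxSolCreate.

    /*
     * Sketch only -- map a range at a caller-supplied page size: derive the
     * shift from the page size, then install one PFN per (large) page.
     */
    static void demoSegMap(struct as *pAddrSpace, caddr_t virtAddr, size_t cbSeg,
                           uint64_t *paPhysAddrs, size_t cbPageSize, uint_t fPageAccess)
    {
        size_t uPageShift = (cbPageSize == _2M) ? 21 : 12;   /* only _4K and _2M are expected */
        pgcnt_t cPages    = (cbSeg + cbPageSize - 1) >> uPageShift;

        for (pgcnt_t iPage = 0; iPage < cPages; ++iPage, virtAddr += cbPageSize)
        {
            /* One locked translation per page; paPhysAddrs[] holds page-size aligned addresses. */
            hat_devload(pAddrSpace->a_hat, virtAddr, cbPageSize, paPhysAddrs[iPage] >> uPageShift,
                        fPageAccess | PROT_USER | HAT_UNORDERED_OK, HAT_LOAD_LOCK);
        }
    }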