
Changeset 77939 in vbox for trunk/src/VBox/Additions/linux


Timestamp: Mar 28, 2019 3:56:44 PM
Author: vboxsync
svn:sync-xref-src-repo-rev: 129659
Message: linux/vboxsf: Reimplemented vbsf_splice_read to make both sendfile and splice() work more correctly on 2.6.23 - 2.6.30. bugref:9172
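
To see what this change services from userspace, here is a minimal sketch (not part of the changeset) that splices a shared-folder file through a pipe; /mnt/shared/file.txt is a hypothetical mount path, and stdout should be redirected to a regular file since splicing to a TTY is generally not supported:

    /* splice-demo.c - hypothetical test, not from the changeset. */
    #define _GNU_SOURCE             /* for splice() */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int pipefd[2];
        int fd = open("/mnt/shared/file.txt", O_RDONLY); /* hypothetical vboxsf path */
        ssize_t cb;

        if (fd < 0 || pipe(pipefd) != 0) {
            perror("setup");
            return 1;
        }
        /* On a 2.6.23-2.6.30 guest each of these calls ends up in vbsf_splice_read. */
        while ((cb = splice(fd, NULL, pipefd[1], NULL, 16384, 0)) > 0) {
            /* Drain the pipe into stdout so the write side never fills up. */
            if (splice(pipefd[0], NULL, STDOUT_FILENO, NULL, (size_t)cb, 0) < 0) {
                perror("splice to stdout");
                return 1;
            }
        }
        if (cb < 0)
            perror("splice from file");
        close(pipefd[0]);
        close(pipefd[1]);
        close(fd);
        return cb < 0 ? 1 : 0;
    }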

File: 1 edited

Legend: unchanged lines are unprefixed; '-' marks lines removed in r77939, '+' marks lines added.
  • trunk/src/VBox/Additions/linux/sharedfolders/regops.c

r77880 → r77939

@@ -497,165 +497,6 @@
 
 
-
 /*********************************************************************************************************************************
-*   Pipe / splice stuff for 2.6.23 >= linux < 2.6.31 (figure out why we need this)                                               *
-*********************************************************************************************************************************/
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
- && LINUX_VERSION_CODE <  KERNEL_VERSION(2, 6, 31)
-
-/*
- * Some pipe stuff we apparently need for 2.6.23-2.6.30.
- */
-
-static void vbsf_free_pipebuf(struct page *kpage)
-{
-    kunmap(kpage);
-    __free_pages(kpage, 0);
-}
-
-static void *vbsf_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, int atomic)
-{
-    return 0;
-}
-
-static void vbsf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
-{
-}
-
-static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf, void *map_data)
-{
-}
-
-static int vbsf_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
-{
-    return 0;
-}
-
-static void vbsf_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *pipe_buf)
-{
-    vbsf_free_pipebuf(pipe_buf->page);
-}
-
-static int vbsf_pipe_buf_confirm(struct pipe_inode_info *info, struct pipe_buffer *pipe_buf)
-{
-    return 0;
-}
-
-static struct pipe_buf_operations vbsf_pipe_buf_ops = {
-    .can_merge = 0,
-    .map = vbsf_pipe_buf_map,
-    .unmap = vbsf_pipe_buf_unmap,
-    .confirm = vbsf_pipe_buf_confirm,
-    .release = vbsf_pipe_buf_release,
-    .steal = vbsf_pipe_buf_steal,
-    .get = vbsf_pipe_buf_get,
-};
-
-static int vbsf_reg_read_aux(const char *caller, struct vbsf_super_info *sf_g, struct vbsf_reg_info *sf_r,
-                             void *buf, uint32_t *nread, uint64_t pos)
-{
-    int rc = VbglR0SfRead(&g_SfClient, &sf_g->map, sf_r->Handle.hHost, pos, nread, buf, false /* already locked? */ );
-    if (RT_FAILURE(rc)) {
-        LogFunc(("VbglR0SfRead failed. caller=%s, rc=%Rrc\n", caller,
-             rc));
-        return -EPROTO;
-    }
-    return 0;
-}
-
-# define LOCK_PIPE(pipe)   do { if (pipe->inode) mutex_lock(&pipe->inode->i_mutex); } while (0)
-# define UNLOCK_PIPE(pipe) do { if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); } while (0)
-
-ssize_t vbsf_splice_read(struct file *in, loff_t * poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
-{
-    size_t bytes_remaining = len;
-    loff_t orig_offset = *poffset;
-    loff_t offset = orig_offset;
-    struct inode *inode = VBSF_GET_F_DENTRY(in)->d_inode;
-    struct vbsf_super_info *sf_g = VBSF_GET_SUPER_INFO(inode->i_sb);
-    struct vbsf_reg_info *sf_r = in->private_data;
-    ssize_t retval;
-    struct page *kpage = 0;
-    size_t nsent = 0;
-
-/** @todo rig up a FsPerf test for this code  */
-    TRACE();
-    if (!S_ISREG(inode->i_mode)) {
-        LogFunc(("read from non regular file %d\n", inode->i_mode));
-        return -EINVAL;
-    }
-    if (!len) {
-        return 0;
-    }
-
-    LOCK_PIPE(pipe);
-
-    uint32_t req_size = 0;
-    while (bytes_remaining > 0) {
-        kpage = alloc_page(GFP_KERNEL);
-        if (unlikely(kpage == NULL)) {
-            UNLOCK_PIPE(pipe);
-            return -ENOMEM;
-        }
-        req_size = 0;
-        uint32_t nread = req_size = (uint32_t) min(bytes_remaining, (size_t) PAGE_SIZE);
-        uint32_t chunk = 0;
-        void *kbuf = kmap(kpage);
-        while (chunk < req_size) {
-            retval = vbsf_reg_read_aux(__func__, sf_g, sf_r, kbuf + chunk, &nread, offset);
-            if (retval < 0)
-                goto err;
-            if (nread == 0)
-                break;
-            chunk += nread;
-            offset += nread;
-            nread = req_size - chunk;
-        }
-        if (!pipe->readers) {
-            send_sig(SIGPIPE, current, 0);
-            retval = -EPIPE;
-            goto err;
-        }
-        if (pipe->nrbufs < PIPE_BUFFERS) {
-            struct pipe_buffer *pipebuf = pipe->bufs + ((pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1));
-            pipebuf->page = kpage;
-            pipebuf->ops = &vbsf_pipe_buf_ops;
-            pipebuf->len = req_size;
-            pipebuf->offset = 0;
-            pipebuf->private = 0;
-            pipebuf->flags = 0;
-            pipe->nrbufs++;
-            nsent += req_size;
-            bytes_remaining -= req_size;
-            if (signal_pending(current))
-                break;
-        } else {    /* pipe full */
-
-            if (flags & SPLICE_F_NONBLOCK) {
-                retval = -EAGAIN;
-                goto err;
-            }
-            vbsf_free_pipebuf(kpage);
-            break;
-        }
-    }
-    UNLOCK_PIPE(pipe);
-    if (!nsent && signal_pending(current))
-        return -ERESTARTSYS;
-    *poffset += nsent;
-    return offset - orig_offset;
-
- err:
-    UNLOCK_PIPE(pipe);
-    vbsf_free_pipebuf(kpage);
-    return retval;
-}
-
-#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
-
-
-/*********************************************************************************************************************************
-*   File operations on regular files                                                                                             *
+*   Misc                                                                                                                         *
 *********************************************************************************************************************************/
 
     
@@ -675,4 +516,290 @@
         && 1 /** @todo make this behaviour configurable at mount time (sf_g) */;
 }
+
+
+
+/*********************************************************************************************************************************
+*   Pipe / splice stuff for 2.6.23 <= linux < 2.6.31 (where no fallbacks were available)                                         *
+*********************************************************************************************************************************/
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23) \
+ && LINUX_VERSION_CODE <  KERNEL_VERSION(2, 6, 31)
+
+
+/** Verify pipe buffer content (needed for page-cache to ensure idle page). */
+static int vbsf_pipe_buf_confirm(struct pipe_inode_info *info, struct pipe_buffer *pPipeBuf)
+{
+    /*SFLOG3(("vbsf_pipe_buf_confirm: %p\n", pPipeBuf));*/
+    return 0;
+}
+
+/** Maps the buffer page. */
+static void *vbsf_pipe_buf_map(struct pipe_inode_info *pipe, struct pipe_buffer *pPipeBuf, int atomic)
+{
+    void *pvRet;
+    if (!atomic)
+        pvRet = kmap(pPipeBuf->page);
+    else {
+        pPipeBuf->flags |= PIPE_BUF_FLAG_ATOMIC;
+        pvRet = kmap_atomic(pPipeBuf->page, KM_USER0);
+    }
+    /*SFLOG3(("vbsf_pipe_buf_map: %p -> %p\n", pPipeBuf, pvRet));*/
+    return pvRet;
+}
+
+/** Unmaps the buffer page. */
+static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pipe, struct pipe_buffer *pPipeBuf, void *pvMapping)
+{
+    /*SFLOG3(("vbsf_pipe_buf_unmap: %p/%p\n", pPipeBuf, pvMapping)); */
+    if (!(pPipeBuf->flags & PIPE_BUF_FLAG_ATOMIC))
+        kunmap(pPipeBuf->page);
+    else {
+        pPipeBuf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
+        kunmap_atomic(pvMapping, KM_USER0);
+    }
+}
+
+/** Gets a reference to the page. */
+static void vbsf_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *pPipeBuf)
+{
+    page_cache_get(pPipeBuf->page);
+    /*SFLOG3(("vbsf_pipe_buf_get: %p (return count=%d)\n", pPipeBuf, page_count(pPipeBuf->page)));*/
+}
+
+/** Release the buffer page (counter to vbsf_pipe_buf_get). */
+static void vbsf_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *pPipeBuf)
+{
+    /*SFLOG3(("vbsf_pipe_buf_release: %p (incoming count=%d)\n", pPipeBuf, page_count(pPipeBuf->page)));*/
+    page_cache_release(pPipeBuf->page);
+}
+
+/** Attempt to steal the page.
+ * @returns 0 success, 1 on failure.  */
+static int vbsf_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *pPipeBuf)
+{
+    if (page_count(pPipeBuf->page) == 1) {
+        lock_page(pPipeBuf->page);
+        SFLOG3(("vbsf_pipe_buf_steal: %p -> 0\n", pPipeBuf));
+        return 0;
+    }
+    SFLOG3(("vbsf_pipe_buf_steal: %p -> 1\n", pPipeBuf));
+    return 1;
+}
+
+/**
+ * Pipe buffer operations used by vbsf_feed_pages_to_pipe.
+ */
+static struct pipe_buf_operations vbsf_pipe_buf_ops = {
+    .can_merge = 0,
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
+    .confirm   = vbsf_pipe_buf_confirm,
+# else
+    .pin       = vbsf_pipe_buf_confirm,
+# endif
+    .map       = vbsf_pipe_buf_map,
+    .unmap     = vbsf_pipe_buf_unmap,
+    .get       = vbsf_pipe_buf_get,
+    .release   = vbsf_pipe_buf_release,
+    .steal     = vbsf_pipe_buf_steal,
+};
+
+# define LOCK_PIPE(pipe)   do { if ((pipe)->inode) mutex_lock(&(pipe)->inode->i_mutex); } while (0)
+# define UNLOCK_PIPE(pipe) do { if ((pipe)->inode) mutex_unlock(&(pipe)->inode->i_mutex); } while (0)
+
+/** Waits for the pipe buffer status to change. */
+static void vbsf_wait_pipe(struct pipe_inode_info *pPipe)
+{
+    DEFINE_WAIT(WaitStuff);
+# ifdef TASK_NONINTERACTIVE
+    prepare_to_wait(&pPipe->wait, &WaitStuff, TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
+# else
+    prepare_to_wait(&pPipe->wait, &WaitStuff, TASK_INTERRUPTIBLE);
+# endif
+    UNLOCK_PIPE(pPipe);
+
+    schedule();
+
+    finish_wait(&pPipe->wait, &WaitStuff);
+    LOCK_PIPE(pPipe);
+}
+
+/** Worker for vbsf_feed_pages_to_pipe that wakes up readers. */
+static void vbsf_wake_up_pipe(struct pipe_inode_info *pPipe, bool fReaders)
+{
+    smp_mb();
+    if (waitqueue_active(&pPipe->wait))
+        wake_up_interruptible_sync(&pPipe->wait);
+    if (fReaders)
+        kill_fasync(&pPipe->fasync_readers, SIGIO, POLL_IN);
+    else
+        kill_fasync(&pPipe->fasync_writers, SIGIO, POLL_OUT);
+}
+
+/**
+ * Feeds the pages to the pipe.
+ *
+ * Pages given to the pipe are set to NULL in papPages.
+ */
+static ssize_t vbsf_feed_pages_to_pipe(struct pipe_inode_info *pPipe, struct page **papPages, size_t cPages, uint32_t offPg0,
+                                       uint32_t cbActual, unsigned fFlags)
+{
+    ssize_t cbRet       = 0;
+    size_t  iPage       = 0;
+    bool    fNeedWakeUp = false;
+
+    LOCK_PIPE(pPipe);
+    for (;;) {
+        if (   pPipe->readers > 0
+            && pPipe->nrbufs < PIPE_BUFFERS) {
+            struct pipe_buffer *pPipeBuf   = &pPipe->bufs[(pPipe->curbuf + pPipe->nrbufs) % PIPE_BUFFERS];
+            uint32_t const      cbThisPage = RT_MIN(cbActual, PAGE_SIZE - offPg0);
+            pPipeBuf->len       = cbThisPage;
+            pPipeBuf->offset    = offPg0;
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
+            pPipeBuf->private   = 0;
+# endif
+            pPipeBuf->ops       = &vbsf_pipe_buf_ops;
+            pPipeBuf->flags     = fFlags & SPLICE_F_GIFT ? PIPE_BUF_FLAG_GIFT : 0;
+            pPipeBuf->page      = papPages[iPage];
+
+            papPages[iPage++] = NULL;
+            pPipe->nrbufs++;
+            fNeedWakeUp |= pPipe->inode != NULL;
+            offPg0 = 0;
+            cbRet += cbThisPage;
+
+            /* done? */
+            cbActual -= cbThisPage;
+            if (!cbActual)
+                break;
+        } else if (pPipe->readers == 0) {
+            SFLOGFLOW(("vbsf_feed_pages_to_pipe: no readers!\n"));
+            send_sig(SIGPIPE, current, 0);
+            if (cbRet == 0)
+                cbRet = -EPIPE;
+            break;
+        } else if (fFlags & SPLICE_F_NONBLOCK) {
+            if (cbRet == 0)
+                cbRet = -EAGAIN;
+            break;
+        } else if (signal_pending(current)) {
+            if (cbRet == 0)
+                cbRet = -ERESTARTSYS;
+            SFLOGFLOW(("vbsf_feed_pages_to_pipe: pending signal! (%d)\n", cbRet));
+            break;
+        } else {
+            if (fNeedWakeUp) {
+                vbsf_wake_up_pipe(pPipe, true /*fReaders*/);
+                fNeedWakeUp = 0;
+            }
+            pPipe->waiting_writers++;
+            vbsf_wait_pipe(pPipe);
+            pPipe->waiting_writers--;
+        }
+    }
+    UNLOCK_PIPE(pPipe);
+
+    if (fNeedWakeUp)
+        vbsf_wake_up_pipe(pPipe, true /*fReaders*/);
+
+    return cbRet;
+}
+
+
+/**
+ * For splicing from a file to a pipe.
+ */
+static ssize_t vbsf_splice_read(struct file *in, loff_t * poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
+{
+    struct inode           *inode = VBSF_GET_F_DENTRY(in)->d_inode;
+    struct vbsf_super_info *sf_g  = VBSF_GET_SUPER_INFO(inode->i_sb);
+    ssize_t                 cbRet;
+
+    SFLOGFLOW(("vbsf_splice_read: in=%p poffset=%p{%#RX64} pipe=%p len=%#zx flags=%#x\n", in, poffset, *poffset, pipe, len, flags));
+    if (vbsf_should_use_cached_read(in, inode->i_mapping, sf_g)) {
+        cbRet = generic_file_splice_read(in, poffset, pipe, len, flags);
+    } else {
+        /*
+         * Create a read request.
+         */
+        loff_t              offFile = *poffset;
+        size_t              cPages  = RT_MIN(RT_ALIGN_Z((offFile & ~PAGE_CACHE_MASK) + len, PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT,
+                                             PIPE_BUFFERS);
+        VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ,
+                                                                                              PgLst.aPages[cPages]));
+        if (pReq) {
+            /*
+             * Allocate pages.
+             */
+            struct page *apPages[PIPE_BUFFERS];
+            size_t       i;
+            pReq->PgLst.offFirstPage = (uint16_t)offFile & (uint16_t)PAGE_OFFSET_MASK;
+            cbRet = 0;
+            for (i = 0; i < cPages; i++) {
+                struct page *pPage;
+                apPages[i] = pPage = alloc_page(GFP_USER);
+                if (pPage) {
+                    pReq->PgLst.aPages[i] = page_to_phys(pPage);
+# ifdef VBOX_STRICT
+                    ASMMemFill32(kmap(pPage), PAGE_SIZE, UINT32_C(0xdeadbeef));
+                    kunmap(pPage);
+# endif
+                } else {
+                    cbRet = -ENOMEM;
+                    break;
+                }
+            }
+            if (cbRet == 0) {
+                /*
+                 * Do the reading.
+                 */
+                uint32_t const          cbToRead = RT_MIN((cPages << PAGE_SHIFT) - (offFile & PAGE_OFFSET_MASK), len);
+                struct vbsf_reg_info   *sf_r     = (struct vbsf_reg_info *)in->private_data;
+                int vrc = VbglR0SfHostReqReadPgLst(sf_g->map.root, pReq, sf_r->Handle.hHost, offFile, cbToRead, cPages);
+                if (RT_SUCCESS(vrc)) {
+                    /*
+                     * Get the number of bytes read, jettison the request
+                     * and, in case of EOF, any unnecessary pages.
+                     */
+                    uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
+                    AssertStmt(cbActual <= cbToRead, cbActual = cbToRead);
+                    SFLOG2(("vbsf_splice_read: read -> %#x bytes @ %#RX64\n", cbActual, offFile));
+
+                    VbglR0PhysHeapFree(pReq);
+                    pReq = NULL;
+
+                    /*
+                     * Now, feed it to the pipe thingy.
+                     * This will take ownership of all the pages no matter what happens.
+                     */
+                    cbRet = vbsf_feed_pages_to_pipe(pipe, apPages, cPages, offFile & PAGE_OFFSET_MASK, cbActual, flags);
+                    if (cbRet > 0)
+                        *poffset = offFile + cbRet;
+                } else {
+                    cbRet = -RTErrConvertToErrno(vrc);
+                    SFLOGFLOW(("vbsf_splice_read: Read failed: %Rrc -> %zd\n", vrc, cbRet));
+                }
+                i = cPages;
+            }
+
+            while (i-- > 0)
+                if (apPages[i])
+                    __free_pages(apPages[i], 0);
+            if (pReq)
+                VbglR0PhysHeapFree(pReq);
+        } else {
+            cbRet = -ENOMEM;
+        }
+    }
+    SFLOGFLOW(("vbsf_splice_read: returns %zd (%#zx), *poffset=%#RX64\n", cbRet, cbRet, *poffset));
+    return cbRet;
+}
+
+#endif /* 2.6.23 <= LINUX_VERSION_CODE < 2.6.31 */
+
+
+/*********************************************************************************************************************************
+*   File operations on regular files                                                                                             *
+*********************************************************************************************************************************/
 
 /** Wrapper around put_page / page_cache_release.  */
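
The -EAGAIN branch in vbsf_feed_pages_to_pipe above can be observed directly: if nothing drains the pipe, a non-blocking splice() must fail once all pipe buffer slots are in use instead of sleeping in vbsf_wait_pipe. A minimal sketch under the same assumptions as before (hypothetical path; the file must be larger than the pipe capacity to actually fill it):

    /* nonblock-demo.c - hypothetical test, not from the changeset. */
    #define _GNU_SOURCE             /* for splice() */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        int pipefd[2];
        int fd = open("/mnt/shared/big.bin", O_RDONLY); /* hypothetical vboxsf path */
        ssize_t cb;

        if (fd < 0 || pipe(pipefd) != 0)
            return 1;
        /* Nobody reads pipefd[0], so once the PIPE_BUFFERS slots are used up
         * a non-blocking splice() has to return -1/EAGAIN rather than block. */
        do
            cb = splice(fd, NULL, pipefd[1], NULL, 4096, SPLICE_F_NONBLOCK);
        while (cb > 0);
        if (cb < 0 && errno == EAGAIN)
            puts("pipe full: got EAGAIN instead of blocking");
        else if (cb < 0)
            perror("splice");
        close(pipefd[0]);
        close(pipefd[1]);
        close(fd);
        return 0;
    }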
     
@@ -2809,4 +2936,15 @@
 /**
  * File operations for regular files.
+ *
+ * Note on splice_read/splice_write/sendfile:
+ *      - Splice was introduced in 2.6.17.  The generic_file_splice_read/write
+ *        methods go thru the page cache, which is undesirable and is why we
+ *        need to cook our own versions of the code as long as we cannot track
+ *        host-side writes and correctly invalidate the guest page-cache.
+ *      - Sendfile was reimplemented using splice in 2.6.23.
+ *      - The default_file_splice_read/write no-page-cache fallback functions
+ *        were introduced in 2.6.31.
+ *      - Since linux 4.9 the generic_file_splice_read/write functions are using
+ *        read_iter/write_iter.
 */
 struct file_operations vbsf_reg_fops = {
     
@@ -2827,14 +2965,10 @@
     .mmap            = generic_file_mmap,
 #endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31)
-/** @todo This code is known to cause caching of data which should not be
- * cached.  Investigate --
- * bird: Part of this was using generic page cache functions for
- * implementing .aio_read/write.  Fixed that (see above). */
-# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 23)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 31) && LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 17)
     .splice_read     = vbsf_splice_read,
-# else
-    .sendfile        = generic_file_sendfile,
-# endif
+    /// @todo .splice_write    = vbsf_splice_write,
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 23)
+    .sendfile        = generic_file_sendfile, /**< @todo this goes thru page cache. */
 #endif
     .llseek          = vbsf_reg_llseek,
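
Since 2.6.23, sendfile() has been implemented on top of splice (see the note added to the vbsf_reg_fops comment above), so the reimplemented vbsf_splice_read also backs sendfile() on the affected kernels. A minimal sketch; note that before 2.6.33 the sendfile() destination must be a socket, so a socketpair stands in here, and the input path is again hypothetical:

    /* sendfile-demo.c - hypothetical test, not from the changeset. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/sendfile.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        int sv[2];
        int fd = open("/mnt/shared/file.txt", O_RDONLY); /* hypothetical vboxsf path */
        off_t off = 0;
        ssize_t cb;

        if (fd < 0 || socketpair(AF_UNIX, SOCK_STREAM, 0, sv) != 0)
            return 1;
        /* On 2.6.23-2.6.30 this goes sendfile() -> splice machinery -> vbsf_splice_read. */
        cb = sendfile(sv[0], fd, &off, 4096);
        if (cb < 0)
            perror("sendfile");
        else
            printf("sent %zd bytes, file offset now %lld\n", cb, (long long)off);
        close(sv[0]);
        close(sv[1]);
        close(fd);
        return cb < 0 ? 1 : 0;
    }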