VirtualBox

Changeset 79983 in vbox for trunk/src/VBox/Runtime/r3


Ignore:
Timestamp:
Jul 25, 2019 5:21:24 PM (6 years ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
132417
Message:

Runtime/RTIoQueue: Updates

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Runtime/r3/linux/ioqueue-iouringfile-provider.cpp

    r79953 r79983  
    6868#include <iprt/string.h>
    6969
     70#include <errno.h>
     71#include <unistd.h>
     72#include <signal.h>
    7073#include <sys/mman.h>
    71 #include <unistd.h>
    7274#include <sys/syscall.h>
    73 #include <errno.h>
    74 #include <signal.h>
     75#include <sys/uio.h>
    7576
    7677#include "internal/ioqueue.h"
     
    8788/** The syscall number of io_uring_register(). */
    8889#define LNX_IOURING_SYSCALL_REGISTER  427
    89 
     90/** eventfd2() syscall not associated with io_uring but used for kicking waiters. */
     91#define LNX_SYSCALL_EVENTFD2           19
    9092
    9193/*********************************************************************************************************************************
     
    409411    /** The io_uring file descriptor. */
    410412    int                         iFdIoCtx;
     413    /** The eventfd file descriptor registered with the ring. */
     414    int                         iFdEvt;
    411415    /** The submission queue. */
    412416    RTIOQUEUESQ                 Sq;
     417    /** The currently uncommitted tail for the SQ. */
     418    uint32_t                    idxSqTail;
     420    /** Number of uncommitted SQEs. */
     420    uint32_t                    cSqesToCommit;
    413421    /** The completion queue. */
    414422    RTIOQUEUECQ                 Cq;
    415423    /** Pointer to the mapped SQES entries. */
    416424    PLNXIOURINGSQE              paSqes;
     425    /** Pointer to the iovec structure used for non S/G requests. */
     426    struct iovec                *paIoVecs;
    417427    /** Pointer returned by mmap() for the SQ ring, used for unmapping. */
    418428    void                        *pvMMapSqRing;
     
    427437    /** Size of the mapped SQ entries array, used for unmapping. */
    428438    size_t                      cbMMapSqes;
     439    /** Flag whether the waiter was woken up externally. */
     440    volatile bool               fExtIntr;
    429441} RTIOQUEUEPROVINT;
    430442/** Pointer to the internal I/O queue provider instance data. */
     
    519531
    520532
     533/**
     534 * eventfd2() syscall wrapper.
     535 *
     536 * @returns IPRT status code.
     537 * @param   uValInit            The initial value of the maintained counter.
     538 * @param   fFlags              Flags controlling the eventfd behavior.
     539 * @param   piFdEvt             Where to store the file descriptor of the eventfd object on success.
     540 */
     541DECLINLINE(int) rtIoQueueLnxEventfd2(uint32_t uValInit, uint32_t fFlags, int *piFdEvt)
     542{
     543    int rcLnx = syscall(LNX_SYSCALL_EVENTFD2, uValInit, fFlags);
     544    if (RT_UNLIKELY(rcLnx == -1))
     545        return RTErrConvertFromErrno(errno);
     546
     547    *piFdEvt = rcLnx;
     548    return VINF_SUCCESS;
     549}
     550
     551
     552/**
     553 * Checks the completion event queue for pending events.
     554 *
     555 * @returns nothing.
     556 * @param   pThis               The provider instance.
     557 * @param   paCEvt              Pointer to the array of completion events.
     558 * @param   cCEvt               Maximum number of completion events the array can hold.
     559 * @param   pcCEvtSeen          Where to store the number of completion events processed.
     560 */
     561static void rtIoQueueLnxIoURingFileProvCqCheck(PRTIOQUEUEPROVINT pThis, PRTIOQUEUECEVT paCEvt,
     562                                               uint32_t cCEvt, uint32_t *pcCEvtSeen)
     563{
     564    /* The fencing and atomic accesses are kind of overkill and probably not required (dev paranoia). */
     565    ASMReadFence();
     566    uint32_t idxCqHead = ASMAtomicReadU32(pThis->Cq.pidxHead);
     567    uint32_t idxCqTail = ASMAtomicReadU32(pThis->Cq.pidxTail);
     568    ASMReadFence();
     569
     570    uint32_t cCEvtSeen = 0;
     571
     572    while (   idxCqTail != idxCqHead
     573           && cCEvtSeen < cCEvt)
     574    {
     575        /* Get the index. */
     576        uint32_t idxCqe = idxCqHead & pThis->Cq.fRingMask;
     577        volatile LNXIOURINGCQE *pCqe = &pThis->Cq.paCqes[idxCqe];
     578
     579        paCEvt->pvUser = (void *)(uintptr_t)pCqe->u64User;
     580        if (pCqe->rcLnx >= 0)
     581        {
     582            paCEvt->rcReq    = VINF_SUCCESS;
     583            paCEvt->cbXfered = (size_t)pCqe->rcLnx;
     584        }
     585        else
     586            paCEvt->rcReq = RTErrConvertFromErrno(-pCqe->rcLnx);
     587
     588        paCEvt++;
     589        cCEvtSeen++;
     590        idxCqHead++;
     591    }
     592
     593    *pcCEvtSeen = cCEvtSeen;
     594
     595    /* Paranoia strikes again. */
     596    ASMWriteFence();
     597    ASMAtomicWriteU32(pThis->Cq.pidxHead, idxCqHead);
     598    ASMWriteFence();
     599}
     600
     601
    521602/** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnIsSupported} */
    522603static DECLCALLBACK(bool) rtIoQueueLnxIoURingFileProv_IsSupported(void)
     
    527608     */
    528609    int iFdIoCtx = 0;
     610    bool fSupp = false;
    529611    LNXIOURINGPARAMS Params;
    530612    RT_ZERO(Params);
     
    533615    if (RT_SUCCESS(rc))
    534616    {
     617        /*
     618         * Check that we can register an eventfd descriptor to get notified about
     619         * completion events while being able to kick the waiter externally out of the wait.
     620         */
     621        int iFdEvt = 0;
     622        rc = rtIoQueueLnxEventfd2(0 /*uValInit*/, 0 /*fFlags*/, &iFdEvt);
     623        if (RT_SUCCESS(rc))
     624        {
     625            rc = rtIoQueueLnxIoURingRegister(iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER,
     626                                             &iFdEvt, 1 /*cArgs*/);
     627            if (RT_SUCCESS(rc))
     628                fSupp = true;
     629
     630            int rcLnx = close(iFdEvt); Assert(!rcLnx); RT_NOREF(rcLnx);
     631        }
    535632        int rcLnx = close(iFdIoCtx); Assert(!rcLnx); RT_NOREF(rcLnx);
    536         return true;
    537633    }
    538634
    539     return false;
     635    return fSupp;
    540636}
    541637
     
    543639/** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnQueueInit} */
    544640static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_QueueInit(RTIOQUEUEPROV hIoQueueProv, uint32_t fFlags,
    545                                                                size_t cSqEntries, size_t cCqEntries)
     641                                                               uint32_t cSqEntries, uint32_t cCqEntries)
    546642{
    547643    RT_NOREF(fFlags, cCqEntries);
     
    550646    LNXIOURINGPARAMS Params;
    551647    RT_ZERO(Params);
     648
     649    pThis->cSqesToCommit = 0;
     650    pThis->fExtIntr      = false;
    552651
    553652    int rc = rtIoQueueLnxIoURingSetup(cSqEntries, &Params, &pThis->iFdIoCtx);
     
    559658        pThis->cbMMapSqes   = Params.u32SqEntriesCnt * sizeof(LNXIOURINGSQE);
    560659
    561         rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQ, pThis->cbMMapSqRing, &pThis->pvMMapSqRing);
    562         if (RT_SUCCESS(rc))
     660        pThis->paIoVecs = (struct iovec *)RTMemAllocZ(Params.u32SqEntriesCnt * sizeof(struct iovec));
     661        if (RT_LIKELY(pThis->paIoVecs))
    563662        {
    564             rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_CQ, pThis->cbMMapCqRing, &pThis->pvMMapCqRing);
     663            rc = rtIoQueueLnxEventfd2(0 /*uValInit*/, 0 /*fFlags*/, &pThis->iFdEvt);
    565664            if (RT_SUCCESS(rc))
    566665            {
    567                 rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQES, pThis->cbMMapSqes, &pThis->pvMMapSqes);
     666                rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER, &pThis->iFdEvt, 1 /*cArgs*/);
    568667                if (RT_SUCCESS(rc))
    569668                {
    570                     uint8_t *pbTmp = (uint8_t *)pThis->pvMMapSqRing;
    571 
    572                     pThis->Sq.pidxHead  = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffHead);
    573                     pThis->Sq.pidxTail  = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffTail);
    574                     pThis->Sq.fRingMask = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingMask);
    575                     pThis->Sq.cEntries  = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingEntries);
    576                     pThis->Sq.pfFlags   = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffFlags);
    577                     pThis->Sq.paidxSqes = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffArray);
    578 
    579                     pThis->paSqes       = (PLNXIOURINGSQE)pThis->pvMMapSqes;
    580 
    581                     pbTmp = (uint8_t *)pThis->pvMMapCqRing;
    582 
    583                     pThis->Cq.pidxHead  = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffHead);
    584                     pThis->Cq.pidxTail  = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffTail);
    585                     pThis->Cq.fRingMask = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingMask);
    586                     pThis->Cq.cEntries  = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingEntries);
    587                     pThis->Cq.paCqes    = (PLNXIOURINGCQE)(pbTmp + Params.CqOffsets.u32OffCqes);
    588                     return VINF_SUCCESS;
     669                    rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQ, pThis->cbMMapSqRing, &pThis->pvMMapSqRing);
     670                    if (RT_SUCCESS(rc))
     671                    {
     672                        rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_CQ, pThis->cbMMapCqRing, &pThis->pvMMapCqRing);
     673                        if (RT_SUCCESS(rc))
     674                        {
     675                            rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQES, pThis->cbMMapSqes, &pThis->pvMMapSqes);
     676                            if (RT_SUCCESS(rc))
     677                            {
     678                                uint8_t *pbTmp = (uint8_t *)pThis->pvMMapSqRing;
     679
     680                                pThis->Sq.pidxHead  = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffHead);
     681                                pThis->Sq.pidxTail  = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffTail);
     682                                pThis->Sq.fRingMask = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingMask);
     683                                pThis->Sq.cEntries  = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingEntries);
     684                                pThis->Sq.pfFlags   = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffFlags);
     685                                pThis->Sq.paidxSqes = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffArray);
     686                                pThis->idxSqTail    = *pThis->Sq.pidxTail;
     687
     688                                pThis->paSqes       = (PLNXIOURINGSQE)pThis->pvMMapSqes;
     689
     690                                pbTmp = (uint8_t *)pThis->pvMMapCqRing;
     691
     692                                pThis->Cq.pidxHead  = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffHead);
     693                                pThis->Cq.pidxTail  = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffTail);
     694                                pThis->Cq.fRingMask = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingMask);
     695                                pThis->Cq.cEntries  = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingEntries);
     696                                pThis->Cq.paCqes    = (PLNXIOURINGCQE)(pbTmp + Params.CqOffsets.u32OffCqes);
     697                                return VINF_SUCCESS;
     698                            }
     699
     700                            munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing);
     701                        }
     702
     703                        munmap(pThis->pvMMapSqRing, pThis->cbMMapSqRing);
     704                    }
     705
     706                    rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER, NULL, 0);
     707                    AssertRC(rc);
    589708                }
    590709
    591                 munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing);
     710                close(pThis->iFdEvt);
    592711            }
    593712
    594             munmap(pThis->pvMMapSqRing, pThis->cbMMapSqRing);
     713            RTMemFree(pThis->paIoVecs);
    595714        }
     715
     716        int rcLnx = close(pThis->iFdIoCtx); Assert(!rcLnx); RT_NOREF(rcLnx);
    596717    }
    597718
     
    608729    rcLnx = munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing); Assert(!rcLnx); RT_NOREF(rcLnx);
    609730    rcLnx = munmap(pThis->pvMMapSqes, pThis->cbMMapSqes); Assert(!rcLnx); RT_NOREF(rcLnx);
     731
     732    int rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER, NULL, 0);
     733    AssertRC(rc);
     734
     735    close(pThis->iFdEvt);
    610736    close(pThis->iFdIoCtx);
     737    RTMemFree(pThis->paIoVecs);
    611738
    612739    RT_ZERO(pThis);
     
    638765{
    639766    PRTIOQUEUEPROVINT pThis = hIoQueueProv;
    640     RT_NOREF(pThis, pHandle, enmOp, off, pvBuf, cbBuf, fReqFlags, pvUser);
    641 
    642     return VERR_NOT_IMPLEMENTED;
     767    RT_NOREF(fReqFlags);
     768
     769    uint32_t idx = pThis->idxSqTail & pThis->Sq.fRingMask;
     770    PLNXIOURINGSQE pSqe = &pThis->paSqes[idx];
     771    struct iovec *pIoVec = &pThis->paIoVecs[idx];
     772
     773    pIoVec->iov_base = pvBuf;
     774    pIoVec->iov_len  = cbBuf;
     775
     776    pSqe->u8Flags         = 0;
     777    pSqe->u16IoPrio       = 0;
     778    pSqe->i32Fd           = (int32_t)RTFileToNative(pHandle->u.hFile);
     779    pSqe->u64OffStart     = off;
     780    pSqe->u64AddrBufIoVec = (uint64_t)(uintptr_t)pIoVec;
     781    pSqe->u64User         = (uint64_t)(uintptr_t)pvUser;
     782
     783    switch (enmOp)
     784    {
     785        case RTIOQUEUEOP_READ:
     786            pSqe->u8Opc               = LNX_IOURING_OPC_READV;
     787            pSqe->uOpc.u32KrnlRwFlags = 0;
     788            break;
     789        case RTIOQUEUEOP_WRITE:
     790            pSqe->u8Opc               = LNX_IOURING_OPC_WRITEV;
     791            pSqe->uOpc.u32KrnlRwFlags = 0;
     792            break;
     793        case RTIOQUEUEOP_SYNC:
     794            pSqe->u8Opc              = LNX_IOURING_OPC_FSYNC;
     795            pSqe->uOpc.u32FsyncFlags = 0;
     796            break;
     797        default:
     798            AssertMsgFailedReturn(("Invalid I/O queue operation: %d\n", enmOp),
     799                                  VERR_INVALID_PARAMETER);
     800    }
     801
     802    pThis->idxSqTail++;
     803    pThis->cSqesToCommit++;
     804    return VINF_SUCCESS;
    643805}
    644806
     
    650812    RT_NOREF(pThis, pcReqsCommitted);
    651813
    652     return VERR_NOT_IMPLEMENTED;
     814    ASMWriteFence();
     815    ASMAtomicWriteU32(pThis->Sq.pidxTail, pThis->idxSqTail);
     816    ASMWriteFence();
     817
     818    int rc = rtIoQueueLnxIoURingEnter(pThis->iFdIoCtx, pThis->cSqesToCommit, 0, 0 /*fFlags*/);
     819    if (RT_SUCCESS(rc))
     820    {
     821        *pcReqsCommitted = pThis->cSqesToCommit;
     822        pThis->cSqesToCommit = 0;
     823    }
     824
     825    return rc;
    653826}
    654827
     
    659832{
    660833    PRTIOQUEUEPROVINT pThis = hIoQueueProv;
    661     RT_NOREF(pThis, paCEvt, cCEvt, cMinWait, pcCEvt, fFlags);
    662 
    663     return VERR_NOT_IMPLEMENTED;;
     834    int rc = VINF_SUCCESS;
     835    uint32_t cCEvtSeen = 0;
     836
     837    RT_NOREF(fFlags);
     838
     839    /*
     840     * Check the completion queue first for any completed events which might save us a
     841     * context switch later on.
     842     */
     843    rtIoQueueLnxIoURingFileProvCqCheck(pThis, paCEvt, cCEvt, &cCEvtSeen);
     844
     845    while (   cCEvtSeen < cMinWait
     846           && RT_SUCCESS(rc))
     847    {
     848        /*
     849         * We can employ a blocking read on the event file descriptor, it will return
     850         * either when woken up externally or when there are completion events pending.
     851         */
     852        uint64_t uCnt = 0; /**< The counter value returned upon a successful read(). */
     853        ssize_t rcLnx = read(pThis->iFdEvt, &uCnt, sizeof(uCnt));
     854        if (rcLnx == sizeof(uCnt))
     855        {
     856            uint32_t cCEvtThisSeen = 0;
     857            rtIoQueueLnxIoURingFileProvCqCheck(pThis, &paCEvt[cCEvtSeen], cCEvt - cCEvtSeen, &cCEvtThisSeen);
     858            cCEvtSeen += cCEvtThisSeen;
     859
     860            /* Whether we got woken up externally. */
     861            if (ASMAtomicXchgBool(&pThis->fExtIntr, false))
     862                rc = VERR_INTERRUPTED;
     863        }
     864        else if (rcLnx == -1)
     865            rc = RTErrConvertFromErrno(errno);
     866        else
     867            AssertMsgFailed(("Unexpected read() -> 0\n"));
     868    }
     869
     870    *pcCEvt = cCEvtSeen;
     871    return rc;
    664872}
    665873
     
    669877{
    670878    PRTIOQUEUEPROVINT pThis = hIoQueueProv;
    671     RT_NOREF(pThis);
    672 
    673     return VERR_NOT_IMPLEMENTED;
     879    int rc = VINF_SUCCESS;
     880
     881    if (!ASMAtomicXchgBool(&pThis->fExtIntr, true))
     882    {
     883        const uint64_t uValAdd = 1;
     884        ssize_t rcLnx = write(pThis->iFdEvt, &uValAdd, sizeof(uValAdd));
     885
     886        Assert(rcLnx == -1 || rcLnx == sizeof(uValAdd));
     887        if (rcLnx == -1)
     888            rc = RTErrConvertFromErrno(errno);
     889    }
     890
     891    return rc;
    674892}
    675893
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette