Changeset 79983 in vbox for trunk/src/VBox/Runtime/r3
- Timestamp:
- Jul 25, 2019 5:21:24 PM (6 years ago)
- svn:sync-xref-src-repo-rev:
- 132417
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Runtime/r3/linux/ioqueue-iouringfile-provider.cpp
r79953 r79983 68 68 #include <iprt/string.h> 69 69 70 #include <errno.h> 71 #include <unistd.h> 72 #include <signal.h> 70 73 #include <sys/mman.h> 71 #include <unistd.h>72 74 #include <sys/syscall.h> 73 #include <errno.h> 74 #include <signal.h> 75 #include <sys/uio.h> 75 76 76 77 #include "internal/ioqueue.h" … … 87 88 /** The syscall number of io_uring_register(). */ 88 89 #define LNX_IOURING_SYSCALL_REGISTER 427 89 90 /** eventfd2() syscall not associated with io_uring but used for kicking waiters. */ 91 #define LNX_SYSCALL_EVENTFD2 19 90 92 91 93 /********************************************************************************************************************************* … … 409 411 /** The io_uring file descriptor. */ 410 412 int iFdIoCtx; 413 /** The eventfd file descriptor registered with the ring. */ 414 int iFdEvt; 411 415 /** The submission queue. */ 412 416 RTIOQUEUESQ Sq; 417 /** The currently uncommitted tail for the SQ. */ 418 uint32_t idxSqTail; 419 /** Numbere of uncommitted SQEs. */ 420 uint32_t cSqesToCommit; 413 421 /** The completion queue. */ 414 422 RTIOQUEUECQ Cq; 415 423 /** Pointer to the mapped SQES entries. */ 416 424 PLNXIOURINGSQE paSqes; 425 /** Pointer to the iovec structure used for non S/G requests. */ 426 struct iovec *paIoVecs; 417 427 /** Pointer returned by mmap() for the SQ ring, used for unmapping. */ 418 428 void *pvMMapSqRing; … … 427 437 /** Size of the mapped SQ entries array, used for unmapping. */ 428 438 size_t cbMMapSqes; 439 /** Flag whether the waiter was woken up externally. */ 440 volatile bool fExtIntr; 429 441 } RTIOQUEUEPROVINT; 430 442 /** Pointer to the internal I/O queue provider instance data. */ … … 519 531 520 532 533 /** 534 * eventfd2() syscall wrapper. 535 * 536 * @returns IPRT status code. 537 * @param uValInit The initial value of the maintained counter. 538 * @param fFlags Flags controlling the eventfd behavior. 539 * @param piFdEvt Where to store the file descriptor of the eventfd object on success. 540 */ 541 DECLINLINE(int) rtIoQueueLnxEventfd2(uint32_t uValInit, uint32_t fFlags, int *piFdEvt) 542 { 543 int rcLnx = syscall(LNX_SYSCALL_EVENTFD2, uValInit, fFlags); 544 if (RT_UNLIKELY(rcLnx == -1)) 545 return RTErrConvertFromErrno(errno); 546 547 *piFdEvt = rcLnx; 548 return VINF_SUCCESS; 549 } 550 551 552 /** 553 * Checks the completion event queue for pending events. 554 * 555 * @returns nothing. 556 * @param pThis The provider instance. 557 * @param paCEvt Pointer to the array of completion events. 558 * @param cCEvt Maximum number of completion events the array can hold. 559 * @param pcCEvtSeen Where to store the number of completion events processed. 560 */ 561 static void rtIoQueueLnxIoURingFileProvCqCheck(PRTIOQUEUEPROVINT pThis, PRTIOQUEUECEVT paCEvt, 562 uint32_t cCEvt, uint32_t *pcCEvtSeen) 563 { 564 /* The fencing and atomic accesses are kind of overkill and probably not required (dev paranoia). */ 565 ASMReadFence(); 566 uint32_t idxCqHead = ASMAtomicReadU32(pThis->Cq.pidxHead); 567 uint32_t idxCqTail = ASMAtomicReadU32(pThis->Cq.pidxTail); 568 ASMReadFence(); 569 570 uint32_t cCEvtSeen = 0; 571 572 while ( idxCqTail != idxCqHead 573 && cCEvtSeen < cCEvt) 574 { 575 /* Get the index. */ 576 uint32_t idxCqe = idxCqHead & pThis->Cq.fRingMask; 577 volatile LNXIOURINGCQE *pCqe = &pThis->Cq.paCqes[idxCqe]; 578 579 paCEvt->pvUser = (void *)(uintptr_t)pCqe->u64User; 580 if (pCqe->rcLnx >= 0) 581 { 582 paCEvt->rcReq = VINF_SUCCESS; 583 paCEvt->cbXfered = (size_t)pCqe->rcLnx; 584 } 585 else 586 paCEvt->rcReq = RTErrConvertFromErrno(-pCqe->rcLnx); 587 588 paCEvt++; 589 cCEvtSeen++; 590 idxCqHead++; 591 } 592 593 *pcCEvtSeen = cCEvtSeen; 594 595 /* Paranoia strikes again. */ 596 ASMWriteFence(); 597 ASMAtomicWriteU32(pThis->Cq.pidxHead, idxCqHead); 598 ASMWriteFence(); 599 } 600 601 521 602 /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnIsSupported} */ 522 603 static DECLCALLBACK(bool) rtIoQueueLnxIoURingFileProv_IsSupported(void) … … 527 608 */ 528 609 int iFdIoCtx = 0; 610 bool fSupp = false; 529 611 LNXIOURINGPARAMS Params; 530 612 RT_ZERO(Params); … … 533 615 if (RT_SUCCESS(rc)) 534 616 { 617 /* 618 * Check that we can register an eventfd descriptor to get notified about 619 * completion events while being able to kick the waiter externally out of the wait. 620 */ 621 int iFdEvt = 0; 622 rc = rtIoQueueLnxEventfd2(0 /*uValInit*/, 0 /*fFlags*/, &iFdEvt); 623 if (RT_SUCCESS(rc)) 624 { 625 rc = rtIoQueueLnxIoURingRegister(iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER, 626 &iFdEvt, 1 /*cArgs*/); 627 if (RT_SUCCESS(rc)) 628 fSupp = true; 629 630 int rcLnx = close(iFdEvt); Assert(!rcLnx); RT_NOREF(rcLnx); 631 } 535 632 int rcLnx = close(iFdIoCtx); Assert(!rcLnx); RT_NOREF(rcLnx); 536 return true;537 633 } 538 634 539 return f alse;635 return fSupp; 540 636 } 541 637 … … 543 639 /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnQueueInit} */ 544 640 static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_QueueInit(RTIOQUEUEPROV hIoQueueProv, uint32_t fFlags, 545 size_t cSqEntries, size_t cCqEntries)641 uint32_t cSqEntries, uint32_t cCqEntries) 546 642 { 547 643 RT_NOREF(fFlags, cCqEntries); … … 550 646 LNXIOURINGPARAMS Params; 551 647 RT_ZERO(Params); 648 649 pThis->cSqesToCommit = 0; 650 pThis->fExtIntr = false; 552 651 553 652 int rc = rtIoQueueLnxIoURingSetup(cSqEntries, &Params, &pThis->iFdIoCtx); … … 559 658 pThis->cbMMapSqes = Params.u32SqEntriesCnt * sizeof(LNXIOURINGSQE); 560 659 561 rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQ, pThis->cbMMapSqRing, &pThis->pvMMapSqRing);562 if (RT_ SUCCESS(rc))660 pThis->paIoVecs = (struct iovec *)RTMemAllocZ(Params.u32SqEntriesCnt * sizeof(struct iovec)); 661 if (RT_LIKELY(pThis->paIoVecs)) 563 662 { 564 rc = rtIoQueueLnx IoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_CQ, pThis->cbMMapCqRing, &pThis->pvMMapCqRing);663 rc = rtIoQueueLnxEventfd2(0 /*uValInit*/, 0 /*fFlags*/, &pThis->iFdEvt); 565 664 if (RT_SUCCESS(rc)) 566 665 { 567 rc = rtIoQueueLnxIoURing Mmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQES, pThis->cbMMapSqes, &pThis->pvMMapSqes);666 rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER, &pThis->iFdEvt, 1 /*cArgs*/); 568 667 if (RT_SUCCESS(rc)) 569 668 { 570 uint8_t *pbTmp = (uint8_t *)pThis->pvMMapSqRing; 571 572 pThis->Sq.pidxHead = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffHead); 573 pThis->Sq.pidxTail = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffTail); 574 pThis->Sq.fRingMask = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingMask); 575 pThis->Sq.cEntries = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingEntries); 576 pThis->Sq.pfFlags = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffFlags); 577 pThis->Sq.paidxSqes = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffArray); 578 579 pThis->paSqes = (PLNXIOURINGSQE)pThis->pvMMapSqes; 580 581 pbTmp = (uint8_t *)pThis->pvMMapCqRing; 582 583 pThis->Cq.pidxHead = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffHead); 584 pThis->Cq.pidxTail = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffTail); 585 pThis->Cq.fRingMask = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingMask); 586 pThis->Cq.cEntries = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingEntries); 587 pThis->Cq.paCqes = (PLNXIOURINGCQE)(pbTmp + Params.CqOffsets.u32OffCqes); 588 return VINF_SUCCESS; 669 rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQ, pThis->cbMMapSqRing, &pThis->pvMMapSqRing); 670 if (RT_SUCCESS(rc)) 671 { 672 rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_CQ, pThis->cbMMapCqRing, &pThis->pvMMapCqRing); 673 if (RT_SUCCESS(rc)) 674 { 675 rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQES, pThis->cbMMapSqes, &pThis->pvMMapSqes); 676 if (RT_SUCCESS(rc)) 677 { 678 uint8_t *pbTmp = (uint8_t *)pThis->pvMMapSqRing; 679 680 pThis->Sq.pidxHead = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffHead); 681 pThis->Sq.pidxTail = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffTail); 682 pThis->Sq.fRingMask = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingMask); 683 pThis->Sq.cEntries = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingEntries); 684 pThis->Sq.pfFlags = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffFlags); 685 pThis->Sq.paidxSqes = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffArray); 686 pThis->idxSqTail = *pThis->Sq.pidxTail; 687 688 pThis->paSqes = (PLNXIOURINGSQE)pThis->pvMMapSqes; 689 690 pbTmp = (uint8_t *)pThis->pvMMapCqRing; 691 692 pThis->Cq.pidxHead = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffHead); 693 pThis->Cq.pidxTail = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffTail); 694 pThis->Cq.fRingMask = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingMask); 695 pThis->Cq.cEntries = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingEntries); 696 pThis->Cq.paCqes = (PLNXIOURINGCQE)(pbTmp + Params.CqOffsets.u32OffCqes); 697 return VINF_SUCCESS; 698 } 699 700 munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing); 701 } 702 703 munmap(pThis->pvMMapSqRing, pThis->cbMMapSqRing); 704 } 705 706 rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER, NULL, 0); 707 AssertRC(rc); 589 708 } 590 709 591 munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing);710 close(pThis->iFdEvt); 592 711 } 593 712 594 munmap(pThis->pvMMapSqRing, pThis->cbMMapSqRing);713 RTMemFree(pThis->paIoVecs); 595 714 } 715 716 int rcLnx = close(pThis->iFdIoCtx); Assert(!rcLnx); RT_NOREF(rcLnx); 596 717 } 597 718 … … 608 729 rcLnx = munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing); Assert(!rcLnx); RT_NOREF(rcLnx); 609 730 rcLnx = munmap(pThis->pvMMapSqes, pThis->cbMMapSqes); Assert(!rcLnx); RT_NOREF(rcLnx); 731 732 int rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER, NULL, 0); 733 AssertRC(rc); 734 735 close(pThis->iFdEvt); 610 736 close(pThis->iFdIoCtx); 737 RTMemFree(pThis->paIoVecs); 611 738 612 739 RT_ZERO(pThis); … … 638 765 { 639 766 PRTIOQUEUEPROVINT pThis = hIoQueueProv; 640 RT_NOREF(pThis, pHandle, enmOp, off, pvBuf, cbBuf, fReqFlags, pvUser); 641 642 return VERR_NOT_IMPLEMENTED; 767 RT_NOREF(fReqFlags); 768 769 uint32_t idx = pThis->idxSqTail & pThis->Sq.fRingMask; 770 PLNXIOURINGSQE pSqe = &pThis->paSqes[idx]; 771 struct iovec *pIoVec = &pThis->paIoVecs[idx]; 772 773 pIoVec->iov_base = pvBuf; 774 pIoVec->iov_len = cbBuf; 775 776 pSqe->u8Flags = 0; 777 pSqe->u16IoPrio = 0; 778 pSqe->i32Fd = (int32_t)RTFileToNative(pHandle->u.hFile); 779 pSqe->u64OffStart = off; 780 pSqe->u64AddrBufIoVec = (uint64_t)(uintptr_t)pIoVec; 781 pSqe->u64User = (uint64_t)(uintptr_t)pvUser; 782 783 switch (enmOp) 784 { 785 case RTIOQUEUEOP_READ: 786 pSqe->u8Opc = LNX_IOURING_OPC_READV; 787 pSqe->uOpc.u32KrnlRwFlags = 0; 788 break; 789 case RTIOQUEUEOP_WRITE: 790 pSqe->u8Opc = LNX_IOURING_OPC_WRITEV; 791 pSqe->uOpc.u32KrnlRwFlags = 0; 792 break; 793 case RTIOQUEUEOP_SYNC: 794 pSqe->u8Opc = LNX_IOURING_OPC_FSYNC; 795 pSqe->uOpc.u32FsyncFlags = 0; 796 break; 797 default: 798 AssertMsgFailedReturn(("Invalid I/O queue operation: %d\n", enmOp), 799 VERR_INVALID_PARAMETER); 800 } 801 802 pThis->idxSqTail++; 803 pThis->cSqesToCommit++; 804 return VINF_SUCCESS; 643 805 } 644 806 … … 650 812 RT_NOREF(pThis, pcReqsCommitted); 651 813 652 return VERR_NOT_IMPLEMENTED; 814 ASMWriteFence(); 815 ASMAtomicWriteU32(pThis->Sq.pidxTail, pThis->idxSqTail); 816 ASMWriteFence(); 817 818 int rc = rtIoQueueLnxIoURingEnter(pThis->iFdIoCtx, pThis->cSqesToCommit, 0, 0 /*fFlags*/); 819 if (RT_SUCCESS(rc)) 820 { 821 *pcReqsCommitted = pThis->cSqesToCommit; 822 pThis->cSqesToCommit = 0; 823 } 824 825 return rc; 653 826 } 654 827 … … 659 832 { 660 833 PRTIOQUEUEPROVINT pThis = hIoQueueProv; 661 RT_NOREF(pThis, paCEvt, cCEvt, cMinWait, pcCEvt, fFlags); 662 663 return VERR_NOT_IMPLEMENTED;; 834 int rc = VINF_SUCCESS; 835 uint32_t cCEvtSeen = 0; 836 837 RT_NOREF(fFlags); 838 839 /* 840 * Check the completion queue first for any completed events which might save us a 841 * context switch later on. 842 */ 843 rtIoQueueLnxIoURingFileProvCqCheck(pThis, paCEvt, cCEvt, &cCEvtSeen); 844 845 while ( cCEvtSeen < cMinWait 846 && RT_SUCCESS(rc)) 847 { 848 /* 849 * We can employ a blocking read on the event file descriptor, it will return 850 * either when woken up externally or when there are completion events pending. 851 */ 852 uint64_t uCnt = 0; /**< The counter value returned upon a successful read(). */ 853 ssize_t rcLnx = read(pThis->iFdEvt, &uCnt, sizeof(uCnt)); 854 if (rcLnx == sizeof(uCnt)) 855 { 856 uint32_t cCEvtThisSeen = 0; 857 rtIoQueueLnxIoURingFileProvCqCheck(pThis, &paCEvt[cCEvtSeen], cCEvt - cCEvtSeen, &cCEvtThisSeen); 858 cCEvtSeen += cCEvtThisSeen; 859 860 /* Whether we got woken up externally. */ 861 if (ASMAtomicXchgBool(&pThis->fExtIntr, false)) 862 rc = VERR_INTERRUPTED; 863 } 864 else if (rcLnx == -1) 865 rc = RTErrConvertFromErrno(errno); 866 else 867 AssertMsgFailed(("Unexpected read() -> 0\n")); 868 } 869 870 *pcCEvt = cCEvtSeen; 871 return rc; 664 872 } 665 873 … … 669 877 { 670 878 PRTIOQUEUEPROVINT pThis = hIoQueueProv; 671 RT_NOREF(pThis); 672 673 return VERR_NOT_IMPLEMENTED; 879 int rc = VINF_SUCCESS; 880 881 if (!ASMAtomicXchgBool(&pThis->fExtIntr, true)) 882 { 883 const uint64_t uValAdd = 1; 884 ssize_t rcLnx = write(pThis->iFdEvt, &uValAdd, sizeof(uValAdd)); 885 886 Assert(rcLnx == -1 || rcLnx == sizeof(uValAdd)); 887 if (rcLnx == -1) 888 rc = RTErrConvertFromErrno(errno); 889 } 890 891 return rc; 674 892 } 675 893
Note:
See TracChangeset
for help on using the changeset viewer.