1 | /* $Id: ioqueue-iouringfile-provider.cpp 99739 2023-05-11 01:01:08Z vboxsync $ */
|
---|
2 | /** @file
|
---|
3 | * IPRT - I/O queue, Linux io_uring interface I/O file provider.
|
---|
4 | */
|
---|
5 |
|
---|
6 | /*
|
---|
7 | * Copyright (C) 2019-2023 Oracle and/or its affiliates.
|
---|
8 | *
|
---|
9 | * This file is part of VirtualBox base platform packages, as
|
---|
10 | * available from https://www.virtualbox.org.
|
---|
11 | *
|
---|
12 | * This program is free software; you can redistribute it and/or
|
---|
13 | * modify it under the terms of the GNU General Public License
|
---|
14 | * as published by the Free Software Foundation, in version 3 of the
|
---|
15 | * License.
|
---|
16 | *
|
---|
17 | * This program is distributed in the hope that it will be useful, but
|
---|
18 | * WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
20 | * General Public License for more details.
|
---|
21 | *
|
---|
22 | * You should have received a copy of the GNU General Public License
|
---|
23 | * along with this program; if not, see <https://www.gnu.org/licenses>.
|
---|
24 | *
|
---|
25 | * The contents of this file may alternatively be used under the terms
|
---|
26 | * of the Common Development and Distribution License Version 1.0
|
---|
27 | * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
|
---|
28 | * in the VirtualBox distribution, in which case the provisions of the
|
---|
29 | * CDDL are applicable instead of those of the GPL.
|
---|
30 | *
|
---|
31 | * You may elect to license modified versions of this file under the
|
---|
32 | * terms and conditions of either the GPL or the CDDL or both.
|
---|
33 | *
|
---|
34 | * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
|
---|
35 | */
|
---|
36 |
|
---|
37 | /** @page pg_rtioqueue_linux RTIoQueue - Linux io_uring implementation notes
|
---|
38 | * @internal
|
---|
39 | *
|
---|
40 | * The io_uring interface is the most recent interface added to the Linux kernel
|
---|
41 | * to deliver fast and efficient I/O. It was first added with kernel version 5.1 and is
|
---|
42 | * thus not available on most systems as of writing this backend (July 2019).
|
---|
43 | * It supersedes the old async I/O interface and does away with some restrictions like
|
---|
44 | * having to disable caching for the file.
|
---|
45 | * The interface is centered around a submission and completion queue to queue multiple new
|
---|
46 | * requests for the kernel to process and get notified about completions to reduce the amount
|
---|
47 | * of context switches to an absolute minimum. It also offers advanced features like
|
---|
48 | * registering a fixed set of memory buffers for I/O upfront to reduce the processing overhead
|
---|
49 | * even more.
|
---|
50 | *
|
---|
51 | * The first implementation will only make use of the basic features and more advanced features
|
---|
52 | * will be added later.
|
---|
53 | * The adept developer probably noticed that the public IPRT I/O queue API resembles the io_uring
|
---|
54 | * interface in many aspects. This is not by accident but to reduce our own overhead as much as possible
|
---|
55 | * while still keeping a consistent platform independent API which allows efficient implementations on
|
---|
56 | * other hosts when they come up.
|
---|
57 | *
|
---|
58 | * The public kernel io_uring interface is completely defined in this file to avoid dragging in additional
|
---|
59 | * dependencies and to avoid compile problems on older hosts missing the interface just like it is done
|
---|
60 | * for the Linux RTFileAio* API. The necessary interface definitions and descriptions were retrieved from:
|
---|
61 | * * http://kernel.dk/io_uring.pdf
|
---|
62 | * * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/io_uring.h
|
---|
63 | */
|
---|
64 |
|
---|
65 |
|
---|
66 | /*********************************************************************************************************************************
|
---|
67 | * Header Files *
|
---|
68 | *********************************************************************************************************************************/
|
---|
69 | #define LOG_GROUP RTLOGGROUP_IOQUEUE
|
---|
70 | #include <iprt/ioqueue.h>
|
---|
71 |
|
---|
72 | #include <iprt/assertcompile.h>
|
---|
73 | #include <iprt/asm.h>
|
---|
74 | #include <iprt/errcore.h>
|
---|
75 | #include <iprt/file.h>
|
---|
76 | #include <iprt/log.h>
|
---|
77 | #include <iprt/mem.h>
|
---|
78 | #include <iprt/string.h>
|
---|
79 |
|
---|
80 | #include <errno.h>
|
---|
81 | #include <unistd.h>
|
---|
82 | #include <signal.h>
|
---|
83 | #include <sys/mman.h>
|
---|
84 | #include <sys/syscall.h>
|
---|
85 | #include <sys/uio.h>
|
---|
86 |
|
---|
87 | #include "internal/ioqueue.h"
|
---|
88 |
|
---|
89 |
|
---|
90 | /*********************************************************************************************************************************
|
---|
91 | * Defined Constants And Macros *
|
---|
92 | *********************************************************************************************************************************/
|
---|
93 |
|
---|
/** The syscall number of io_uring_setup(). */
#define LNX_IOURING_SYSCALL_SETUP     425
/** The syscall number of io_uring_enter(). */
#define LNX_IOURING_SYSCALL_ENTER     426
/** The syscall number of io_uring_register(). */
#define LNX_IOURING_SYSCALL_REGISTER  427
/** eventfd2() syscall not associated with io_uring but used for kicking waiters.
 *
 * Unlike the io_uring numbers above, the eventfd2() number differs between
 * architectures, so the value published by <sys/syscall.h> is preferred.  The
 * hard coded fallback (290) is the x86-64 number and only used when the system
 * headers do not know the syscall. */
#ifdef SYS_eventfd2
# define LNX_SYSCALL_EVENTFD2         SYS_eventfd2
#else
# define LNX_SYSCALL_EVENTFD2         290
#endif
102 |
|
---|
103 |
|
---|
104 | /*********************************************************************************************************************************
|
---|
105 | * Structures and Typedefs *
|
---|
106 | *********************************************************************************************************************************/
|
---|
107 |
|
---|
108 | /**
|
---|
109 | * Linux io_uring completion event.
|
---|
110 | */
|
---|
111 | typedef struct LNXIOURINGCQE
|
---|
112 | {
|
---|
113 | /** Opaque user data associated with the completed request. */
|
---|
114 | uint64_t u64User;
|
---|
115 | /** The status code of the request. */
|
---|
116 | int32_t rcLnx;
|
---|
117 | /** Some flags which are not used as of now. */
|
---|
118 | uint32_t fFlags;
|
---|
119 | } LNXIOURINGCQE;
|
---|
120 | AssertCompileSize(LNXIOURINGCQE, 16);
|
---|
121 | /** Pointer to a Linux io_uring completion event. */
|
---|
122 | typedef LNXIOURINGCQE *PLNXIOURINGCQE;
|
---|
123 | /** Pointer to a constant linux io_uring completion event. */
|
---|
124 | typedef const LNXIOURINGCQE *PCLNXIOURINGCQE;
|
---|
125 |
|
---|
126 |
|
---|
127 | /**
|
---|
128 | * Linux io_uring submission queue entry.
|
---|
129 | */
|
---|
130 | typedef struct LNXIOURINGSQE
|
---|
131 | {
|
---|
132 | /** The opcode for the request. */
|
---|
133 | uint8_t u8Opc;
|
---|
134 | /** Common flags for the request. */
|
---|
135 | uint8_t u8Flags;
|
---|
136 | /** Assigned I/O priority. */
|
---|
137 | uint16_t u16IoPrio;
|
---|
138 | /** The file descriptor the request is for. */
|
---|
139 | int32_t i32Fd;
|
---|
140 | /** The start offset into the file for the request. */
|
---|
141 | uint64_t u64OffStart;
|
---|
142 | /** Buffer pointer or Pointer to io vector array depending on opcode. */
|
---|
143 | uint64_t u64AddrBufIoVec;
|
---|
144 | /** Size of the buffer in bytes or number of io vectors. */
|
---|
145 | uint32_t u32BufIoVecSz;
|
---|
146 | /** Opcode dependent data. */
|
---|
147 | union
|
---|
148 | {
|
---|
149 | /** Flags for read/write requests. */
|
---|
150 | uint32_t u32KrnlRwFlags;
|
---|
151 | /** Flags for fsync() like requests. */
|
---|
152 | uint32_t u32FsyncFlags;
|
---|
153 | /** Flags for poll() like requests. */
|
---|
154 | uint16_t u16PollFlags;
|
---|
155 | /** Flags for sync_file_range() like requests. */
|
---|
156 | uint32_t u32SyncFileRangeFlags;
|
---|
157 | /** Flags for requests requiring a msg structure. */
|
---|
158 | uint32_t u32MsgFlags;
|
---|
159 | } uOpc;
|
---|
160 | /** Opaque user data associated with the request and returned durign completion. */
|
---|
161 | uint64_t u64User;
|
---|
162 | /** Request type dependent data. */
|
---|
163 | union
|
---|
164 | {
|
---|
165 | /** Fixed buffer index if indicated by the request flags. */
|
---|
166 | uint16_t u16FixedBufIdx;
|
---|
167 | /** Padding to align the structure to 64 bytes. */
|
---|
168 | uint64_t au64Padding[3];
|
---|
169 | } uReq;
|
---|
170 | } LNXIOURINGSQE;
|
---|
171 | AssertCompileSize(LNXIOURINGSQE, 64);
|
---|
172 | /** Pointer to a Linux io_uring submission queue entry. */
|
---|
173 | typedef LNXIOURINGSQE *PLNXIOURINGSQE;
|
---|
174 | /** Pointer to a constant Linux io_uring submission queue entry. */
|
---|
175 | typedef const LNXIOURINGSQE *PCLNXIOURINGSQE;
|
---|
176 |
|
---|
177 |
|
---|
178 | /**
|
---|
179 | * Linux u_ioring SQ ring header structure to maintain the queue.
|
---|
180 | */
|
---|
181 | typedef struct LNXIOURINGSQ
|
---|
182 | {
|
---|
183 | /** The current head position to fill in new requests. */
|
---|
184 | uint32_t u32OffHead;
|
---|
185 | /** The current tail position the kernel starts processing from. */
|
---|
186 | uint32_t u32OffTail;
|
---|
187 | /** The mask for the head and tail counters to apply to retrieve the index. */
|
---|
188 | uint32_t u32OffRingMask;
|
---|
189 | /** Number of entries in the SQ ring. */
|
---|
190 | uint32_t u32OffRingEntries;
|
---|
191 | /** Flags set asychronously by the kernel. */
|
---|
192 | uint32_t u32OffFlags;
|
---|
193 | /** Counter of dropped requests. */
|
---|
194 | uint32_t u32OffDroppedReqs;
|
---|
195 | /** Offset where to find the array of SQ entries. */
|
---|
196 | uint32_t u32OffArray;
|
---|
197 | /** Reserved. */
|
---|
198 | uint32_t u32Rsvd0;
|
---|
199 | /** Reserved. */
|
---|
200 | uint64_t u64Rsvd1;
|
---|
201 | } LNXIOURINGSQ;
|
---|
202 | AssertCompileSize(LNXIOURINGSQ, 40);
|
---|
203 | /** Pointer to a Linux u_ioring SQ ring header. */
|
---|
204 | typedef LNXIOURINGSQ *PLNXIOURINGSQ;
|
---|
205 | /** Pointer to a constant Linux u_ioring SQ ring header. */
|
---|
206 | typedef const LNXIOURINGSQ *PCLNXIOURINGSQ;
|
---|
207 |
|
---|
208 |
|
---|
209 | /**
|
---|
210 | * Linux io_uring CQ ring header structure to maintain the queue.
|
---|
211 | */
|
---|
212 | typedef struct LNXIOURINGCQ
|
---|
213 | {
|
---|
214 | /** The current head position the kernel modifies when completion events happen. */
|
---|
215 | uint32_t u32OffHead;
|
---|
216 | /** The current tail position to read completion events from. */
|
---|
217 | uint32_t u32OffTail;
|
---|
218 | /** The mask for the head and tail counters to apply to retrieve the index. */
|
---|
219 | uint32_t u32OffRingMask;
|
---|
220 | /** Number of entries in the CQ ring. */
|
---|
221 | uint32_t u32OffRingEntries;
|
---|
222 | /** Number of CQ overflows happened. */
|
---|
223 | uint32_t u32OffOverflowCnt;
|
---|
224 | /** */
|
---|
225 | uint32_t u32OffCqes;
|
---|
226 | /** Reserved. */
|
---|
227 | uint64_t au64Rsvd0[2];
|
---|
228 | } LNXIOURINGCQ;
|
---|
229 | AssertCompileSize(LNXIOURINGCQ, 40);
|
---|
230 | /** Pointer to a Linux u_ioring CQ ring header. */
|
---|
231 | typedef LNXIOURINGCQ *PLNXIOURINGCQ;
|
---|
232 | /** Pointer to a constant Linux u_ioring CQ ring header. */
|
---|
233 | typedef const LNXIOURINGCQ *PCLNXIOURINGCQ;
|
---|
234 |
|
---|
235 |
|
---|
236 | /**
|
---|
237 | * Linux io_uring parameters passed to io_uring_setup().
|
---|
238 | */
|
---|
239 | typedef struct LNXIOURINGPARAMS
|
---|
240 | {
|
---|
241 | /** Number of SQ entries requested, must be power of 2. */
|
---|
242 | uint32_t u32SqEntriesCnt;
|
---|
243 | /** Number of CQ entries requested, must be power of 2. */
|
---|
244 | uint32_t u32CqEntriesCnt;
|
---|
245 | /** Flags for the ring, , see LNX_IOURING_SETUP_F_*. */
|
---|
246 | uint32_t u32Flags;
|
---|
247 | /** Affinity of the kernel side SQ polling thread if enabled. */
|
---|
248 | uint32_t u32SqPollCpu;
|
---|
249 | /** Milliseconds after the kernel side SQ polling thread goes to sleep
|
---|
250 | * if there is are no requests to process. */
|
---|
251 | uint32_t u32SqPollIdleMs;
|
---|
252 | /** Reserved. */
|
---|
253 | uint32_t au32Rsvd0[5];
|
---|
254 | /** Offsets returned for the submission queue. */
|
---|
255 | LNXIOURINGSQ SqOffsets;
|
---|
256 | /** Offsets returned for the completion queue. */
|
---|
257 | LNXIOURINGCQ CqOffsets;
|
---|
258 | } LNXIOURINGPARAMS;
|
---|
259 | /** Pointer to Linux io_uring parameters. */
|
---|
260 | typedef LNXIOURINGPARAMS *PLNXIOURINGPARAMS;
|
---|
261 | /** Pointer to constant Linux io_uring parameters. */
|
---|
262 | typedef const LNXIOURINGPARAMS *PCLNXIOURINGPARAMS;
|
---|
263 |
|
---|
264 |
|
---|
/** @name Opcodes defined for LNXIOURINGSQE::u8Opc.
 * @{ */
/** Does nothing, useful for profiling the interface. */
#define LNX_IOURING_OPC_NOP                         0
/** Request behaving like preadv(). */
#define LNX_IOURING_OPC_READV                       1
/** Request behaving like pwritev(). */
#define LNX_IOURING_OPC_WRITEV                      2
/** Request behaving like fsync(). */
#define LNX_IOURING_OPC_FSYNC                       3
/** Read into a fixed buffer set up beforehand. */
#define LNX_IOURING_OPC_READ_FIXED                  4
/** Write from a fixed buffer set up beforehand. */
#define LNX_IOURING_OPC_WRITE_FIXED                 5
/** Adds a file descriptor to the pollset. */
#define LNX_IOURING_OPC_POLL_ADD                    6
/** Removes a file descriptor from the pollset. */
#define LNX_IOURING_OPC_POLL_REMOVE                 7
/** Request behaving like sync_file_range(). */
#define LNX_IOURING_OPC_SYNC_FILE_RANGE             8
/** Request behaving like sendmsg(). */
#define LNX_IOURING_OPC_SENDMSG                     9
/** Request behaving like recvmsg(). */
#define LNX_IOURING_OPC_RECVMSG                     10
/** @} */
290 |
|
---|
291 |
|
---|
/** @name Additional flags for LNX_IOURING_OPC_FSYNC requests.
 * @{ */
/** Data sync variant of the request (presumably fdatasync() like semantics,
 * see the kernel's IORING_FSYNC_DATASYNC). */
#define LNX_IOURING_OPC_FSYNC_DATASYNC              RT_BIT_32(0)
/** @} */


/** @name Flags for the LNX_IOURING_SYSCALL_SETUP syscall.
 * @{ */
/** The I/O context is polled. */
#define LNX_IOURING_SETUP_F_IOPOLL                  RT_BIT_32(0)
/** Let the kernel poll the submission queue. */
#define LNX_IOURING_SETUP_F_SQPOLL                  RT_BIT_32(1)
/** Set the CPU affinity of the kernel thread polling the submission queue. */
#define LNX_IOURING_SETUP_F_SQAFF                   RT_BIT_32(2)
/** @} */


/** @name Flags for LNXIOURINGSQE::u8Flags.
 * @{ */
/** The file descriptor was registered before use. */
#define LNX_IOURING_SQE_F_FIXED_FILE                RT_BIT(0)
/** All active requests are completed before the request with this flag is issued. */
#define LNX_IOURING_SQE_F_IO_DRAIN                  RT_BIT(1)
/** The request with this flag is linked to the next one. */
#define LNX_IOURING_SQE_F_IO_LINK                   RT_BIT(2)
/** @} */


/** @name Magic mmap offsets to map submission and completion queues.
 * @{ */
/** Offset for mapping the submission queue ring. */
#define LNX_IOURING_MMAP_OFF_SQ                     UINT64_C(0)
/** Offset for mapping the completion queue ring. */
#define LNX_IOURING_MMAP_OFF_CQ                     UINT64_C(0x8000000)
/** Offset for mapping the array of submission queue entries. */
#define LNX_IOURING_MMAP_OFF_SQES                   UINT64_C(0x10000000)
/** @} */


/** @name Flags used for the SQ ring structure.
 * @{ */
/** The kernel thread requires an io_uring_enter() wakeup to continue processing requests. */
#define LNX_IOURING_SQ_RING_F_NEED_WAKEUP           RT_BIT_32(0)
/** @} */


/** @name Flags for the LNX_IOURING_SYSCALL_ENTER syscall.
 * @{ */
/** Retrieve completion events for the completion queue. */
#define LNX_IOURING_ENTER_F_GETEVENTS               RT_BIT_32(0)
/** Wake up the suspended kernel thread processing the requests. */
#define LNX_IOURING_ENTER_F_SQ_WAKEUP               RT_BIT_32(1)
/** @} */
346 |
|
---|
347 |
|
---|
/** @name Opcodes for the LNX_IOURING_SYSCALL_REGISTER syscall.
 * @{ */
/** Registers a fixed set of buffers. */
#define LNX_IOURING_REGISTER_OPC_BUFFERS_REGISTER   0
/** Unregisters a previously registered fixed set of buffers. */
#define LNX_IOURING_REGISTER_OPC_BUFFERS_UNREGISTER 1
/** Registers a fixed set of files. */
#define LNX_IOURING_REGISTER_OPC_FILES_REGISTER     2
/** Unregisters a previously registered fixed set of files. */
#define LNX_IOURING_REGISTER_OPC_FILES_UNREGISTER   3
/** Registers an eventfd with the I/O ring. */
#define LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER   4
/** Unregisters a previously registered eventfd. */
#define LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER 5
/** @} */
363 |
|
---|
364 |
|
---|
/**
 * Submission queue ring state.
 *
 * @note The pointer members reference memory shared with the kernel and are
 *       therefore declared volatile.
 */
typedef struct RTIOQUEUESQ
{
    /** Where the head counter lives. */
    volatile uint32_t           *pidxHead;
    /** Where the tail counter lives. */
    volatile uint32_t           *pidxTail;
    /** Mask turning a counter value into a ring index. */
    uint32_t                    fRingMask;
    /** Number of entries the ring has. */
    uint32_t                    cEntries;
    /** Where the global flags live. */
    volatile uint32_t           *pfFlags;
    /** The indirection array used for indexing the real SQ entries. */
    volatile uint32_t           *paidxSqes;
} RTIOQUEUESQ;
386 |
|
---|
387 |
|
---|
388 | /**
|
---|
389 | * CQ ring structure.
|
---|
390 | *
|
---|
391 | * @note Some members of this structure point to memory shared with the kernel,
|
---|
392 | * hence the volatile keyword.
|
---|
393 | */
|
---|
394 | typedef struct RTIOQUEUECQ
|
---|
395 | {
|
---|
396 | /** Pointer to the head counter. */
|
---|
397 | volatile uint32_t *pidxHead;
|
---|
398 | /** Pointer to the tail counter. */
|
---|
399 | volatile uint32_t *pidxTail;
|
---|
400 | /** Mask to apply for the counters to get to the index. */
|
---|
401 | uint32_t fRingMask;
|
---|
402 | /** Number of entries in the ring. */
|
---|
403 | uint32_t cEntries;
|
---|
404 | /** Pointer to the completion entry ring. */
|
---|
405 | volatile LNXIOURINGCQE *paCqes;
|
---|
406 | } RTIOQUEUECQ;
|
---|
407 |
|
---|
408 |
|
---|
409 | /**
|
---|
410 | * Internal I/O queue provider instance data.
|
---|
411 | */
|
---|
412 | typedef struct RTIOQUEUEPROVINT
|
---|
413 | {
|
---|
414 | /** The io_uring file descriptor. */
|
---|
415 | int iFdIoCtx;
|
---|
416 | /** The eventfd file descriptor registered with the ring. */
|
---|
417 | int iFdEvt;
|
---|
418 | /** The submission queue. */
|
---|
419 | RTIOQUEUESQ Sq;
|
---|
420 | /** The currently uncommitted tail for the SQ. */
|
---|
421 | uint32_t idxSqTail;
|
---|
422 | /** Numbere of uncommitted SQEs. */
|
---|
423 | uint32_t cSqesToCommit;
|
---|
424 | /** The completion queue. */
|
---|
425 | RTIOQUEUECQ Cq;
|
---|
426 | /** Pointer to the mapped SQES entries. */
|
---|
427 | PLNXIOURINGSQE paSqes;
|
---|
428 | /** Pointer to the iovec structure used for non S/G requests. */
|
---|
429 | struct iovec *paIoVecs;
|
---|
430 | /** Pointer returned by mmap() for the SQ ring, used for unmapping. */
|
---|
431 | void *pvMMapSqRing;
|
---|
432 | /** Pointer returned by mmap() for the CQ ring, used for unmapping. */
|
---|
433 | void *pvMMapCqRing;
|
---|
434 | /** Pointer returned by mmap() for the SQ entries array, used for unmapping. */
|
---|
435 | void *pvMMapSqes;
|
---|
436 | /** Size of the mapped SQ ring, used for unmapping. */
|
---|
437 | size_t cbMMapSqRing;
|
---|
438 | /** Size of the mapped CQ ring, used for unmapping. */
|
---|
439 | size_t cbMMapCqRing;
|
---|
440 | /** Size of the mapped SQ entries array, used for unmapping. */
|
---|
441 | size_t cbMMapSqes;
|
---|
442 | /** Flag whether the waiter was woken up externally. */
|
---|
443 | volatile bool fExtIntr;
|
---|
444 | } RTIOQUEUEPROVINT;
|
---|
445 | /** Pointer to the internal I/O queue provider instance data. */
|
---|
446 | typedef RTIOQUEUEPROVINT *PRTIOQUEUEPROVINT;
|
---|
447 |
|
---|
448 |
|
---|
449 | /*********************************************************************************************************************************
|
---|
450 | * Internal Functions *
|
---|
451 | *********************************************************************************************************************************/
|
---|
452 |
|
---|
453 | /**
|
---|
454 | * Syscall wrapper for io_uring_setup().
|
---|
455 | *
|
---|
456 | * @returns IPRT status code.
|
---|
457 | * @param cEntries Number of entries for submission and completion queues.
|
---|
458 | * @param pParams Additional parameters for the I/O ring and updated return values
|
---|
459 | * on success.
|
---|
460 | * @param piFdIoCtx Where to store the file descriptor of the I/O ring on success.
|
---|
461 | */
|
---|
462 | DECLINLINE(int) rtIoQueueLnxIoURingSetup(uint32_t cEntries, PLNXIOURINGPARAMS pParams, int32_t *piFdIoCtx)
|
---|
463 | {
|
---|
464 | int rcLnx = syscall(LNX_IOURING_SYSCALL_SETUP, cEntries, pParams);
|
---|
465 | if (RT_UNLIKELY(rcLnx == -1))
|
---|
466 | return RTErrConvertFromErrno(errno);
|
---|
467 |
|
---|
468 | *piFdIoCtx = rcLnx;
|
---|
469 | return VINF_SUCCESS;
|
---|
470 | }
|
---|
471 |
|
---|
472 |
|
---|
473 | /**
|
---|
474 | * Syscall wrapper for io_uring_enter().
|
---|
475 | *
|
---|
476 | * @returns IPRT status code.
|
---|
477 | * @param iFdIoCtx The I/O ring file descriptor.
|
---|
478 | * @param cToSubmit Maximum number of requests waiting for processing.
|
---|
479 | * @param cMinComplete Minimum number of completion events to accumulate before returning.
|
---|
480 | * @param fFlags Flags for io_uring_enter(), see LNX_IOURING_ENTER_F_*.
|
---|
481 | */
|
---|
482 | DECLINLINE(int) rtIoQueueLnxIoURingEnter(int32_t iFdIoCtx, uint32_t cToSubmit, uint32_t cMinComplete,
|
---|
483 | uint32_t fFlags)
|
---|
484 | {
|
---|
485 | int rcLnx = syscall(LNX_IOURING_SYSCALL_ENTER, iFdIoCtx, cToSubmit, cMinComplete, fFlags,
|
---|
486 | NULL, 0);
|
---|
487 | if (RT_UNLIKELY(rcLnx == -1))
|
---|
488 | return RTErrConvertFromErrno(errno);
|
---|
489 |
|
---|
490 | return VINF_SUCCESS;
|
---|
491 | }
|
---|
492 |
|
---|
493 |
|
---|
494 | /**
|
---|
495 | * Syscall wrapper for io_uring_register().
|
---|
496 | *
|
---|
497 | * @returns IPRT status code.
|
---|
498 | * @param iFdIoCtx The I/O ring file descriptor.
|
---|
499 | * @param uOpc Operation to perform, see LNX_IOURING_REGISTER_OPC_*.
|
---|
500 | * @param pvArg Opaque arguments.
|
---|
501 | * @param cArgs Number of arguments.
|
---|
502 | */
|
---|
503 | DECLINLINE(int) rtIoQueueLnxIoURingRegister(int32_t iFdIoCtx, uint32_t uOpc, void *pvArg,
|
---|
504 | uint32_t cArgs)
|
---|
505 | {
|
---|
506 | int rcLnx = syscall(LNX_IOURING_SYSCALL_REGISTER, iFdIoCtx, uOpc, pvArg, cArgs);
|
---|
507 | if (RT_UNLIKELY(rcLnx == -1))
|
---|
508 | return RTErrConvertFromErrno(errno);
|
---|
509 |
|
---|
510 | return VINF_SUCCESS;
|
---|
511 | }
|
---|
512 |
|
---|
513 |
|
---|
514 | /**
|
---|
515 | * mmap() wrapper for the common bits and returning an IPRT status code.
|
---|
516 | *
|
---|
517 | * @returns IPRT status code.
|
---|
518 | * @param iFdIoCtx The I/O ring file descriptor.
|
---|
519 | * @param offMmap The mmap() offset.
|
---|
520 | * @param cbMmap How much to map.
|
---|
521 | * @param ppv Where to store the pointer to the mapping on success.
|
---|
522 | */
|
---|
523 | DECLINLINE(int) rtIoQueueLnxIoURingMmap(int iFdIoCtx, off_t offMmap, size_t cbMmap, void **ppv)
|
---|
524 | {
|
---|
525 | void *pv = mmap(0, cbMmap, PROT_READ | PROT_WRITE , MAP_SHARED | MAP_POPULATE, iFdIoCtx, offMmap);
|
---|
526 | if (pv != MAP_FAILED)
|
---|
527 | {
|
---|
528 | *ppv = pv;
|
---|
529 | return VINF_SUCCESS;
|
---|
530 | }
|
---|
531 |
|
---|
532 | return RTErrConvertFromErrno(errno);
|
---|
533 | }
|
---|
534 |
|
---|
535 |
|
---|
536 | /**
|
---|
537 | * eventfd2() syscall wrapper.
|
---|
538 | *
|
---|
539 | * @returns IPRT status code.
|
---|
540 | * @param uValInit The initial value of the maintained counter.
|
---|
541 | * @param fFlags Flags controlling the eventfd behavior.
|
---|
542 | * @param piFdEvt Where to store the file descriptor of the eventfd object on success.
|
---|
543 | */
|
---|
544 | DECLINLINE(int) rtIoQueueLnxEventfd2(uint32_t uValInit, uint32_t fFlags, int *piFdEvt)
|
---|
545 | {
|
---|
546 | int rcLnx = syscall(LNX_SYSCALL_EVENTFD2, uValInit, fFlags);
|
---|
547 | if (RT_UNLIKELY(rcLnx == -1))
|
---|
548 | return RTErrConvertFromErrno(errno);
|
---|
549 |
|
---|
550 | *piFdEvt = rcLnx;
|
---|
551 | return VINF_SUCCESS;
|
---|
552 | }
|
---|
553 |
|
---|
554 |
|
---|
555 | /**
|
---|
556 | * Checks the completion event queue for pending events.
|
---|
557 | *
|
---|
558 | * @param pThis The provider instance.
|
---|
559 | * @param paCEvt Pointer to the array of completion events.
|
---|
560 | * @param cCEvt Maximum number of completion events the array can hold.
|
---|
561 | * @param pcCEvtSeen Where to store the number of completion events processed.
|
---|
562 | */
|
---|
563 | static void rtIoQueueLnxIoURingFileProvCqCheck(PRTIOQUEUEPROVINT pThis, PRTIOQUEUECEVT paCEvt,
|
---|
564 | uint32_t cCEvt, uint32_t *pcCEvtSeen)
|
---|
565 | {
|
---|
566 | /* The fencing and atomic accesses are kind of overkill and probably not required (dev paranoia). */
|
---|
567 | ASMReadFence();
|
---|
568 | uint32_t idxCqHead = ASMAtomicReadU32(pThis->Cq.pidxHead);
|
---|
569 | uint32_t idxCqTail = ASMAtomicReadU32(pThis->Cq.pidxTail);
|
---|
570 | ASMReadFence();
|
---|
571 |
|
---|
572 | uint32_t cCEvtSeen = 0;
|
---|
573 |
|
---|
574 | while ( idxCqTail != idxCqHead
|
---|
575 | && cCEvtSeen < cCEvt)
|
---|
576 | {
|
---|
577 | /* Get the index. */
|
---|
578 | uint32_t idxCqe = idxCqHead & pThis->Cq.fRingMask;
|
---|
579 | volatile LNXIOURINGCQE *pCqe = &pThis->Cq.paCqes[idxCqe];
|
---|
580 |
|
---|
581 | paCEvt->pvUser = (void *)(uintptr_t)pCqe->u64User;
|
---|
582 | if (pCqe->rcLnx >= 0)
|
---|
583 | {
|
---|
584 | paCEvt->rcReq = VINF_SUCCESS;
|
---|
585 | paCEvt->cbXfered = (size_t)pCqe->rcLnx;
|
---|
586 | }
|
---|
587 | else
|
---|
588 | paCEvt->rcReq = RTErrConvertFromErrno(-pCqe->rcLnx);
|
---|
589 |
|
---|
590 | #ifdef RT_STRICT /* poison */
|
---|
591 | memset((void *)pCqe, 0xff, sizeof(*pCqe));
|
---|
592 | #endif
|
---|
593 |
|
---|
594 | paCEvt++;
|
---|
595 | cCEvtSeen++;
|
---|
596 | idxCqHead++;
|
---|
597 | }
|
---|
598 |
|
---|
599 | *pcCEvtSeen = cCEvtSeen;
|
---|
600 |
|
---|
601 | /* Paranoia strikes again. */
|
---|
602 | ASMWriteFence();
|
---|
603 | ASMAtomicWriteU32(pThis->Cq.pidxHead, idxCqHead);
|
---|
604 | ASMWriteFence();
|
---|
605 | }
|
---|
606 |
|
---|
607 |
|
---|
608 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnIsSupported} */
|
---|
609 | static DECLCALLBACK(bool) rtIoQueueLnxIoURingFileProv_IsSupported(void)
|
---|
610 | {
|
---|
611 | /*
|
---|
612 | * Try to create a simple I/O ring and close it again.
|
---|
613 | * The common code/public API already checked for the proper handle type.
|
---|
614 | */
|
---|
615 | int iFdIoCtx = 0;
|
---|
616 | bool fSupp = false;
|
---|
617 | LNXIOURINGPARAMS Params;
|
---|
618 | RT_ZERO(Params);
|
---|
619 |
|
---|
620 | int rc = rtIoQueueLnxIoURingSetup(16, &Params, &iFdIoCtx);
|
---|
621 | if (RT_SUCCESS(rc))
|
---|
622 | {
|
---|
623 | /*
|
---|
624 | * Check that we can register an eventfd descriptor to get notified about
|
---|
625 | * completion events while being able to kick the waiter externally out of the wait.
|
---|
626 | */
|
---|
627 | int iFdEvt = 0;
|
---|
628 | rc = rtIoQueueLnxEventfd2(0 /*uValInit*/, 0 /*fFlags*/, &iFdEvt);
|
---|
629 | if (RT_SUCCESS(rc))
|
---|
630 | {
|
---|
631 | rc = rtIoQueueLnxIoURingRegister(iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER,
|
---|
632 | &iFdEvt, 1 /*cArgs*/);
|
---|
633 | if (RT_SUCCESS(rc))
|
---|
634 | fSupp = true;
|
---|
635 |
|
---|
636 | int rcLnx = close(iFdEvt); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
637 | }
|
---|
638 | int rcLnx = close(iFdIoCtx); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
639 | }
|
---|
640 |
|
---|
641 | return fSupp;
|
---|
642 | }
|
---|
643 |
|
---|
644 |
|
---|
645 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnQueueInit} */
|
---|
646 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_QueueInit(RTIOQUEUEPROV hIoQueueProv, uint32_t fFlags,
|
---|
647 | uint32_t cSqEntries, uint32_t cCqEntries)
|
---|
648 | {
|
---|
649 | RT_NOREF(fFlags, cCqEntries);
|
---|
650 |
|
---|
651 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
652 | LNXIOURINGPARAMS Params;
|
---|
653 | RT_ZERO(Params);
|
---|
654 |
|
---|
655 | pThis->cSqesToCommit = 0;
|
---|
656 | pThis->fExtIntr = false;
|
---|
657 |
|
---|
658 | int rc = rtIoQueueLnxIoURingSetup(cSqEntries, &Params, &pThis->iFdIoCtx);
|
---|
659 | if (RT_SUCCESS(rc))
|
---|
660 | {
|
---|
661 | /* Map the rings into userspace. */
|
---|
662 | pThis->cbMMapSqRing = Params.SqOffsets.u32OffArray + Params.u32SqEntriesCnt * sizeof(uint32_t);
|
---|
663 | pThis->cbMMapCqRing = Params.CqOffsets.u32OffCqes + Params.u32CqEntriesCnt * sizeof(LNXIOURINGCQE);
|
---|
664 | pThis->cbMMapSqes = Params.u32SqEntriesCnt * sizeof(LNXIOURINGSQE);
|
---|
665 |
|
---|
666 | pThis->paIoVecs = (struct iovec *)RTMemAllocZ(Params.u32SqEntriesCnt * sizeof(struct iovec));
|
---|
667 | if (RT_LIKELY(pThis->paIoVecs))
|
---|
668 | {
|
---|
669 | rc = rtIoQueueLnxEventfd2(0 /*uValInit*/, 0 /*fFlags*/, &pThis->iFdEvt);
|
---|
670 | if (RT_SUCCESS(rc))
|
---|
671 | {
|
---|
672 | rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER, &pThis->iFdEvt, 1 /*cArgs*/);
|
---|
673 | if (RT_SUCCESS(rc))
|
---|
674 | {
|
---|
675 | rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQ, pThis->cbMMapSqRing, &pThis->pvMMapSqRing);
|
---|
676 | if (RT_SUCCESS(rc))
|
---|
677 | {
|
---|
678 | rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_CQ, pThis->cbMMapCqRing, &pThis->pvMMapCqRing);
|
---|
679 | if (RT_SUCCESS(rc))
|
---|
680 | {
|
---|
681 | rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQES, pThis->cbMMapSqes, &pThis->pvMMapSqes);
|
---|
682 | if (RT_SUCCESS(rc))
|
---|
683 | {
|
---|
684 | uint8_t *pbTmp = (uint8_t *)pThis->pvMMapSqRing;
|
---|
685 |
|
---|
686 | pThis->Sq.pidxHead = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffHead);
|
---|
687 | pThis->Sq.pidxTail = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffTail);
|
---|
688 | pThis->Sq.fRingMask = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingMask);
|
---|
689 | pThis->Sq.cEntries = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingEntries);
|
---|
690 | pThis->Sq.pfFlags = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffFlags);
|
---|
691 | pThis->Sq.paidxSqes = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffArray);
|
---|
692 | pThis->idxSqTail = *pThis->Sq.pidxTail;
|
---|
693 |
|
---|
694 | pThis->paSqes = (PLNXIOURINGSQE)pThis->pvMMapSqes;
|
---|
695 |
|
---|
696 | pbTmp = (uint8_t *)pThis->pvMMapCqRing;
|
---|
697 |
|
---|
698 | pThis->Cq.pidxHead = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffHead);
|
---|
699 | pThis->Cq.pidxTail = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffTail);
|
---|
700 | pThis->Cq.fRingMask = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingMask);
|
---|
701 | pThis->Cq.cEntries = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingEntries);
|
---|
702 | pThis->Cq.paCqes = (PLNXIOURINGCQE)(pbTmp + Params.CqOffsets.u32OffCqes);
|
---|
703 | return VINF_SUCCESS;
|
---|
704 | }
|
---|
705 |
|
---|
706 | munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing);
|
---|
707 | }
|
---|
708 |
|
---|
709 | munmap(pThis->pvMMapSqRing, pThis->cbMMapSqRing);
|
---|
710 | }
|
---|
711 |
|
---|
712 | rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER, NULL, 0);
|
---|
713 | AssertRC(rc);
|
---|
714 | }
|
---|
715 |
|
---|
716 | close(pThis->iFdEvt);
|
---|
717 | }
|
---|
718 |
|
---|
719 | RTMemFree(pThis->paIoVecs);
|
---|
720 | }
|
---|
721 |
|
---|
722 | int rcLnx = close(pThis->iFdIoCtx); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
723 | }
|
---|
724 |
|
---|
725 | return rc;
|
---|
726 | }
|
---|
727 |
|
---|
728 |
|
---|
729 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnQueueDestroy} */
|
---|
730 | static DECLCALLBACK(void) rtIoQueueLnxIoURingFileProv_QueueDestroy(RTIOQUEUEPROV hIoQueueProv)
|
---|
731 | {
|
---|
732 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
733 |
|
---|
734 | int rcLnx = munmap(pThis->pvMMapSqRing, pThis->cbMMapSqRing); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
735 | rcLnx = munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
736 | rcLnx = munmap(pThis->pvMMapSqes, pThis->cbMMapSqes); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
737 |
|
---|
738 | int rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER, NULL, 0);
|
---|
739 | AssertRC(rc);
|
---|
740 |
|
---|
741 | close(pThis->iFdEvt);
|
---|
742 | close(pThis->iFdIoCtx);
|
---|
743 | RTMemFree(pThis->paIoVecs);
|
---|
744 |
|
---|
745 | RT_ZERO(pThis);
|
---|
746 | }
|
---|
747 |
|
---|
748 |
|
---|
749 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnHandleRegister} */
|
---|
750 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_HandleRegister(RTIOQUEUEPROV hIoQueueProv, PCRTHANDLE pHandle)
|
---|
751 | {
|
---|
752 | RT_NOREF(hIoQueueProv, pHandle);
|
---|
753 | /** @todo Add support for fixed file sets later. */
|
---|
754 | return VINF_SUCCESS;
|
---|
755 | }
|
---|
756 |
|
---|
757 |
|
---|
758 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnHandleDeregister} */
|
---|
759 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_HandleDeregister(RTIOQUEUEPROV hIoQueueProv, PCRTHANDLE pHandle)
|
---|
760 | {
|
---|
761 | RT_NOREF(hIoQueueProv, pHandle);
|
---|
762 | /** @todo Add support for fixed file sets later. */
|
---|
763 | return VINF_SUCCESS;
|
---|
764 | }
|
---|
765 |
|
---|
766 |
|
---|
767 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnReqPrepare} */
|
---|
768 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_ReqPrepare(RTIOQUEUEPROV hIoQueueProv, PCRTHANDLE pHandle, RTIOQUEUEOP enmOp,
|
---|
769 | uint64_t off, void *pvBuf, size_t cbBuf, uint32_t fReqFlags,
|
---|
770 | void *pvUser)
|
---|
771 | {
|
---|
772 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
773 | RT_NOREF(fReqFlags);
|
---|
774 |
|
---|
775 | uint32_t idx = pThis->idxSqTail & pThis->Sq.fRingMask;
|
---|
776 | PLNXIOURINGSQE pSqe = &pThis->paSqes[idx];
|
---|
777 | struct iovec *pIoVec = &pThis->paIoVecs[idx];
|
---|
778 |
|
---|
779 | pIoVec->iov_base = pvBuf;
|
---|
780 | pIoVec->iov_len = cbBuf;
|
---|
781 |
|
---|
782 | pSqe->u8Flags = 0;
|
---|
783 | pSqe->u16IoPrio = 0;
|
---|
784 | pSqe->i32Fd = (int32_t)RTFileToNative(pHandle->u.hFile);
|
---|
785 | pSqe->u64OffStart = off;
|
---|
786 | pSqe->u64AddrBufIoVec = (uint64_t)(uintptr_t)pIoVec;
|
---|
787 | pSqe->u32BufIoVecSz = 1;
|
---|
788 | pSqe->u64User = (uint64_t)(uintptr_t)pvUser;
|
---|
789 |
|
---|
790 | switch (enmOp)
|
---|
791 | {
|
---|
792 | case RTIOQUEUEOP_READ:
|
---|
793 | pSqe->u8Opc = LNX_IOURING_OPC_READV;
|
---|
794 | pSqe->uOpc.u32KrnlRwFlags = 0;
|
---|
795 | break;
|
---|
796 | case RTIOQUEUEOP_WRITE:
|
---|
797 | pSqe->u8Opc = LNX_IOURING_OPC_WRITEV;
|
---|
798 | pSqe->uOpc.u32KrnlRwFlags = 0;
|
---|
799 | break;
|
---|
800 | case RTIOQUEUEOP_SYNC:
|
---|
801 | pSqe->u8Opc = LNX_IOURING_OPC_FSYNC;
|
---|
802 | pSqe->uOpc.u32FsyncFlags = 0;
|
---|
803 | break;
|
---|
804 | default:
|
---|
805 | AssertMsgFailedReturn(("Invalid I/O queue operation: %d\n", enmOp),
|
---|
806 | VERR_INVALID_PARAMETER);
|
---|
807 | }
|
---|
808 |
|
---|
809 | pThis->Sq.paidxSqes[idx] = idx;
|
---|
810 | pThis->idxSqTail++;
|
---|
811 | pThis->cSqesToCommit++;
|
---|
812 | return VINF_SUCCESS;
|
---|
813 | }
|
---|
814 |
|
---|
815 |
|
---|
816 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnCommit} */
|
---|
817 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_Commit(RTIOQUEUEPROV hIoQueueProv, uint32_t *pcReqsCommitted)
|
---|
818 | {
|
---|
819 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
820 |
|
---|
821 | ASMWriteFence();
|
---|
822 | ASMAtomicWriteU32(pThis->Sq.pidxTail, pThis->idxSqTail);
|
---|
823 | ASMWriteFence();
|
---|
824 |
|
---|
825 | int rc = rtIoQueueLnxIoURingEnter(pThis->iFdIoCtx, pThis->cSqesToCommit, 0, 0 /*fFlags*/);
|
---|
826 | if (RT_SUCCESS(rc))
|
---|
827 | {
|
---|
828 | *pcReqsCommitted = pThis->cSqesToCommit;
|
---|
829 | pThis->cSqesToCommit = 0;
|
---|
830 | }
|
---|
831 |
|
---|
832 | return rc;
|
---|
833 | }
|
---|
834 |
|
---|
835 |
|
---|
836 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnEvtWait} */
|
---|
837 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_EvtWait(RTIOQUEUEPROV hIoQueueProv, PRTIOQUEUECEVT paCEvt, uint32_t cCEvt,
|
---|
838 | uint32_t cMinWait, uint32_t *pcCEvt, uint32_t fFlags)
|
---|
839 | {
|
---|
840 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
841 | int rc = VINF_SUCCESS;
|
---|
842 | uint32_t cCEvtSeen = 0;
|
---|
843 |
|
---|
844 | RT_NOREF(fFlags);
|
---|
845 |
|
---|
846 | /*
|
---|
847 | * Check the completion queue first for any completed events which might save us a
|
---|
848 | * context switch later on.
|
---|
849 | */
|
---|
850 | rtIoQueueLnxIoURingFileProvCqCheck(pThis, paCEvt, cCEvt, &cCEvtSeen);
|
---|
851 |
|
---|
852 | while ( cCEvtSeen < cMinWait
|
---|
853 | && RT_SUCCESS(rc))
|
---|
854 | {
|
---|
855 | /*
|
---|
856 | * We can employ a blocking read on the event file descriptor, it will return
|
---|
857 | * either when woken up externally or when there are completion events pending.
|
---|
858 | */
|
---|
859 | uint64_t uCnt = 0; /**< The counter value returned upon a successful read(). */
|
---|
860 | ssize_t rcLnx = read(pThis->iFdEvt, &uCnt, sizeof(uCnt));
|
---|
861 | if (rcLnx == sizeof(uCnt))
|
---|
862 | {
|
---|
863 | uint32_t cCEvtThisSeen = 0;
|
---|
864 | rtIoQueueLnxIoURingFileProvCqCheck(pThis, &paCEvt[cCEvtSeen], cCEvt - cCEvtSeen, &cCEvtThisSeen);
|
---|
865 | cCEvtSeen += cCEvtThisSeen;
|
---|
866 |
|
---|
867 | /* Whether we got woken up externally. */
|
---|
868 | if (ASMAtomicXchgBool(&pThis->fExtIntr, false))
|
---|
869 | rc = VERR_INTERRUPTED;
|
---|
870 | }
|
---|
871 | else if (rcLnx == -1)
|
---|
872 | rc = RTErrConvertFromErrno(errno);
|
---|
873 | else
|
---|
874 | AssertMsgFailed(("Unexpected read() -> 0\n"));
|
---|
875 | }
|
---|
876 |
|
---|
877 | *pcCEvt = cCEvtSeen;
|
---|
878 | return rc;
|
---|
879 | }
|
---|
880 |
|
---|
881 |
|
---|
882 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnEvtWaitWakeup} */
|
---|
883 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_EvtWaitWakeup(RTIOQUEUEPROV hIoQueueProv)
|
---|
884 | {
|
---|
885 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
886 | int rc = VINF_SUCCESS;
|
---|
887 |
|
---|
888 | if (!ASMAtomicXchgBool(&pThis->fExtIntr, true))
|
---|
889 | {
|
---|
890 | const uint64_t uValAdd = 1;
|
---|
891 | ssize_t rcLnx = write(pThis->iFdEvt, &uValAdd, sizeof(uValAdd));
|
---|
892 |
|
---|
893 | Assert(rcLnx == -1 || rcLnx == sizeof(uValAdd));
|
---|
894 | if (rcLnx == -1)
|
---|
895 | rc = RTErrConvertFromErrno(errno);
|
---|
896 | }
|
---|
897 |
|
---|
898 | return rc;
|
---|
899 | }
|
---|
900 |
|
---|
901 |
|
---|
902 | /**
|
---|
903 | * Async file I/O queue provider virtual method table.
|
---|
904 | */
|
---|
905 | RT_DECL_DATA_CONST(RTIOQUEUEPROVVTABLE const) g_RTIoQueueLnxIoURingProv =
|
---|
906 | {
|
---|
907 | /** uVersion */
|
---|
908 | RTIOQUEUEPROVVTABLE_VERSION,
|
---|
909 | /** pszId */
|
---|
910 | "LnxIoURingFile",
|
---|
911 | /** cbIoQueueProv */
|
---|
912 | sizeof(RTIOQUEUEPROVINT),
|
---|
913 | /** enmHnd */
|
---|
914 | RTHANDLETYPE_FILE,
|
---|
915 | /** fFlags */
|
---|
916 | 0,
|
---|
917 | /** pfnIsSupported */
|
---|
918 | rtIoQueueLnxIoURingFileProv_IsSupported,
|
---|
919 | /** pfnQueueInit */
|
---|
920 | rtIoQueueLnxIoURingFileProv_QueueInit,
|
---|
921 | /** pfnQueueDestroy */
|
---|
922 | rtIoQueueLnxIoURingFileProv_QueueDestroy,
|
---|
923 | /** pfnHandleRegister */
|
---|
924 | rtIoQueueLnxIoURingFileProv_HandleRegister,
|
---|
925 | /** pfnHandleDeregister */
|
---|
926 | rtIoQueueLnxIoURingFileProv_HandleDeregister,
|
---|
927 | /** pfnReqPrepare */
|
---|
928 | rtIoQueueLnxIoURingFileProv_ReqPrepare,
|
---|
929 | /** pfnReqPrepareSg */
|
---|
930 | NULL,
|
---|
931 | /** pfnCommit */
|
---|
932 | rtIoQueueLnxIoURingFileProv_Commit,
|
---|
933 | /** pfnEvtWait */
|
---|
934 | rtIoQueueLnxIoURingFileProv_EvtWait,
|
---|
935 | /** pfnEvtWaitWakeup */
|
---|
936 | rtIoQueueLnxIoURingFileProv_EvtWaitWakeup,
|
---|
937 | /** uEndMarker */
|
---|
938 | RTIOQUEUEPROVVTABLE_VERSION
|
---|
939 | };
|
---|
940 |
|
---|