1 | /* $Id: ioqueue-iouringfile-provider.cpp 79984 2019-07-25 17:25:41Z vboxsync $ */
|
---|
2 | /** @file
|
---|
3 | * IPRT - I/O queue, Linux io_uring interface I/O file provider.
|
---|
4 | */
|
---|
5 |
|
---|
6 | /*
|
---|
7 | * Copyright (C) 2019 Oracle Corporation
|
---|
8 | *
|
---|
9 | * This file is part of VirtualBox Open Source Edition (OSE), as
|
---|
10 | * available from http://www.virtualbox.org. This file is free software;
|
---|
11 | * you can redistribute it and/or modify it under the terms of the GNU
|
---|
12 | * General Public License (GPL) as published by the Free Software
|
---|
13 | * Foundation, in version 2 as it comes in the "COPYING" file of the
|
---|
14 | * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
|
---|
15 | * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
|
---|
16 | *
|
---|
17 | * The contents of this file may alternatively be used under the terms
|
---|
18 | * of the Common Development and Distribution License Version 1.0
|
---|
19 | * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
|
---|
20 | * VirtualBox OSE distribution, in which case the provisions of the
|
---|
21 | * CDDL are applicable instead of those of the GPL.
|
---|
22 | *
|
---|
23 | * You may elect to license modified versions of this file under the
|
---|
24 | * terms and conditions of either the GPL or the CDDL or both.
|
---|
25 | */
|
---|
26 |
|
---|
27 | /** @page pg_rtioqueue_linux RTIoQueue - Linux io_uring implementation notes
|
---|
28 | * @internal
|
---|
29 | *
|
---|
30 | * The io_uring interface is the most recent interface added to the Linux kernel
|
---|
31 | * to deliver fast and efficient I/O. It was first added with kernel version 5.1 and is
|
---|
32 | * thus not available on most systems as of writing this backend (July 2019).
|
---|
33 | * It supersedes the old async I/O interface and cleans up with some restrictions like
|
---|
34 | * having to disable caching for the file.
|
---|
35 | * The interface is centered around a submission and completion queue to queue multiple new
|
---|
36 | * requests for the kernel to process and get notified about completions to reduce the amount
|
---|
37 | * of context switches to an absolute minimum. It also offers advanced features like
|
---|
38 | * registering a fixed set of memory buffers for I/O upfront to reduce the processing overhead
|
---|
39 | * even more.
|
---|
40 | *
|
---|
41 | * The first implementation will only make use of the basic features and more advanced features
|
---|
42 | * will be added later.
|
---|
43 | * The adept developer probably noticed that the public IPRT I/O queue API resembles the io_uring
|
---|
44 | * interface in many aspects. This is not by accident but to reduce our own overhead as much as possible
|
---|
45 | * while still keeping a consistent platform independent API which allows efficient implementations on
|
---|
46 | * other hosts when they come up.
|
---|
47 | *
|
---|
48 | * The public kernel io_uring interface is completely defined in this file to avoid dragging in additional
|
---|
49 | * dependencies and to avoid compile problems on older hosts missing the interface just like it is done
|
---|
50 | * for the Linux RTFileAio* API. The necessary interface definitions and descriptions were retrieved from:
|
---|
51 | * * http://kernel.dk/io_uring.pdf
|
---|
52 | * * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/io_uring.h
|
---|
53 | */
|
---|
54 |
|
---|
55 |
|
---|
56 | /*********************************************************************************************************************************
|
---|
57 | * Header Files *
|
---|
58 | *********************************************************************************************************************************/
|
---|
59 | #define LOG_GROUP RTLOGGROUP_IOQUEUE
|
---|
60 | #include <iprt/ioqueue.h>
|
---|
61 |
|
---|
62 | #include <iprt/assertcompile.h>
|
---|
63 | #include <iprt/asm.h>
|
---|
64 | #include <iprt/errcore.h>
|
---|
65 | #include <iprt/file.h>
|
---|
66 | #include <iprt/log.h>
|
---|
67 | #include <iprt/mem.h>
|
---|
68 | #include <iprt/string.h>
|
---|
69 |
|
---|
70 | #include <errno.h>
|
---|
71 | #include <unistd.h>
|
---|
72 | #include <signal.h>
|
---|
73 | #include <sys/mman.h>
|
---|
74 | #include <sys/syscall.h>
|
---|
75 | #include <sys/uio.h>
|
---|
76 |
|
---|
77 | #include "internal/ioqueue.h"
|
---|
78 |
|
---|
79 |
|
---|
80 | /*********************************************************************************************************************************
|
---|
81 | * Defined Constants And Macros *
|
---|
82 | *********************************************************************************************************************************/
|
---|
83 |
|
---|
/** The syscall number of io_uring_setup(). */
#define LNX_IOURING_SYSCALL_SETUP     425
/** The syscall number of io_uring_enter(). */
#define LNX_IOURING_SYSCALL_ENTER     426
/** The syscall number of io_uring_register(). */
#define LNX_IOURING_SYSCALL_REGISTER  427
/** eventfd2() syscall not associated with io_uring but used for kicking waiters.
 *
 * The eventfd2() number is architecture specific (19 is only the asm-generic
 * number, on x86-64 number 19 is readv()), so prefer the value supplied by
 * <sys/syscall.h> and fall back to hard coded numbers only if the toolchain
 * headers do not know the syscall. */
#if defined(SYS_eventfd2)
# define LNX_SYSCALL_EVENTFD2         SYS_eventfd2
#elif defined(__x86_64__)
# define LNX_SYSCALL_EVENTFD2         290
#elif defined(__i386__)
# define LNX_SYSCALL_EVENTFD2         328
#else
# define LNX_SYSCALL_EVENTFD2         19
#endif
92 |
|
---|
93 |
|
---|
94 | /*********************************************************************************************************************************
|
---|
95 | * Structures and Typedefs *
|
---|
96 | *********************************************************************************************************************************/
|
---|
97 |
|
---|
98 | /**
|
---|
99 | * Linux io_uring completion event.
|
---|
100 | */
|
---|
101 | typedef struct LNXIOURINGCQE
|
---|
102 | {
|
---|
103 | /** Opaque user data associated with the completed request. */
|
---|
104 | uint64_t u64User;
|
---|
105 | /** The status code of the request. */
|
---|
106 | int32_t rcLnx;
|
---|
107 | /** Some flags which are not used as of now. */
|
---|
108 | uint32_t fFlags;
|
---|
109 | } LNXIOURINGCQE;
|
---|
110 | AssertCompileSize(LNXIOURINGCQE, 16);
|
---|
111 | /** Pointer to a Linux io_uring completion event. */
|
---|
112 | typedef LNXIOURINGCQE *PLNXIOURINGCQE;
|
---|
113 | /** Pointer to a constant linux io_uring completion event. */
|
---|
114 | typedef const LNXIOURINGCQE *PCLNXIOURINGCQE;
|
---|
115 |
|
---|
116 |
|
---|
117 | /**
|
---|
118 | * Linux io_uring submission queue entry.
|
---|
119 | */
|
---|
120 | typedef struct LNXIOURINGSQE
|
---|
121 | {
|
---|
122 | /** The opcode for the request. */
|
---|
123 | uint8_t u8Opc;
|
---|
124 | /** Common flags for the request. */
|
---|
125 | uint8_t u8Flags;
|
---|
126 | /** Assigned I/O priority. */
|
---|
127 | uint16_t u16IoPrio;
|
---|
128 | /** The file descriptor the request is for. */
|
---|
129 | int32_t i32Fd;
|
---|
130 | /** The start offset into the file for the request. */
|
---|
131 | uint64_t u64OffStart;
|
---|
132 | /** Buffer pointer or Pointer to io vector array depending on opcode. */
|
---|
133 | uint64_t u64AddrBufIoVec;
|
---|
134 | /** Size of the buffer in bytes or number of io vectors. */
|
---|
135 | uint32_t u32BufIoVecSz;
|
---|
136 | /** Opcode dependent data. */
|
---|
137 | union
|
---|
138 | {
|
---|
139 | /** Flags for read/write requests. */
|
---|
140 | uint32_t u32KrnlRwFlags;
|
---|
141 | /** Flags for fsync() like requests. */
|
---|
142 | uint32_t u32FsyncFlags;
|
---|
143 | /** Flags for poll() like requests. */
|
---|
144 | uint16_t u16PollFlags;
|
---|
145 | /** Flags for sync_file_range() like requests. */
|
---|
146 | uint32_t u32SyncFileRangeFlags;
|
---|
147 | /** Flags for requests requiring a msg structure. */
|
---|
148 | uint32_t u32MsgFlags;
|
---|
149 | } uOpc;
|
---|
150 | /** Opaque user data associated with the request and returned durign completion. */
|
---|
151 | uint64_t u64User;
|
---|
152 | /** Request type dependent data. */
|
---|
153 | union
|
---|
154 | {
|
---|
155 | /** Fixed buffer index if indicated by the request flags. */
|
---|
156 | uint16_t u16FixedBufIdx;
|
---|
157 | /** Padding to align the structure to 64 bytes. */
|
---|
158 | uint64_t au64Padding[3];
|
---|
159 | } uReq;
|
---|
160 | } LNXIOURINGSQE;
|
---|
161 | AssertCompileSize(LNXIOURINGSQE, 64);
|
---|
162 | /** Pointer to a Linux io_uring submission queue entry. */
|
---|
163 | typedef LNXIOURINGSQE *PLNXIOURINGSQE;
|
---|
164 | /** Pointer to a constant Linux io_uring submission queue entry. */
|
---|
165 | typedef const LNXIOURINGSQE *PCLNXIOURINGSQE;
|
---|
166 |
|
---|
167 |
|
---|
168 | /**
|
---|
169 | * Linux u_ioring SQ ring header structure to maintain the queue.
|
---|
170 | */
|
---|
171 | typedef struct LNXIOURINGSQ
|
---|
172 | {
|
---|
173 | /** The current head position to fill in new requests. */
|
---|
174 | uint32_t u32OffHead;
|
---|
175 | /** The current tail position the kernel starts processing from. */
|
---|
176 | uint32_t u32OffTail;
|
---|
177 | /** The mask for the head and tail counters to apply to retrieve the index. */
|
---|
178 | uint32_t u32OffRingMask;
|
---|
179 | /** Number of entries in the SQ ring. */
|
---|
180 | uint32_t u32OffRingEntries;
|
---|
181 | /** Flags set asychronously by the kernel. */
|
---|
182 | uint32_t u32OffFlags;
|
---|
183 | /** Counter of dropped requests. */
|
---|
184 | uint32_t u32OffDroppedReqs;
|
---|
185 | /** Offset where to find the array of SQ entries. */
|
---|
186 | uint32_t u32OffArray;
|
---|
187 | /** Reserved. */
|
---|
188 | uint32_t u32Rsvd0;
|
---|
189 | /** Reserved. */
|
---|
190 | uint64_t u64Rsvd1;
|
---|
191 | } LNXIOURINGSQ;
|
---|
192 | AssertCompileSize(LNXIOURINGSQ, 40);
|
---|
193 | /** Pointer to a Linux u_ioring SQ ring header. */
|
---|
194 | typedef LNXIOURINGSQ *PLNXIOURINGSQ;
|
---|
195 | /** Pointer to a constant Linux u_ioring SQ ring header. */
|
---|
196 | typedef const LNXIOURINGSQ *PCLNXIOURINGSQ;
|
---|
197 |
|
---|
198 |
|
---|
199 | /**
|
---|
200 | * Linux io_uring CQ ring header structure to maintain the queue.
|
---|
201 | */
|
---|
202 | typedef struct LNXIOURINGCQ
|
---|
203 | {
|
---|
204 | /** The current head position the kernel modifies when completion events happen. */
|
---|
205 | uint32_t u32OffHead;
|
---|
206 | /** The current tail position to read completion events from. */
|
---|
207 | uint32_t u32OffTail;
|
---|
208 | /** The mask for the head and tail counters to apply to retrieve the index. */
|
---|
209 | uint32_t u32OffRingMask;
|
---|
210 | /** Number of entries in the CQ ring. */
|
---|
211 | uint32_t u32OffRingEntries;
|
---|
212 | /** Number of CQ overflows happened. */
|
---|
213 | uint32_t u32OffOverflowCnt;
|
---|
214 | /** */
|
---|
215 | uint32_t u32OffCqes;
|
---|
216 | /** Reserved. */
|
---|
217 | uint64_t au64Rsvd0[2];
|
---|
218 | } LNXIOURINGCQ;
|
---|
219 | AssertCompileSize(LNXIOURINGCQ, 40);
|
---|
220 | /** Pointer to a Linux u_ioring CQ ring header. */
|
---|
221 | typedef LNXIOURINGCQ *PLNXIOURINGCQ;
|
---|
222 | /** Pointer to a constant Linux u_ioring CQ ring header. */
|
---|
223 | typedef const LNXIOURINGCQ *PCLNXIOURINGCQ;
|
---|
224 |
|
---|
225 |
|
---|
226 | /**
|
---|
227 | * Linux io_uring parameters passed to io_uring_setup().
|
---|
228 | */
|
---|
229 | typedef struct LNXIOURINGPARAMS
|
---|
230 | {
|
---|
231 | /** Number of SQ entries requested, must be power of 2. */
|
---|
232 | uint32_t u32SqEntriesCnt;
|
---|
233 | /** Number of CQ entries requested, must be power of 2. */
|
---|
234 | uint32_t u32CqEntriesCnt;
|
---|
235 | /** Flags for the ring, , see LNX_IOURING_SETUP_F_*. */
|
---|
236 | uint32_t u32Flags;
|
---|
237 | /** Affinity of the kernel side SQ polling thread if enabled. */
|
---|
238 | uint32_t u32SqPollCpu;
|
---|
239 | /** Milliseconds after the kernel side SQ polling thread goes to sleep
|
---|
240 | * if there is are no requests to process. */
|
---|
241 | uint32_t u32SqPollIdleMs;
|
---|
242 | /** Reserved. */
|
---|
243 | uint32_t au32Rsvd0[5];
|
---|
244 | /** Offsets returned for the submission queue. */
|
---|
245 | LNXIOURINGSQ SqOffsets;
|
---|
246 | /** Offsets returned for the completion queue. */
|
---|
247 | LNXIOURINGCQ CqOffsets;
|
---|
248 | } LNXIOURINGPARAMS;
|
---|
249 | /** Pointer to Linux io_uring parameters. */
|
---|
250 | typedef LNXIOURINGPARAMS *PLNXIOURINGPARAMS;
|
---|
251 | /** Pointer to constant Linux io_uring parameters. */
|
---|
252 | typedef const LNXIOURINGPARAMS *PCLNXIOURINGPARAMS;
|
---|
253 |
|
---|
254 |
|
---|
255 | /**
|
---|
256 | * @name LNXIOURINGSQE::u8Opc defined opcodes.
|
---|
257 | * @{ */
|
---|
258 | /** Opcode to profile the interface, does nothing. */
|
---|
259 | #define LNX_IOURING_OPC_NOP 0
|
---|
260 | /** preadv() like request. */
|
---|
261 | #define LNX_IOURING_OPC_READV 1
|
---|
262 | /** pwritev() like request. */
|
---|
263 | #define LNX_IOURING_OPC_WRITEV 2
|
---|
264 | /** fsync() like request. */
|
---|
265 | #define LNX_IOURING_OPC_FSYNC 3
|
---|
266 | /** Read request using a fixed preset buffer. */
|
---|
267 | #define LNX_IOURING_OPC_READ_FIXED 4
|
---|
268 | /** Write request using a fixed preset buffer. */
|
---|
269 | #define LNX_IOURING_OPC_WRITE_FIXED 5
|
---|
270 | /** Add file descriptor to pollset. */
|
---|
271 | #define LNX_IOURING_OPC_POLL_ADD 6
|
---|
272 | /** Remove file descriptor from pollset. */
|
---|
273 | #define LNX_IOURING_OPC_POLL_REMOVE 7
|
---|
274 | /** sync_file_range() like request. */
|
---|
275 | #define LNX_IOURING_OPC_SYNC_FILE_RANGE 8
|
---|
276 | /** sendmsg() like request. */
|
---|
277 | #define LNX_IOURING_OPC_SENDMSG 9
|
---|
278 | /** recvmsg() like request. */
|
---|
279 | #define LNX_IOURING_OPC_RECVMSG 10
|
---|
280 | /** @} */
|
---|
281 |
|
---|
282 |
|
---|
283 | /**
|
---|
284 | * @name Additional flags for LNX_IOURING_OPC_FSYNC requests.
|
---|
285 | * @{ */
|
---|
286 | /** Use fdatasync() semantics: sync the file data, skipping metadata not required to read it back. */
|
---|
287 | #define LNX_IOURING_OPC_FSYNC_DATASYNC RT_BIT_32(0)
|
---|
288 | /** @} */
|
---|
289 |
|
---|
290 |
|
---|
291 | /**
|
---|
292 | * @name Flags for the LNX_IOURING_SYSCALL_SETUP syscall.
|
---|
293 | * @{ */
|
---|
294 | /** The I/O context is polled. */
|
---|
295 | #define LNX_IOURING_SETUP_F_IOPOLL RT_BIT_32(0)
|
---|
296 | /** The kernel should poll the submission queue. */
|
---|
297 | #define LNX_IOURING_SETUP_F_SQPOLL RT_BIT_32(1)
|
---|
298 | /** Sets the CPU affinity of the kernel thread polling the submission queue. */
|
---|
299 | #define LNX_IOURING_SETUP_F_SQAFF RT_BIT_32(2)
|
---|
300 | /** @} */
|
---|
301 |
|
---|
302 |
|
---|
303 | /**
|
---|
304 | * @name Flags for LNXIOURINGSQE::u8Flags.
|
---|
305 | * @{ */
|
---|
306 | /** The file descriptor was registered before use. */
|
---|
307 | #define LNX_IOURING_SQE_F_FIXED_FILE RT_BIT(0)
|
---|
308 | /** Complete all active requests before issuing the request with the flag set. */
|
---|
309 | #define LNX_IOURING_SQE_F_IO_DRAIN RT_BIT(1)
|
---|
310 | /** Links the request with the flag set to the next one. */
|
---|
311 | #define LNX_IOURING_SQE_F_IO_LINK RT_BIT(2)
|
---|
312 | /** @} */
|
---|
313 |
|
---|
314 |
|
---|
315 | /**
|
---|
316 | * @name Magic mmap offsets to map submission and completion queues.
|
---|
317 | * @{ */
|
---|
318 | /** Used to map the submission queue. */
|
---|
319 | #define LNX_IOURING_MMAP_OFF_SQ UINT64_C(0)
|
---|
320 | /** Used to map the completion queue. */
|
---|
321 | #define LNX_IOURING_MMAP_OFF_CQ UINT64_C(0x8000000)
|
---|
322 | /** Used to map the submission queue entries array. */
|
---|
323 | #define LNX_IOURING_MMAP_OFF_SQES UINT64_C(0x10000000)
|
---|
324 | /** @} */
|
---|
325 |
|
---|
326 |
|
---|
327 | /**
|
---|
328 | * @name Flags used for the SQ ring structure.
|
---|
329 | * @{ */
|
---|
330 | /** The kernel thread needs a io_uring_enter() wakeup to continue processing requests. */
|
---|
331 | #define LNX_IOURING_SQ_RING_F_NEED_WAKEUP RT_BIT_32(0)
|
---|
332 | /** @} */
|
---|
333 |
|
---|
334 |
|
---|
335 | /**
|
---|
336 | * @name Flags for the LNX_IOURING_SYSCALL_ENTER syscall.
|
---|
337 | * @{ */
|
---|
338 | /** Retrieve completion events for the completion queue. */
|
---|
339 | #define LNX_IOURING_ENTER_F_GETEVENTS RT_BIT_32(0)
|
---|
340 | /** Wakes the suspended kernel thread processing the requests. */
|
---|
341 | #define LNX_IOURING_ENTER_F_SQ_WAKEUP RT_BIT_32(1)
|
---|
342 | /** @} */
|
---|
343 |
|
---|
344 |
|
---|
345 | /**
|
---|
346 | * @name Opcodes for the LNX_IOURING_SYSCALL_REGISTER syscall.
|
---|
347 | * @{ */
|
---|
348 | /** Register a fixed set of buffers. */
|
---|
349 | #define LNX_IOURING_REGISTER_OPC_BUFFERS_REGISTER 0
|
---|
350 | /** Unregisters a fixed set of buffers registered previously. */
|
---|
351 | #define LNX_IOURING_REGISTER_OPC_BUFFERS_UNREGISTER 1
|
---|
352 | /** Register a fixed set of files. */
|
---|
353 | #define LNX_IOURING_REGISTER_OPC_FILES_REGISTER 2
|
---|
354 | /** Unregisters a fixed set of files registered previously. */
|
---|
355 | #define LNX_IOURING_REGISTER_OPC_FILES_UNREGISTER 3
|
---|
356 | /** Register an eventfd associated with the I/O ring. */
|
---|
357 | #define LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER 4
|
---|
358 | /** Unregisters an eventfd registered previously. */
|
---|
359 | #define LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER 5
|
---|
360 | /** @} */
|
---|
361 |
|
---|
362 |
|
---|
/**
 * Userspace view of the submission queue ring.
 *
 * @note The pointer members reference memory shared with the kernel,
 *       which is why they are declared volatile.
 */
typedef struct RTIOQUEUESQ
{
    /** Head counter inside the mapped ring. */
    volatile uint32_t           *pidxHead;
    /** Tail counter inside the mapped ring. */
    volatile uint32_t           *pidxTail;
    /** Mask turning a counter value into a ring index. */
    uint32_t                    fRingMask;
    /** Number of entries the ring has. */
    uint32_t                    cEntries;
    /** Global flags inside the mapped ring. */
    volatile uint32_t           *pfFlags;
    /** Indirection array used for indexing the real SQ entries. */
    volatile uint32_t           *paidxSqes;
} RTIOQUEUESQ;
384 |
|
---|
385 |
|
---|
386 | /**
|
---|
387 | * CQ ring structure.
|
---|
388 | *
|
---|
389 | * @note Some members of this structure point to memory shared with the kernel,
|
---|
390 | * hence the volatile keyword.
|
---|
391 | */
|
---|
392 | typedef struct RTIOQUEUECQ
|
---|
393 | {
|
---|
394 | /** Pointer to the head counter. */
|
---|
395 | volatile uint32_t *pidxHead;
|
---|
396 | /** Pointer to the tail counter. */
|
---|
397 | volatile uint32_t *pidxTail;
|
---|
398 | /** Mask to apply for the counters to get to the index. */
|
---|
399 | uint32_t fRingMask;
|
---|
400 | /** Number of entries in the ring. */
|
---|
401 | uint32_t cEntries;
|
---|
402 | /** Pointer to the completion entry ring. */
|
---|
403 | volatile LNXIOURINGCQE *paCqes;
|
---|
404 | } RTIOQUEUECQ;
|
---|
405 |
|
---|
406 |
|
---|
407 | /**
|
---|
408 | * Internal I/O queue provider instance data.
|
---|
409 | */
|
---|
410 | typedef struct RTIOQUEUEPROVINT
|
---|
411 | {
|
---|
412 | /** The io_uring file descriptor. */
|
---|
413 | int iFdIoCtx;
|
---|
414 | /** The eventfd file descriptor registered with the ring. */
|
---|
415 | int iFdEvt;
|
---|
416 | /** The submission queue. */
|
---|
417 | RTIOQUEUESQ Sq;
|
---|
418 | /** The currently uncommitted tail for the SQ. */
|
---|
419 | uint32_t idxSqTail;
|
---|
420 | /** Numbere of uncommitted SQEs. */
|
---|
421 | uint32_t cSqesToCommit;
|
---|
422 | /** The completion queue. */
|
---|
423 | RTIOQUEUECQ Cq;
|
---|
424 | /** Pointer to the mapped SQES entries. */
|
---|
425 | PLNXIOURINGSQE paSqes;
|
---|
426 | /** Pointer to the iovec structure used for non S/G requests. */
|
---|
427 | struct iovec *paIoVecs;
|
---|
428 | /** Pointer returned by mmap() for the SQ ring, used for unmapping. */
|
---|
429 | void *pvMMapSqRing;
|
---|
430 | /** Pointer returned by mmap() for the CQ ring, used for unmapping. */
|
---|
431 | void *pvMMapCqRing;
|
---|
432 | /** Pointer returned by mmap() for the SQ entries array, used for unmapping. */
|
---|
433 | void *pvMMapSqes;
|
---|
434 | /** Size of the mapped SQ ring, used for unmapping. */
|
---|
435 | size_t cbMMapSqRing;
|
---|
436 | /** Size of the mapped CQ ring, used for unmapping. */
|
---|
437 | size_t cbMMapCqRing;
|
---|
438 | /** Size of the mapped SQ entries array, used for unmapping. */
|
---|
439 | size_t cbMMapSqes;
|
---|
440 | /** Flag whether the waiter was woken up externally. */
|
---|
441 | volatile bool fExtIntr;
|
---|
442 | } RTIOQUEUEPROVINT;
|
---|
443 | /** Pointer to the internal I/O queue provider instance data. */
|
---|
444 | typedef RTIOQUEUEPROVINT *PRTIOQUEUEPROVINT;
|
---|
445 |
|
---|
446 |
|
---|
447 | /*********************************************************************************************************************************
|
---|
448 | * Internal Functions *
|
---|
449 | *********************************************************************************************************************************/
|
---|
450 |
|
---|
451 | /**
|
---|
452 | * Syscall wrapper for io_uring_setup().
|
---|
453 | *
|
---|
454 | * @returns IPRT status code.
|
---|
455 | * @param cEntries Number of entries for submission and completion queues.
|
---|
456 | * @param pParams Additional parameters for the I/O ring and updated return values
|
---|
457 | * on success.
|
---|
458 | * @param piFdIoCtx Where to store the file descriptor of the I/O ring on success.
|
---|
459 | */
|
---|
460 | DECLINLINE(int) rtIoQueueLnxIoURingSetup(uint32_t cEntries, PLNXIOURINGPARAMS pParams, int32_t *piFdIoCtx)
|
---|
461 | {
|
---|
462 | int rcLnx = syscall(LNX_IOURING_SYSCALL_SETUP, cEntries, pParams);
|
---|
463 | if (RT_UNLIKELY(rcLnx == -1))
|
---|
464 | return RTErrConvertFromErrno(errno);
|
---|
465 |
|
---|
466 | *piFdIoCtx = rcLnx;
|
---|
467 | return VINF_SUCCESS;
|
---|
468 | }
|
---|
469 |
|
---|
470 |
|
---|
471 | /**
|
---|
472 | * Syscall wrapper for io_uring_enter().
|
---|
473 | *
|
---|
474 | * @returns IPRT status code.
|
---|
475 | * @param iFdIoCtx The I/O ring file descriptor.
|
---|
476 | * @param cToSubmit Maximum number of requests waiting for processing.
|
---|
477 | * @param cMinComplete Minimum number of completion events to accumulate before returning.
|
---|
478 | * @param fFlags Flags for io_uring_enter(), see LNX_IOURING_ENTER_F_*.
|
---|
479 | */
|
---|
480 | DECLINLINE(int) rtIoQueueLnxIoURingEnter(int32_t iFdIoCtx, uint32_t cToSubmit, uint32_t cMinComplete,
|
---|
481 | uint32_t fFlags)
|
---|
482 | {
|
---|
483 | int rcLnx = syscall(LNX_IOURING_SYSCALL_ENTER, iFdIoCtx, cToSubmit, cMinComplete, fFlags,
|
---|
484 | NULL, 0);
|
---|
485 | if (RT_UNLIKELY(rcLnx == -1))
|
---|
486 | return RTErrConvertFromErrno(errno);
|
---|
487 |
|
---|
488 | return VINF_SUCCESS;
|
---|
489 | }
|
---|
490 |
|
---|
491 |
|
---|
492 | /**
|
---|
493 | * Syscall wrapper for io_uring_register().
|
---|
494 | *
|
---|
495 | * @returns IPRT status code.
|
---|
496 | * @param iFdIoCtx The I/O ring file descriptor.
|
---|
497 | * @param uOpc Operation to perform, see LNX_IOURING_REGISTER_OPC_*.
|
---|
498 | * @param pvArg Opaque arguments.
|
---|
499 | * @param cArgs Number of arguments.
|
---|
500 | */
|
---|
501 | DECLINLINE(int) rtIoQueueLnxIoURingRegister(int32_t iFdIoCtx, uint32_t uOpc, void *pvArg,
|
---|
502 | uint32_t cArgs)
|
---|
503 | {
|
---|
504 | int rcLnx = syscall(LNX_IOURING_SYSCALL_REGISTER, iFdIoCtx, uOpc, pvArg, cArgs);
|
---|
505 | if (RT_UNLIKELY(rcLnx == -1))
|
---|
506 | return RTErrConvertFromErrno(errno);
|
---|
507 |
|
---|
508 | return VINF_SUCCESS;
|
---|
509 | }
|
---|
510 |
|
---|
511 |
|
---|
512 | /**
|
---|
513 | * mmap() wrapper for the common bits and returning an IPRT status code.
|
---|
514 | *
|
---|
515 | * @returns IPRT status code.
|
---|
516 | * @param iFdIoCtx The I/O ring file descriptor.
|
---|
517 | * @param offMmap The mmap() offset.
|
---|
518 | * @param cbMmap How much to map.
|
---|
519 | * @param ppv Where to store the pointer to the mapping on success.
|
---|
520 | */
|
---|
521 | DECLINLINE(int) rtIoQueueLnxIoURingMmap(int iFdIoCtx, off_t offMmap, size_t cbMmap, void **ppv)
|
---|
522 | {
|
---|
523 | void *pv = mmap(0, cbMmap, PROT_READ | PROT_WRITE , MAP_SHARED | MAP_POPULATE, iFdIoCtx, offMmap);
|
---|
524 | if (pv != MAP_FAILED)
|
---|
525 | {
|
---|
526 | *ppv = pv;
|
---|
527 | return VINF_SUCCESS;
|
---|
528 | }
|
---|
529 |
|
---|
530 | return RTErrConvertFromErrno(errno);
|
---|
531 | }
|
---|
532 |
|
---|
533 |
|
---|
534 | /**
|
---|
535 | * eventfd2() syscall wrapper.
|
---|
536 | *
|
---|
537 | * @returns IPRT status code.
|
---|
538 | * @param uValInit The initial value of the maintained counter.
|
---|
539 | * @param fFlags Flags controlling the eventfd behavior.
|
---|
540 | * @param piFdEvt Where to store the file descriptor of the eventfd object on success.
|
---|
541 | */
|
---|
542 | DECLINLINE(int) rtIoQueueLnxEventfd2(uint32_t uValInit, uint32_t fFlags, int *piFdEvt)
|
---|
543 | {
|
---|
544 | int rcLnx = syscall(LNX_SYSCALL_EVENTFD2, uValInit, fFlags);
|
---|
545 | if (RT_UNLIKELY(rcLnx == -1))
|
---|
546 | return RTErrConvertFromErrno(errno);
|
---|
547 |
|
---|
548 | *piFdEvt = rcLnx;
|
---|
549 | return VINF_SUCCESS;
|
---|
550 | }
|
---|
551 |
|
---|
552 |
|
---|
553 | /**
|
---|
554 | * Checks the completion event queue for pending events.
|
---|
555 | *
|
---|
556 | * @returns nothing.
|
---|
557 | * @param pThis The provider instance.
|
---|
558 | * @param paCEvt Pointer to the array of completion events.
|
---|
559 | * @param cCEvt Maximum number of completion events the array can hold.
|
---|
560 | * @param pcCEvtSeen Where to store the number of completion events processed.
|
---|
561 | */
|
---|
562 | static void rtIoQueueLnxIoURingFileProvCqCheck(PRTIOQUEUEPROVINT pThis, PRTIOQUEUECEVT paCEvt,
|
---|
563 | uint32_t cCEvt, uint32_t *pcCEvtSeen)
|
---|
564 | {
|
---|
565 | /* The fencing and atomic accesses are kind of overkill and probably not required (dev paranoia). */
|
---|
566 | ASMReadFence();
|
---|
567 | uint32_t idxCqHead = ASMAtomicReadU32(pThis->Cq.pidxHead);
|
---|
568 | uint32_t idxCqTail = ASMAtomicReadU32(pThis->Cq.pidxTail);
|
---|
569 | ASMReadFence();
|
---|
570 |
|
---|
571 | uint32_t cCEvtSeen = 0;
|
---|
572 |
|
---|
573 | while ( idxCqTail != idxCqHead
|
---|
574 | && cCEvtSeen < cCEvt)
|
---|
575 | {
|
---|
576 | /* Get the index. */
|
---|
577 | uint32_t idxCqe = idxCqHead & pThis->Cq.fRingMask;
|
---|
578 | volatile LNXIOURINGCQE *pCqe = &pThis->Cq.paCqes[idxCqe];
|
---|
579 |
|
---|
580 | paCEvt->pvUser = (void *)(uintptr_t)pCqe->u64User;
|
---|
581 | if (pCqe->rcLnx >= 0)
|
---|
582 | {
|
---|
583 | paCEvt->rcReq = VINF_SUCCESS;
|
---|
584 | paCEvt->cbXfered = (size_t)pCqe->rcLnx;
|
---|
585 | }
|
---|
586 | else
|
---|
587 | paCEvt->rcReq = RTErrConvertFromErrno(-pCqe->rcLnx);
|
---|
588 |
|
---|
589 | paCEvt++;
|
---|
590 | cCEvtSeen++;
|
---|
591 | idxCqHead++;
|
---|
592 | }
|
---|
593 |
|
---|
594 | *pcCEvtSeen = cCEvtSeen;
|
---|
595 |
|
---|
596 | /* Paranoia strikes again. */
|
---|
597 | ASMWriteFence();
|
---|
598 | ASMAtomicWriteU32(pThis->Cq.pidxHead, idxCqHead);
|
---|
599 | ASMWriteFence();
|
---|
600 | }
|
---|
601 |
|
---|
602 |
|
---|
603 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnIsSupported} */
|
---|
604 | static DECLCALLBACK(bool) rtIoQueueLnxIoURingFileProv_IsSupported(void)
|
---|
605 | {
|
---|
606 | /*
|
---|
607 | * Try to create a simple I/O ring and close it again.
|
---|
608 | * The common code/public API already checked for the proper handle type.
|
---|
609 | */
|
---|
610 | int iFdIoCtx = 0;
|
---|
611 | bool fSupp = false;
|
---|
612 | LNXIOURINGPARAMS Params;
|
---|
613 | RT_ZERO(Params);
|
---|
614 |
|
---|
615 | int rc = rtIoQueueLnxIoURingSetup(16, &Params, &iFdIoCtx);
|
---|
616 | if (RT_SUCCESS(rc))
|
---|
617 | {
|
---|
618 | /*
|
---|
619 | * Check that we can register an eventfd descriptor to get notified about
|
---|
620 | * completion events while being able to kick the waiter externally out of the wait.
|
---|
621 | */
|
---|
622 | int iFdEvt = 0;
|
---|
623 | rc = rtIoQueueLnxEventfd2(0 /*uValInit*/, 0 /*fFlags*/, &iFdEvt);
|
---|
624 | if (RT_SUCCESS(rc))
|
---|
625 | {
|
---|
626 | rc = rtIoQueueLnxIoURingRegister(iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER,
|
---|
627 | &iFdEvt, 1 /*cArgs*/);
|
---|
628 | if (RT_SUCCESS(rc))
|
---|
629 | fSupp = true;
|
---|
630 |
|
---|
631 | int rcLnx = close(iFdEvt); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
632 | }
|
---|
633 | int rcLnx = close(iFdIoCtx); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
634 | }
|
---|
635 |
|
---|
636 | return fSupp;
|
---|
637 | }
|
---|
638 |
|
---|
639 |
|
---|
640 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnQueueInit} */
|
---|
641 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_QueueInit(RTIOQUEUEPROV hIoQueueProv, uint32_t fFlags,
|
---|
642 | uint32_t cSqEntries, uint32_t cCqEntries)
|
---|
643 | {
|
---|
644 | RT_NOREF(fFlags, cCqEntries);
|
---|
645 |
|
---|
646 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
647 | LNXIOURINGPARAMS Params;
|
---|
648 | RT_ZERO(Params);
|
---|
649 |
|
---|
650 | pThis->cSqesToCommit = 0;
|
---|
651 | pThis->fExtIntr = false;
|
---|
652 |
|
---|
653 | int rc = rtIoQueueLnxIoURingSetup(cSqEntries, &Params, &pThis->iFdIoCtx);
|
---|
654 | if (RT_SUCCESS(rc))
|
---|
655 | {
|
---|
656 | /* Map the rings into userspace. */
|
---|
657 | pThis->cbMMapSqRing = Params.SqOffsets.u32OffArray + Params.u32SqEntriesCnt * sizeof(uint32_t);
|
---|
658 | pThis->cbMMapCqRing = Params.CqOffsets.u32OffCqes + Params.u32CqEntriesCnt * sizeof(LNXIOURINGCQE);
|
---|
659 | pThis->cbMMapSqes = Params.u32SqEntriesCnt * sizeof(LNXIOURINGSQE);
|
---|
660 |
|
---|
661 | pThis->paIoVecs = (struct iovec *)RTMemAllocZ(Params.u32SqEntriesCnt * sizeof(struct iovec));
|
---|
662 | if (RT_LIKELY(pThis->paIoVecs))
|
---|
663 | {
|
---|
664 | rc = rtIoQueueLnxEventfd2(0 /*uValInit*/, 0 /*fFlags*/, &pThis->iFdEvt);
|
---|
665 | if (RT_SUCCESS(rc))
|
---|
666 | {
|
---|
667 | rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER, &pThis->iFdEvt, 1 /*cArgs*/);
|
---|
668 | if (RT_SUCCESS(rc))
|
---|
669 | {
|
---|
670 | rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQ, pThis->cbMMapSqRing, &pThis->pvMMapSqRing);
|
---|
671 | if (RT_SUCCESS(rc))
|
---|
672 | {
|
---|
673 | rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_CQ, pThis->cbMMapCqRing, &pThis->pvMMapCqRing);
|
---|
674 | if (RT_SUCCESS(rc))
|
---|
675 | {
|
---|
676 | rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQES, pThis->cbMMapSqes, &pThis->pvMMapSqes);
|
---|
677 | if (RT_SUCCESS(rc))
|
---|
678 | {
|
---|
679 | uint8_t *pbTmp = (uint8_t *)pThis->pvMMapSqRing;
|
---|
680 |
|
---|
681 | pThis->Sq.pidxHead = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffHead);
|
---|
682 | pThis->Sq.pidxTail = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffTail);
|
---|
683 | pThis->Sq.fRingMask = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingMask);
|
---|
684 | pThis->Sq.cEntries = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingEntries);
|
---|
685 | pThis->Sq.pfFlags = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffFlags);
|
---|
686 | pThis->Sq.paidxSqes = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffArray);
|
---|
687 | pThis->idxSqTail = *pThis->Sq.pidxTail;
|
---|
688 |
|
---|
689 | pThis->paSqes = (PLNXIOURINGSQE)pThis->pvMMapSqes;
|
---|
690 |
|
---|
691 | pbTmp = (uint8_t *)pThis->pvMMapCqRing;
|
---|
692 |
|
---|
693 | pThis->Cq.pidxHead = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffHead);
|
---|
694 | pThis->Cq.pidxTail = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffTail);
|
---|
695 | pThis->Cq.fRingMask = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingMask);
|
---|
696 | pThis->Cq.cEntries = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingEntries);
|
---|
697 | pThis->Cq.paCqes = (PLNXIOURINGCQE)(pbTmp + Params.CqOffsets.u32OffCqes);
|
---|
698 | return VINF_SUCCESS;
|
---|
699 | }
|
---|
700 |
|
---|
701 | munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing);
|
---|
702 | }
|
---|
703 |
|
---|
704 | munmap(pThis->pvMMapSqRing, pThis->cbMMapSqRing);
|
---|
705 | }
|
---|
706 |
|
---|
707 | rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER, NULL, 0);
|
---|
708 | AssertRC(rc);
|
---|
709 | }
|
---|
710 |
|
---|
711 | close(pThis->iFdEvt);
|
---|
712 | }
|
---|
713 |
|
---|
714 | RTMemFree(pThis->paIoVecs);
|
---|
715 | }
|
---|
716 |
|
---|
717 | int rcLnx = close(pThis->iFdIoCtx); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
718 | }
|
---|
719 |
|
---|
720 | return rc;
|
---|
721 | }
|
---|
722 |
|
---|
723 |
|
---|
724 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnQueueDestroy} */
|
---|
725 | static DECLCALLBACK(void) rtIoQueueLnxIoURingFileProv_QueueDestroy(RTIOQUEUEPROV hIoQueueProv)
|
---|
726 | {
|
---|
727 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
728 |
|
---|
729 | int rcLnx = munmap(pThis->pvMMapSqRing, pThis->cbMMapSqRing); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
730 | rcLnx = munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
731 | rcLnx = munmap(pThis->pvMMapSqes, pThis->cbMMapSqes); Assert(!rcLnx); RT_NOREF(rcLnx);
|
---|
732 |
|
---|
733 | int rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER, NULL, 0);
|
---|
734 | AssertRC(rc);
|
---|
735 |
|
---|
736 | close(pThis->iFdEvt);
|
---|
737 | close(pThis->iFdIoCtx);
|
---|
738 | RTMemFree(pThis->paIoVecs);
|
---|
739 |
|
---|
740 | RT_ZERO(pThis);
|
---|
741 | }
|
---|
742 |
|
---|
743 |
|
---|
744 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnHandleRegister} */
|
---|
745 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_HandleRegister(RTIOQUEUEPROV hIoQueueProv, PCRTHANDLE pHandle)
|
---|
746 | {
|
---|
747 | RT_NOREF(hIoQueueProv, pHandle);
|
---|
748 | /** @todo Add support for fixed file sets later. */
|
---|
749 | return VINF_SUCCESS;
|
---|
750 | }
|
---|
751 |
|
---|
752 |
|
---|
753 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnHandleDeregister} */
|
---|
754 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_HandleDeregister(RTIOQUEUEPROV hIoQueueProv, PCRTHANDLE pHandle)
|
---|
755 | {
|
---|
756 | RT_NOREF(hIoQueueProv, pHandle);
|
---|
757 | /** @todo Add support for fixed file sets later. */
|
---|
758 | return VINF_SUCCESS;
|
---|
759 | }
|
---|
760 |
|
---|
761 |
|
---|
762 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnReqPrepare} */
|
---|
763 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_ReqPrepare(RTIOQUEUEPROV hIoQueueProv, PCRTHANDLE pHandle, RTIOQUEUEOP enmOp,
|
---|
764 | uint64_t off, void *pvBuf, size_t cbBuf, uint32_t fReqFlags,
|
---|
765 | void *pvUser)
|
---|
766 | {
|
---|
767 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
768 | RT_NOREF(fReqFlags);
|
---|
769 |
|
---|
770 | uint32_t idx = pThis->idxSqTail & pThis->Sq.fRingMask;
|
---|
771 | PLNXIOURINGSQE pSqe = &pThis->paSqes[idx];
|
---|
772 | struct iovec *pIoVec = &pThis->paIoVecs[idx];
|
---|
773 |
|
---|
774 | pIoVec->iov_base = pvBuf;
|
---|
775 | pIoVec->iov_len = cbBuf;
|
---|
776 |
|
---|
777 | pSqe->u8Flags = 0;
|
---|
778 | pSqe->u16IoPrio = 0;
|
---|
779 | pSqe->i32Fd = (int32_t)RTFileToNative(pHandle->u.hFile);
|
---|
780 | pSqe->u64OffStart = off;
|
---|
781 | pSqe->u64AddrBufIoVec = (uint64_t)(uintptr_t)pIoVec;
|
---|
782 | pSqe->u64User = (uint64_t)(uintptr_t)pvUser;
|
---|
783 |
|
---|
784 | switch (enmOp)
|
---|
785 | {
|
---|
786 | case RTIOQUEUEOP_READ:
|
---|
787 | pSqe->u8Opc = LNX_IOURING_OPC_READV;
|
---|
788 | pSqe->uOpc.u32KrnlRwFlags = 0;
|
---|
789 | break;
|
---|
790 | case RTIOQUEUEOP_WRITE:
|
---|
791 | pSqe->u8Opc = LNX_IOURING_OPC_WRITEV;
|
---|
792 | pSqe->uOpc.u32KrnlRwFlags = 0;
|
---|
793 | break;
|
---|
794 | case RTIOQUEUEOP_SYNC:
|
---|
795 | pSqe->u8Opc = LNX_IOURING_OPC_FSYNC;
|
---|
796 | pSqe->uOpc.u32FsyncFlags = 0;
|
---|
797 | break;
|
---|
798 | default:
|
---|
799 | AssertMsgFailedReturn(("Invalid I/O queue operation: %d\n", enmOp),
|
---|
800 | VERR_INVALID_PARAMETER);
|
---|
801 | }
|
---|
802 |
|
---|
803 | pThis->idxSqTail++;
|
---|
804 | pThis->cSqesToCommit++;
|
---|
805 | return VINF_SUCCESS;
|
---|
806 | }
|
---|
807 |
|
---|
808 |
|
---|
809 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnCommit} */
|
---|
810 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_Commit(RTIOQUEUEPROV hIoQueueProv, uint32_t *pcReqsCommitted)
|
---|
811 | {
|
---|
812 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
813 | RT_NOREF(pThis, pcReqsCommitted);
|
---|
814 |
|
---|
815 | ASMWriteFence();
|
---|
816 | ASMAtomicWriteU32(pThis->Sq.pidxTail, pThis->idxSqTail);
|
---|
817 | ASMWriteFence();
|
---|
818 |
|
---|
819 | int rc = rtIoQueueLnxIoURingEnter(pThis->iFdIoCtx, pThis->cSqesToCommit, 0, 0 /*fFlags*/);
|
---|
820 | if (RT_SUCCESS(rc))
|
---|
821 | {
|
---|
822 | *pcReqsCommitted = pThis->cSqesToCommit;
|
---|
823 | pThis->cSqesToCommit = 0;
|
---|
824 | }
|
---|
825 |
|
---|
826 | return rc;
|
---|
827 | }
|
---|
828 |
|
---|
829 |
|
---|
830 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnEvtWait} */
|
---|
831 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_EvtWait(RTIOQUEUEPROV hIoQueueProv, PRTIOQUEUECEVT paCEvt, uint32_t cCEvt,
|
---|
832 | uint32_t cMinWait, uint32_t *pcCEvt, uint32_t fFlags)
|
---|
833 | {
|
---|
834 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
835 | int rc = VINF_SUCCESS;
|
---|
836 | uint32_t cCEvtSeen = 0;
|
---|
837 |
|
---|
838 | RT_NOREF(fFlags);
|
---|
839 |
|
---|
840 | /*
|
---|
841 | * Check the completion queue first for any completed events which might save us a
|
---|
842 | * context switch later on.
|
---|
843 | */
|
---|
844 | rtIoQueueLnxIoURingFileProvCqCheck(pThis, paCEvt, cCEvt, &cCEvtSeen);
|
---|
845 |
|
---|
846 | while ( cCEvtSeen < cMinWait
|
---|
847 | && RT_SUCCESS(rc))
|
---|
848 | {
|
---|
849 | /*
|
---|
850 | * We can employ a blocking read on the event file descriptor, it will return
|
---|
851 | * either when woken up externally or when there are completion events pending.
|
---|
852 | */
|
---|
853 | uint64_t uCnt = 0; /**< The counter value returned upon a successful read(). */
|
---|
854 | ssize_t rcLnx = read(pThis->iFdEvt, &uCnt, sizeof(uCnt));
|
---|
855 | if (rcLnx == sizeof(uCnt))
|
---|
856 | {
|
---|
857 | uint32_t cCEvtThisSeen = 0;
|
---|
858 | rtIoQueueLnxIoURingFileProvCqCheck(pThis, &paCEvt[cCEvtSeen], cCEvt - cCEvtSeen, &cCEvtThisSeen);
|
---|
859 | cCEvtSeen += cCEvtThisSeen;
|
---|
860 |
|
---|
861 | /* Whether we got woken up externally. */
|
---|
862 | if (ASMAtomicXchgBool(&pThis->fExtIntr, false))
|
---|
863 | rc = VERR_INTERRUPTED;
|
---|
864 | }
|
---|
865 | else if (rcLnx == -1)
|
---|
866 | rc = RTErrConvertFromErrno(errno);
|
---|
867 | else
|
---|
868 | AssertMsgFailed(("Unexpected read() -> 0\n"));
|
---|
869 | }
|
---|
870 |
|
---|
871 | *pcCEvt = cCEvtSeen;
|
---|
872 | return rc;
|
---|
873 | }
|
---|
874 |
|
---|
875 |
|
---|
876 | /** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnEvtWaitWakeup} */
|
---|
877 | static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_EvtWaitWakeup(RTIOQUEUEPROV hIoQueueProv)
|
---|
878 | {
|
---|
879 | PRTIOQUEUEPROVINT pThis = hIoQueueProv;
|
---|
880 | int rc = VINF_SUCCESS;
|
---|
881 |
|
---|
882 | if (!ASMAtomicXchgBool(&pThis->fExtIntr, true))
|
---|
883 | {
|
---|
884 | const uint64_t uValAdd = 1;
|
---|
885 | ssize_t rcLnx = write(pThis->iFdEvt, &uValAdd, sizeof(uValAdd));
|
---|
886 |
|
---|
887 | Assert(rcLnx == -1 || rcLnx == sizeof(uValAdd));
|
---|
888 | if (rcLnx == -1)
|
---|
889 | rc = RTErrConvertFromErrno(errno);
|
---|
890 | }
|
---|
891 |
|
---|
892 | return rc;
|
---|
893 | }
|
---|
894 |
|
---|
895 |
|
---|
896 | /**
|
---|
897 | * Async file I/O queue provider virtual method table.
|
---|
898 | */
|
---|
899 | RT_DECL_DATA_CONST(RTIOQUEUEPROVVTABLE const) g_RTIoQueueLnxIoURingProv =
|
---|
900 | {
|
---|
901 | /** uVersion */
|
---|
902 | RTIOQUEUEPROVVTABLE_VERSION,
|
---|
903 | /** pszId */
|
---|
904 | "LnxIoURingFile",
|
---|
905 | /** cbIoQueueProv */
|
---|
906 | sizeof(RTIOQUEUEPROVINT),
|
---|
907 | /** enmHnd */
|
---|
908 | RTHANDLETYPE_FILE,
|
---|
909 | /** fFlags */
|
---|
910 | 0,
|
---|
911 | /** pfnIsSupported */
|
---|
912 | rtIoQueueLnxIoURingFileProv_IsSupported,
|
---|
913 | /** pfnQueueInit */
|
---|
914 | rtIoQueueLnxIoURingFileProv_QueueInit,
|
---|
915 | /** pfnQueueDestroy */
|
---|
916 | rtIoQueueLnxIoURingFileProv_QueueDestroy,
|
---|
917 | /** pfnHandleRegister */
|
---|
918 | rtIoQueueLnxIoURingFileProv_HandleRegister,
|
---|
919 | /** pfnHandleDeregister */
|
---|
920 | rtIoQueueLnxIoURingFileProv_HandleDeregister,
|
---|
921 | /** pfnReqPrepare */
|
---|
922 | rtIoQueueLnxIoURingFileProv_ReqPrepare,
|
---|
923 | /** pfnReqPrepareSg */
|
---|
924 | NULL,
|
---|
925 | /** pfnCommit */
|
---|
926 | rtIoQueueLnxIoURingFileProv_Commit,
|
---|
927 | /** pfnEvtWait */
|
---|
928 | rtIoQueueLnxIoURingFileProv_EvtWait,
|
---|
929 | /** pfnEvtWaitWakeup */
|
---|
930 | rtIoQueueLnxIoURingFileProv_EvtWaitWakeup,
|
---|
931 | /** uEndMarker */
|
---|
932 | RTIOQUEUEPROVVTABLE_VERSION
|
---|
933 | };
|
---|
934 |
|
---|