VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/fileaio-posix.cpp@ 93115

Last change on this file since 93115 was 93115, checked in by vboxsync, 3 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 36.8 KB
Line 
1/* $Id: fileaio-posix.cpp 93115 2022-01-01 11:31:46Z vboxsync $ */
2/** @file
3 * IPRT - File async I/O, native implementation for POSIX compliant host platforms.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#define LOG_GROUP RTLOGGROUP_DIR
32#include <iprt/asm.h>
33#include <iprt/file.h>
34#include <iprt/mem.h>
35#include <iprt/assert.h>
36#include <iprt/string.h>
37#include <iprt/err.h>
38#include <iprt/log.h>
39#include <iprt/thread.h>
40#include <iprt/semaphore.h>
41#include "internal/fileaio.h"
42
43#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
44# include <sys/types.h>
45# include <sys/sysctl.h> /* for sysctlbyname */
46#endif
47#if defined(RT_OS_FREEBSD)
48# include <fcntl.h> /* O_SYNC */
49#endif
50#include <aio.h>
51#include <errno.h>
52#include <time.h>
53
54/*
55 * Linux does not define this value.
56 * Just define it with really big
57 * value.
58 */
59#ifndef AIO_LISTIO_MAX
60# define AIO_LISTIO_MAX UINT32_MAX
61#endif
62
63#if 0 /* Only used for debugging */
64# undef AIO_LISTIO_MAX
65# define AIO_LISTIO_MAX 16
66#endif
67
68/** Invalid entry in the waiting array. */
69#define RTFILEAIOCTX_WAIT_ENTRY_INVALID (~0U)
70
71/** No-op replacement for rtFileAioCtxDump for non debug builds */
72#ifndef LOG_ENABLED
73# define rtFileAioCtxDump(pCtxInt) do {} while (0)
74#endif
75
76
77/*********************************************************************************************************************************
78* Structures and Typedefs *
79*********************************************************************************************************************************/
80/**
81 * Async I/O request state.
82 */
83typedef struct RTFILEAIOREQINTERNAL
84{
85 /** The aio control block. FIRST ELEMENT! */
86 struct aiocb AioCB;
87 /** Next element in the chain. */
88 struct RTFILEAIOREQINTERNAL *pNext;
89 /** Previous element in the chain. */
90 struct RTFILEAIOREQINTERNAL *pPrev;
91 /** Current state the request is in. */
92 RTFILEAIOREQSTATE enmState;
93 /** Flag whether this is a flush request. */
94 bool fFlush;
95 /** Flag indicating if the request was canceled. */
96 volatile bool fCanceled;
97 /** Opaque user data. */
98 void *pvUser;
99 /** Number of bytes actually transferred. */
100 size_t cbTransfered;
101 /** Status code. */
102 int Rc;
103 /** Completion context we are assigned to. */
104 struct RTFILEAIOCTXINTERNAL *pCtxInt;
105 /** Entry in the waiting list the request is in. */
106 unsigned iWaitingList;
107 /** Magic value (RTFILEAIOREQ_MAGIC). */
108 uint32_t u32Magic;
109} RTFILEAIOREQINTERNAL, *PRTFILEAIOREQINTERNAL;
110
111/**
112 * Async I/O completion context state.
113 */
114typedef struct RTFILEAIOCTXINTERNAL
115{
116 /** Current number of requests active on this context. */
117 volatile int32_t cRequests;
118 /** Maximum number of requests this context can handle. */
119 uint32_t cMaxRequests;
120 /** The ID of the thread which is currently waiting for requests. */
121 volatile RTTHREAD hThreadWait;
122 /** Flag whether the thread was woken up. */
123 volatile bool fWokenUp;
124 /** Flag whether the thread is currently waiting in the syscall. */
125 volatile bool fWaiting;
126 /** Flags given during creation. */
127 uint32_t fFlags;
128 /** Magic value (RTFILEAIOCTX_MAGIC). */
129 uint32_t u32Magic;
130 /** Flag whether the thread was woken up due to a internal event. */
131 volatile bool fWokenUpInternal;
132 /** List of new requests which needs to be inserted into apReqs by the
133 * waiting thread. */
134 volatile PRTFILEAIOREQINTERNAL apReqsNewHead[5];
135 /** Special entry for requests which are canceled. Because only one
136 * request can be canceled at a time and the thread canceling the request
137 * has to wait we need only one entry. */
138 volatile PRTFILEAIOREQINTERNAL pReqToCancel;
139 /** Event semaphore the canceling thread is waiting for completion of
140 * the operation. */
141 RTSEMEVENT SemEventCancel;
142 /** Head of submitted elements waiting to get into the array. */
143 PRTFILEAIOREQINTERNAL pReqsWaitHead;
144 /** Tail of submitted elements waiting to get into the array. */
145 PRTFILEAIOREQINTERNAL pReqsWaitTail;
146 /** Maximum number of elements in the waiting array. */
147 unsigned cReqsWaitMax;
148 /** First free slot in the waiting list. */
149 unsigned iFirstFree;
150 /** List of requests we are currently waiting on.
151 * Size depends on cMaxRequests and AIO_LISTIO_MAX. */
152 volatile PRTFILEAIOREQINTERNAL apReqs[1];
153} RTFILEAIOCTXINTERNAL, *PRTFILEAIOCTXINTERNAL;
154
155/**
156 * Internal worker for waking up the waiting thread.
157 */
158static void rtFileAioCtxWakeup(PRTFILEAIOCTXINTERNAL pCtxInt)
159{
160 /*
161 * Read the thread handle before the status flag.
162 * If we read the handle after the flag we might
163 * end up with an invalid handle because the thread
164 * waiting in RTFileAioCtxWakeup() might get scheduled
165 * before we read the flag and returns.
166 * We can ensure that the handle is valid if fWaiting is true
167 * when reading the handle before the status flag.
168 */
169 RTTHREAD hThread;
170 ASMAtomicReadHandle(&pCtxInt->hThreadWait, &hThread);
171 bool fWaiting = ASMAtomicReadBool(&pCtxInt->fWaiting);
172 if (fWaiting)
173 {
174 /*
175 * If a thread waits the handle must be valid.
176 * It is possible that the thread returns from
177 * aio_suspend() before the signal is send.
178 * This is no problem because we already set fWokenUp
179 * to true which will let the thread return VERR_INTERRUPTED
180 * and the next call to RTFileAioCtxWait() will not
181 * return VERR_INTERRUPTED because signals are not saved
182 * and will simply vanish if the destination thread can't
183 * receive it.
184 */
185 Assert(hThread != NIL_RTTHREAD);
186 RTThreadPoke(hThread);
187 }
188}
189
190/**
191 * Internal worker processing events and inserting new requests into the waiting list.
192 */
193static int rtFileAioCtxProcessEvents(PRTFILEAIOCTXINTERNAL pCtxInt)
194{
195 int rc = VINF_SUCCESS;
196
197 /* Process new requests first. */
198 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUpInternal, false);
199 if (fWokenUp)
200 {
201 for (unsigned iSlot = 0; iSlot < RT_ELEMENTS(pCtxInt->apReqsNewHead); iSlot++)
202 {
203 PRTFILEAIOREQINTERNAL pReqHead = ASMAtomicXchgPtrT(&pCtxInt->apReqsNewHead[iSlot], NULL, PRTFILEAIOREQINTERNAL);
204
205 while ( (pCtxInt->iFirstFree < pCtxInt->cReqsWaitMax)
206 && pReqHead)
207 {
208 RTFIELAIOREQ_ASSERT_STATE(pReqHead, SUBMITTED);
209 pCtxInt->apReqs[pCtxInt->iFirstFree] = pReqHead;
210 pReqHead->iWaitingList = pCtxInt->iFirstFree;
211 pReqHead = pReqHead->pNext;
212
213 /* Clear pointer to next and previous element just for safety. */
214 pCtxInt->apReqs[pCtxInt->iFirstFree]->pNext = NULL;
215 pCtxInt->apReqs[pCtxInt->iFirstFree]->pPrev = NULL;
216 pCtxInt->iFirstFree++;
217
218 Assert( (pCtxInt->iFirstFree <= pCtxInt->cMaxRequests)
219 && (pCtxInt->iFirstFree <= pCtxInt->cReqsWaitMax));
220 }
221
222 /* Append the rest to the wait list. */
223 if (pReqHead)
224 {
225 RTFIELAIOREQ_ASSERT_STATE(pReqHead, SUBMITTED);
226 if (!pCtxInt->pReqsWaitHead)
227 {
228 Assert(!pCtxInt->pReqsWaitTail);
229 pCtxInt->pReqsWaitHead = pReqHead;
230 pReqHead->pPrev = NULL;
231 }
232 else
233 {
234 AssertPtr(pCtxInt->pReqsWaitTail);
235
236 pCtxInt->pReqsWaitTail->pNext = pReqHead;
237 pReqHead->pPrev = pCtxInt->pReqsWaitTail;
238 }
239
240 /* Update tail. */
241 while (pReqHead->pNext)
242 {
243 RTFIELAIOREQ_ASSERT_STATE(pReqHead->pNext, SUBMITTED);
244 pReqHead = pReqHead->pNext;
245 }
246
247 pCtxInt->pReqsWaitTail = pReqHead;
248 pCtxInt->pReqsWaitTail->pNext = NULL;
249 }
250 }
251
252 /* Check if a request needs to be canceled. */
253 PRTFILEAIOREQINTERNAL pReqToCancel = ASMAtomicReadPtrT(&pCtxInt->pReqToCancel, PRTFILEAIOREQINTERNAL);
254 if (pReqToCancel)
255 {
256 /* The request can be in the array waiting for completion or still in the list because it is full. */
257 if (pReqToCancel->iWaitingList != RTFILEAIOCTX_WAIT_ENTRY_INVALID)
258 {
259 /* Put it out of the waiting list. */
260 pCtxInt->apReqs[pReqToCancel->iWaitingList] = pCtxInt->apReqs[--pCtxInt->iFirstFree];
261 pCtxInt->apReqs[pReqToCancel->iWaitingList]->iWaitingList = pReqToCancel->iWaitingList;
262 }
263 else
264 {
265 /* Unlink from the waiting list. */
266 PRTFILEAIOREQINTERNAL pPrev = pReqToCancel->pPrev;
267 PRTFILEAIOREQINTERNAL pNext = pReqToCancel->pNext;
268
269 if (pNext)
270 pNext->pPrev = pPrev;
271 else
272 {
273 /* We canceled the tail. */
274 pCtxInt->pReqsWaitTail = pPrev;
275 }
276
277 if (pPrev)
278 pPrev->pNext = pNext;
279 else
280 {
281 /* We canceled the head. */
282 pCtxInt->pReqsWaitHead = pNext;
283 }
284 }
285
286 ASMAtomicDecS32(&pCtxInt->cRequests);
287 AssertMsg(pCtxInt->cRequests >= 0, ("Canceled request not which is not in this context\n"));
288 RTSemEventSignal(pCtxInt->SemEventCancel);
289 }
290 }
291 else
292 {
293 if (ASMAtomicXchgBool(&pCtxInt->fWokenUp, false))
294 rc = VERR_INTERRUPTED;
295 }
296
297 return rc;
298}
299
300RTR3DECL(int) RTFileAioGetLimits(PRTFILEAIOLIMITS pAioLimits)
301{
302 int rcBSD = 0;
303 AssertPtrReturn(pAioLimits, VERR_INVALID_POINTER);
304
305#if defined(RT_OS_DARWIN)
306 int cReqsOutstandingMax = 0;
307 size_t cbParameter = sizeof(int);
308
309 rcBSD = sysctlbyname("kern.aioprocmax", /* name */
310 &cReqsOutstandingMax, /* Where to store the old value. */
311 &cbParameter, /* Size of the memory pointed to. */
312 NULL, /* Where the new value is located. */
313 0); /* Where the size of the new value is stored. */
314 if (rcBSD == -1)
315 return RTErrConvertFromErrno(errno);
316
317 pAioLimits->cReqsOutstandingMax = cReqsOutstandingMax;
318 pAioLimits->cbBufferAlignment = 0;
319#elif defined(RT_OS_FREEBSD)
320 /*
321 * The AIO API is implemented in a kernel module which is not
322 * loaded by default.
323 * If it is loaded there are additional sysctl parameters.
324 */
325 int cReqsOutstandingMax = 0;
326 size_t cbParameter = sizeof(int);
327
328 rcBSD = sysctlbyname("vfs.aio.max_aio_per_proc", /* name */
329 &cReqsOutstandingMax, /* Where to store the old value. */
330 &cbParameter, /* Size of the memory pointed to. */
331 NULL, /* Where the new value is located. */
332 0); /* Where the size of the new value is stored. */
333 if (rcBSD == -1)
334 {
335 /* ENOENT means the value is unknown thus the module is not loaded. */
336 if (errno == ENOENT)
337 return VERR_NOT_SUPPORTED;
338 else
339 return RTErrConvertFromErrno(errno);
340 }
341
342 pAioLimits->cReqsOutstandingMax = cReqsOutstandingMax;
343 pAioLimits->cbBufferAlignment = 0;
344#else
345 pAioLimits->cReqsOutstandingMax = RTFILEAIO_UNLIMITED_REQS;
346 pAioLimits->cbBufferAlignment = 0;
347#endif
348
349 return VINF_SUCCESS;
350}
351
352RTR3DECL(int) RTFileAioReqCreate(PRTFILEAIOREQ phReq)
353{
354 AssertPtrReturn(phReq, VERR_INVALID_POINTER);
355
356 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)RTMemAllocZ(sizeof(RTFILEAIOREQINTERNAL));
357 if (RT_UNLIKELY(!pReqInt))
358 return VERR_NO_MEMORY;
359
360 pReqInt->pCtxInt = NULL;
361 pReqInt->u32Magic = RTFILEAIOREQ_MAGIC;
362 pReqInt->iWaitingList = RTFILEAIOCTX_WAIT_ENTRY_INVALID;
363 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
364
365 *phReq = (RTFILEAIOREQ)pReqInt;
366
367 return VINF_SUCCESS;
368}
369
370
371RTDECL(int) RTFileAioReqDestroy(RTFILEAIOREQ hReq)
372{
373 /*
374 * Validate the handle and ignore nil.
375 */
376 if (hReq == NIL_RTFILEAIOREQ)
377 return VINF_SUCCESS;
378 PRTFILEAIOREQINTERNAL pReqInt = hReq;
379 RTFILEAIOREQ_VALID_RETURN(pReqInt);
380 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
381
382 /*
383 * Trash the magic and free it.
384 */
385 ASMAtomicUoWriteU32(&pReqInt->u32Magic, ~RTFILEAIOREQ_MAGIC);
386 RTMemFree(pReqInt);
387 return VINF_SUCCESS;
388}
389
390/**
391 * Worker setting up the request.
392 */
393DECLINLINE(int) rtFileAioReqPrepareTransfer(RTFILEAIOREQ hReq, RTFILE hFile,
394 unsigned uTransferDirection,
395 RTFOFF off, void *pvBuf, size_t cbTransfer,
396 void *pvUser)
397{
398 /*
399 * Validate the input.
400 */
401 PRTFILEAIOREQINTERNAL pReqInt = hReq;
402 RTFILEAIOREQ_VALID_RETURN(pReqInt);
403 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
404 Assert(hFile != NIL_RTFILE);
405 AssertPtr(pvBuf);
406 Assert(off >= 0);
407 Assert(cbTransfer > 0);
408
409 memset(&pReqInt->AioCB, 0, sizeof(struct aiocb));
410 pReqInt->fFlush = false;
411 pReqInt->AioCB.aio_lio_opcode = uTransferDirection;
412 pReqInt->AioCB.aio_fildes = RTFileToNative(hFile);
413 pReqInt->AioCB.aio_offset = off;
414 pReqInt->AioCB.aio_nbytes = cbTransfer;
415 pReqInt->AioCB.aio_buf = pvBuf;
416 pReqInt->pvUser = pvUser;
417 pReqInt->pCtxInt = NULL;
418 pReqInt->Rc = VERR_FILE_AIO_IN_PROGRESS;
419 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
420
421 return VINF_SUCCESS;
422}
423
424
425RTDECL(int) RTFileAioReqPrepareRead(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
426 void *pvBuf, size_t cbRead, void *pvUser)
427{
428 return rtFileAioReqPrepareTransfer(hReq, hFile, LIO_READ,
429 off, pvBuf, cbRead, pvUser);
430}
431
432
433RTDECL(int) RTFileAioReqPrepareWrite(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
434 void const *pvBuf, size_t cbWrite, void *pvUser)
435{
436 return rtFileAioReqPrepareTransfer(hReq, hFile, LIO_WRITE,
437 off, (void *)pvBuf, cbWrite, pvUser);
438}
439
440
441RTDECL(int) RTFileAioReqPrepareFlush(RTFILEAIOREQ hReq, RTFILE hFile, void *pvUser)
442{
443 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)hReq;
444
445 RTFILEAIOREQ_VALID_RETURN(pReqInt);
446 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
447 Assert(hFile != NIL_RTFILE);
448
449 pReqInt->fFlush = true;
450 pReqInt->AioCB.aio_fildes = RTFileToNative(hFile);
451 pReqInt->AioCB.aio_offset = 0;
452 pReqInt->AioCB.aio_nbytes = 0;
453 pReqInt->AioCB.aio_buf = NULL;
454 pReqInt->pvUser = pvUser;
455 pReqInt->Rc = VERR_FILE_AIO_IN_PROGRESS;
456 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
457
458 return VINF_SUCCESS;
459}
460
461
462RTDECL(void *) RTFileAioReqGetUser(RTFILEAIOREQ hReq)
463{
464 PRTFILEAIOREQINTERNAL pReqInt = hReq;
465 RTFILEAIOREQ_VALID_RETURN_RC(pReqInt, NULL);
466
467 return pReqInt->pvUser;
468}
469
470
471RTDECL(int) RTFileAioReqCancel(RTFILEAIOREQ hReq)
472{
473 PRTFILEAIOREQINTERNAL pReqInt = hReq;
474 RTFILEAIOREQ_VALID_RETURN(pReqInt);
475 RTFILEAIOREQ_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_NOT_SUBMITTED);
476
477 ASMAtomicXchgBool(&pReqInt->fCanceled, true);
478
479 int rcPosix = aio_cancel(pReqInt->AioCB.aio_fildes, &pReqInt->AioCB);
480
481 if (rcPosix == AIO_CANCELED)
482 {
483 PRTFILEAIOCTXINTERNAL pCtxInt = pReqInt->pCtxInt;
484 /*
485 * Notify the waiting thread that the request was canceled.
486 */
487 AssertMsg(RT_VALID_PTR(pCtxInt), ("Invalid state. Request was canceled but wasn't submitted\n"));
488
489 Assert(!pCtxInt->pReqToCancel);
490 ASMAtomicWritePtr(&pCtxInt->pReqToCancel, pReqInt);
491 rtFileAioCtxWakeup(pCtxInt);
492
493 /* Wait for acknowledge. */
494 int rc = RTSemEventWait(pCtxInt->SemEventCancel, RT_INDEFINITE_WAIT);
495 AssertRC(rc);
496
497 ASMAtomicWriteNullPtr(&pCtxInt->pReqToCancel);
498 pReqInt->Rc = VERR_FILE_AIO_CANCELED;
499 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
500 return VINF_SUCCESS;
501 }
502 else if (rcPosix == AIO_ALLDONE)
503 return VERR_FILE_AIO_COMPLETED;
504 else if (rcPosix == AIO_NOTCANCELED)
505 return VERR_FILE_AIO_IN_PROGRESS;
506 else
507 return RTErrConvertFromErrno(errno);
508}
509
510
511RTDECL(int) RTFileAioReqGetRC(RTFILEAIOREQ hReq, size_t *pcbTransfered)
512{
513 PRTFILEAIOREQINTERNAL pReqInt = hReq;
514 RTFILEAIOREQ_VALID_RETURN(pReqInt);
515 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
516 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, PREPARED, VERR_FILE_AIO_NOT_SUBMITTED);
517 AssertPtrNull(pcbTransfered);
518
519 if ( (RT_SUCCESS(pReqInt->Rc))
520 && (pcbTransfered))
521 *pcbTransfered = pReqInt->cbTransfered;
522
523 return pReqInt->Rc;
524}
525
526
527RTDECL(int) RTFileAioCtxCreate(PRTFILEAIOCTX phAioCtx, uint32_t cAioReqsMax,
528 uint32_t fFlags)
529{
530 PRTFILEAIOCTXINTERNAL pCtxInt;
531 unsigned cReqsWaitMax;
532
533 AssertPtrReturn(phAioCtx, VERR_INVALID_POINTER);
534 AssertReturn(!(fFlags & ~RTFILEAIOCTX_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
535
536 if (cAioReqsMax == RTFILEAIO_UNLIMITED_REQS)
537 return VERR_OUT_OF_RANGE;
538
539 cReqsWaitMax = RT_MIN(cAioReqsMax, AIO_LISTIO_MAX);
540
541 pCtxInt = (PRTFILEAIOCTXINTERNAL)RTMemAllocZ( sizeof(RTFILEAIOCTXINTERNAL)
542 + cReqsWaitMax * sizeof(PRTFILEAIOREQINTERNAL));
543 if (RT_UNLIKELY(!pCtxInt))
544 return VERR_NO_MEMORY;
545
546 /* Create event semaphore. */
547 int rc = RTSemEventCreate(&pCtxInt->SemEventCancel);
548 if (RT_FAILURE(rc))
549 {
550 RTMemFree(pCtxInt);
551 return rc;
552 }
553
554 pCtxInt->u32Magic = RTFILEAIOCTX_MAGIC;
555 pCtxInt->cMaxRequests = cAioReqsMax;
556 pCtxInt->cReqsWaitMax = cReqsWaitMax;
557 pCtxInt->fFlags = fFlags;
558 *phAioCtx = (RTFILEAIOCTX)pCtxInt;
559
560 return VINF_SUCCESS;
561}
562
563
564RTDECL(int) RTFileAioCtxDestroy(RTFILEAIOCTX hAioCtx)
565{
566 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
567
568 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
569
570 if (RT_UNLIKELY(pCtxInt->cRequests))
571 return VERR_FILE_AIO_BUSY;
572
573 RTSemEventDestroy(pCtxInt->SemEventCancel);
574 RTMemFree(pCtxInt);
575
576 return VINF_SUCCESS;
577}
578
579
580RTDECL(uint32_t) RTFileAioCtxGetMaxReqCount(RTFILEAIOCTX hAioCtx)
581{
582 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
583
584 if (hAioCtx == NIL_RTFILEAIOCTX)
585 return RTFILEAIO_UNLIMITED_REQS;
586 return pCtxInt->cMaxRequests;
587}
588
589RTDECL(int) RTFileAioCtxAssociateWithFile(RTFILEAIOCTX hAioCtx, RTFILE hFile)
590{
591 NOREF(hAioCtx); NOREF(hFile);
592 return VINF_SUCCESS;
593}
594
#ifdef LOG_ENABLED
/**
 * Writes the complete state of an async I/O context to the flow log.
 *
 * @param   pCtxInt     The context to dump.
 */
static void rtFileAioCtxDump(PRTFILEAIOCTXINTERNAL pCtxInt)
{
    unsigned idx;

    /* Counters, waiter and wakeup flags. */
    LogFlow(("cRequests=%d\n", pCtxInt->cRequests));
    LogFlow(("cMaxRequests=%u\n", pCtxInt->cMaxRequests));
    LogFlow(("hThreadWait=%#p\n", pCtxInt->hThreadWait));
    LogFlow(("fWokenUp=%RTbool\n", pCtxInt->fWokenUp));
    LogFlow(("fWaiting=%RTbool\n", pCtxInt->fWaiting));
    LogFlow(("fWokenUpInternal=%RTbool\n", pCtxInt->fWokenUpInternal));

    /* Hand-over slots for newly submitted requests. */
    for (idx = 0; idx < RT_ELEMENTS(pCtxInt->apReqsNewHead); idx++)
    {
        LogFlow(("apReqsNewHead[%u]=%#p\n", idx, pCtxInt->apReqsNewHead[idx]));
    }

    /* Cancel entry and the overflow list. */
    LogFlow(("pReqToCancel=%#p\n", pCtxInt->pReqToCancel));
    LogFlow(("pReqsWaitHead=%#p\n", pCtxInt->pReqsWaitHead));
    LogFlow(("pReqsWaitTail=%#p\n", pCtxInt->pReqsWaitTail));

    /* The array handed to aio_suspend(). */
    LogFlow(("cReqsWaitMax=%u\n", pCtxInt->cReqsWaitMax));
    LogFlow(("iFirstFree=%u\n", pCtxInt->iFirstFree));
    for (idx = 0; idx < pCtxInt->cReqsWaitMax; idx++)
    {
        LogFlow(("apReqs[%u]=%#p\n", idx, pCtxInt->apReqs[idx]));
    }
}
#endif
618
619RTDECL(int) RTFileAioCtxSubmit(RTFILEAIOCTX hAioCtx, PRTFILEAIOREQ pahReqs, size_t cReqs)
620{
621 int rc = VINF_SUCCESS;
622 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
623
624 /* Parameter checks */
625 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
626 AssertReturn(cReqs != 0, VERR_INVALID_POINTER);
627 AssertPtrReturn(pahReqs, VERR_INVALID_PARAMETER);
628
629 rtFileAioCtxDump(pCtxInt);
630
631 /* Check that we don't exceed the limit */
632 if (ASMAtomicUoReadS32(&pCtxInt->cRequests) + cReqs > pCtxInt->cMaxRequests)
633 return VERR_FILE_AIO_LIMIT_EXCEEDED;
634
635 PRTFILEAIOREQINTERNAL pHead = NULL;
636
637 do
638 {
639 int rcPosix = 0;
640 size_t cReqsSubmit = 0;
641 size_t i = 0;
642 PRTFILEAIOREQINTERNAL pReqInt;
643
644 while ( (i < cReqs)
645 && (i < AIO_LISTIO_MAX))
646 {
647 pReqInt = pahReqs[i];
648 if (RTFILEAIOREQ_IS_NOT_VALID(pReqInt))
649 {
650 /* Undo everything and stop submitting. */
651 for (size_t iUndo = 0; iUndo < i; iUndo++)
652 {
653 pReqInt = pahReqs[iUndo];
654 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
655 pReqInt->pCtxInt = NULL;
656
657 /* Unlink from the list again. */
658 PRTFILEAIOREQINTERNAL pNext, pPrev;
659 pNext = pReqInt->pNext;
660 pPrev = pReqInt->pPrev;
661 if (pNext)
662 pNext->pPrev = pPrev;
663 if (pPrev)
664 pPrev->pNext = pNext;
665 else
666 pHead = pNext;
667 }
668 rc = VERR_INVALID_HANDLE;
669 break;
670 }
671
672 pReqInt->pCtxInt = pCtxInt;
673
674 if (pReqInt->fFlush)
675 break;
676
677 /* Link them together. */
678 pReqInt->pNext = pHead;
679 if (pHead)
680 pHead->pPrev = pReqInt;
681 pReqInt->pPrev = NULL;
682 pHead = pReqInt;
683 RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
684
685 cReqsSubmit++;
686 i++;
687 }
688
689 if (cReqsSubmit)
690 {
691 rcPosix = lio_listio(LIO_NOWAIT, (struct aiocb **)pahReqs, cReqsSubmit, NULL);
692 if (RT_UNLIKELY(rcPosix < 0))
693 {
694 size_t cReqsSubmitted = cReqsSubmit;
695
696 if (errno == EAGAIN)
697 rc = VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
698 else
699 rc = RTErrConvertFromErrno(errno);
700
701 /* Check which ones were not submitted. */
702 for (i = 0; i < cReqsSubmit; i++)
703 {
704 pReqInt = pahReqs[i];
705
706 rcPosix = aio_error(&pReqInt->AioCB);
707
708 if ((rcPosix != EINPROGRESS) && (rcPosix != 0))
709 {
710 cReqsSubmitted--;
711
712#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
713 if (errno == EINVAL)
714#else
715 if (rcPosix == EINVAL)
716#endif
717 {
718 /* Was not submitted. */
719 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
720 }
721 else
722 {
723 /* An error occurred. */
724 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
725
726 /*
727 * Looks like Apple and glibc interpret the standard in different ways.
728 * glibc returns the error code which would be in errno but Apple returns
729 * -1 and sets errno to the appropriate value
730 */
731#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
732 Assert(rcPosix == -1);
733 pReqInt->Rc = RTErrConvertFromErrno(errno);
734#elif defined(RT_OS_LINUX)
735 pReqInt->Rc = RTErrConvertFromErrno(rcPosix);
736#endif
737 pReqInt->cbTransfered = 0;
738 }
739 /* Unlink from the list. */
740 PRTFILEAIOREQINTERNAL pNext, pPrev;
741 pNext = pReqInt->pNext;
742 pPrev = pReqInt->pPrev;
743 if (pNext)
744 pNext->pPrev = pPrev;
745 if (pPrev)
746 pPrev->pNext = pNext;
747 else
748 pHead = pNext;
749
750 pReqInt->pNext = NULL;
751 pReqInt->pPrev = NULL;
752 }
753 }
754 ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmitted);
755 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
756 break;
757 }
758
759 ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmit);
760 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
761 cReqs -= cReqsSubmit;
762 pahReqs += cReqsSubmit;
763 }
764
765 /*
766 * Check if we have a flush request now.
767 * If not we hit the AIO_LISTIO_MAX limit
768 * and will continue submitting requests
769 * above.
770 */
771 if (cReqs && RT_SUCCESS_NP(rc))
772 {
773 pReqInt = pahReqs[0];
774
775 if (pReqInt->fFlush)
776 {
777 /*
778 * lio_listio does not work with flush requests so
779 * we have to use aio_fsync directly.
780 */
781 rcPosix = aio_fsync(O_SYNC, &pReqInt->AioCB);
782 if (RT_UNLIKELY(rcPosix < 0))
783 {
784 if (errno == EAGAIN)
785 {
786 rc = VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
787 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
788 }
789 else
790 {
791 rc = RTErrConvertFromErrno(errno);
792 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
793 pReqInt->Rc = rc;
794 }
795 pReqInt->cbTransfered = 0;
796 break;
797 }
798
799 /* Link them together. */
800 pReqInt->pNext = pHead;
801 if (pHead)
802 pHead->pPrev = pReqInt;
803 pReqInt->pPrev = NULL;
804 pHead = pReqInt;
805 RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
806
807 ASMAtomicIncS32(&pCtxInt->cRequests);
808 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
809 cReqs--;
810 pahReqs++;
811 }
812 }
813 } while ( cReqs
814 && RT_SUCCESS_NP(rc));
815
816 if (pHead)
817 {
818 /*
819 * Forward successfully submitted requests to the thread waiting for requests.
820 * We search for a free slot first and if we don't find one
821 * we will grab the first one and append our list to the existing entries.
822 */
823 unsigned iSlot = 0;
824 while ( (iSlot < RT_ELEMENTS(pCtxInt->apReqsNewHead))
825 && !ASMAtomicCmpXchgPtr(&pCtxInt->apReqsNewHead[iSlot], pHead, NULL))
826 iSlot++;
827
828 if (iSlot == RT_ELEMENTS(pCtxInt->apReqsNewHead))
829 {
830 /* Nothing found. */
831 PRTFILEAIOREQINTERNAL pOldHead = ASMAtomicXchgPtrT(&pCtxInt->apReqsNewHead[0], NULL, PRTFILEAIOREQINTERNAL);
832
833 /* Find the end of the current head and link the old list to the current. */
834 PRTFILEAIOREQINTERNAL pTail = pHead;
835 while (pTail->pNext)
836 pTail = pTail->pNext;
837
838 pTail->pNext = pOldHead;
839
840 ASMAtomicWritePtr(&pCtxInt->apReqsNewHead[0], pHead);
841 }
842
843 /* Set the internal wakeup flag and wakeup the thread if possible. */
844 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUpInternal, true);
845 if (!fWokenUp)
846 rtFileAioCtxWakeup(pCtxInt);
847 }
848
849 rtFileAioCtxDump(pCtxInt);
850
851 return rc;
852}
853
854
855RTDECL(int) RTFileAioCtxWait(RTFILEAIOCTX hAioCtx, size_t cMinReqs, RTMSINTERVAL cMillies,
856 PRTFILEAIOREQ pahReqs, size_t cReqs, uint32_t *pcReqs)
857{
858 int rc = VINF_SUCCESS;
859 int cRequestsCompleted = 0;
860 PRTFILEAIOCTXINTERNAL pCtxInt = (PRTFILEAIOCTXINTERNAL)hAioCtx;
861 struct timespec Timeout;
862 struct timespec *pTimeout = NULL;
863 uint64_t StartNanoTS = 0;
864
865 LogFlowFunc(("hAioCtx=%#p cMinReqs=%zu cMillies=%u pahReqs=%#p cReqs=%zu pcbReqs=%#p\n",
866 hAioCtx, cMinReqs, cMillies, pahReqs, cReqs, pcReqs));
867
868 /* Check parameters. */
869 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
870 AssertPtrReturn(pcReqs, VERR_INVALID_POINTER);
871 AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
872 AssertReturn(cReqs != 0, VERR_INVALID_PARAMETER);
873 AssertReturn(cReqs >= cMinReqs, VERR_OUT_OF_RANGE);
874
875 rtFileAioCtxDump(pCtxInt);
876
877 int32_t cRequestsWaiting = ASMAtomicReadS32(&pCtxInt->cRequests);
878
879 if ( RT_UNLIKELY(cRequestsWaiting <= 0)
880 && !(pCtxInt->fFlags & RTFILEAIOCTX_FLAGS_WAIT_WITHOUT_PENDING_REQUESTS))
881 return VERR_FILE_AIO_NO_REQUEST;
882
883 if (RT_UNLIKELY(cMinReqs > (uint32_t)cRequestsWaiting))
884 return VERR_INVALID_PARAMETER;
885
886 if (cMillies != RT_INDEFINITE_WAIT)
887 {
888 Timeout.tv_sec = cMillies / 1000;
889 Timeout.tv_nsec = (cMillies % 1000) * 1000000;
890 pTimeout = &Timeout;
891 StartNanoTS = RTTimeNanoTS();
892 }
893
894 /* Wait for at least one. */
895 if (!cMinReqs)
896 cMinReqs = 1;
897
898 /* For the wakeup call. */
899 Assert(pCtxInt->hThreadWait == NIL_RTTHREAD);
900 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, RTThreadSelf());
901
902 /* Update the waiting list once before we enter the loop. */
903 rc = rtFileAioCtxProcessEvents(pCtxInt);
904
905 while ( cMinReqs
906 && RT_SUCCESS_NP(rc))
907 {
908#ifdef RT_STRICT
909 if (RT_UNLIKELY(!pCtxInt->iFirstFree))
910 {
911 for (unsigned i = 0; i < pCtxInt->cReqsWaitMax; i++)
912 RTAssertMsg2Weak("wait[%d] = %#p\n", i, pCtxInt->apReqs[i]);
913
914 AssertMsgFailed(("No request to wait for. pReqsWaitHead=%#p pReqsWaitTail=%#p\n",
915 pCtxInt->pReqsWaitHead, pCtxInt->pReqsWaitTail));
916 }
917#endif
918
919 LogFlow(("Waiting for %d requests to complete\n", pCtxInt->iFirstFree));
920 rtFileAioCtxDump(pCtxInt);
921
922 ASMAtomicXchgBool(&pCtxInt->fWaiting, true);
923 int rcPosix = aio_suspend((const struct aiocb * const *)pCtxInt->apReqs,
924 pCtxInt->iFirstFree, pTimeout);
925 ASMAtomicXchgBool(&pCtxInt->fWaiting, false);
926 if (rcPosix < 0)
927 {
928 LogFlow(("aio_suspend failed %d nent=%u\n", errno, pCtxInt->iFirstFree));
929 /* Check that this is an external wakeup event. */
930 if (errno == EINTR)
931 rc = rtFileAioCtxProcessEvents(pCtxInt);
932 else
933 rc = RTErrConvertFromErrno(errno);
934 }
935 else
936 {
937 /* Requests finished. */
938 unsigned iReqCurr = 0;
939 unsigned cDone = 0;
940
941 /* Remove completed requests from the waiting list. */
942 while ( (iReqCurr < pCtxInt->iFirstFree)
943 && (cDone < cReqs))
944 {
945 PRTFILEAIOREQINTERNAL pReq = pCtxInt->apReqs[iReqCurr];
946 int rcReq = aio_error(&pReq->AioCB);
947
948 if (rcReq != EINPROGRESS)
949 {
950 /* Completed store the return code. */
951 if (rcReq == 0)
952 {
953 pReq->Rc = VINF_SUCCESS;
954 /* Call aio_return() to free resources. */
955 pReq->cbTransfered = aio_return(&pReq->AioCB);
956 }
957 else
958 {
959#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
960 pReq->Rc = RTErrConvertFromErrno(errno);
961#else
962 pReq->Rc = RTErrConvertFromErrno(rcReq);
963#endif
964 }
965
966 /* Mark the request as finished. */
967 RTFILEAIOREQ_SET_STATE(pReq, COMPLETED);
968 cDone++;
969
970 /* If there are other entries waiting put the head into the now free entry. */
971 if (pCtxInt->pReqsWaitHead)
972 {
973 PRTFILEAIOREQINTERNAL pReqInsert = pCtxInt->pReqsWaitHead;
974
975 pCtxInt->pReqsWaitHead = pReqInsert->pNext;
976 if (!pCtxInt->pReqsWaitHead)
977 {
978 /* List is empty now. Clear tail too. */
979 pCtxInt->pReqsWaitTail = NULL;
980 }
981
982 pReqInsert->iWaitingList = pReq->iWaitingList;
983 pCtxInt->apReqs[pReqInsert->iWaitingList] = pReqInsert;
984 iReqCurr++;
985 }
986 else
987 {
988 /*
989 * Move the last entry into the current position to avoid holes
990 * but only if it is not the last element already.
991 */
992 if (pReq->iWaitingList < pCtxInt->iFirstFree - 1)
993 {
994 pCtxInt->apReqs[pReq->iWaitingList] = pCtxInt->apReqs[--pCtxInt->iFirstFree];
995 pCtxInt->apReqs[pReq->iWaitingList]->iWaitingList = pReq->iWaitingList;
996 }
997 else
998 pCtxInt->iFirstFree--;
999
1000 pCtxInt->apReqs[pCtxInt->iFirstFree] = NULL;
1001 }
1002
1003 /* Put the request into the completed list. */
1004 pahReqs[cRequestsCompleted++] = pReq;
1005 pReq->iWaitingList = RTFILEAIOCTX_WAIT_ENTRY_INVALID;
1006 }
1007 else
1008 iReqCurr++;
1009 }
1010
1011 AssertMsg((cDone <= cReqs), ("Overflow cReqs=%u cMinReqs=%u cDone=%u\n",
1012 cReqs, cDone));
1013 cReqs -= cDone;
1014 cMinReqs = RT_MAX(cMinReqs, cDone) - cDone;
1015 ASMAtomicSubS32(&pCtxInt->cRequests, cDone);
1016
1017 AssertMsg(pCtxInt->cRequests >= 0, ("Finished more requests than currently active\n"));
1018
1019 if (!cMinReqs)
1020 break;
1021
1022 if (cMillies != RT_INDEFINITE_WAIT)
1023 {
1024 uint64_t TimeDiff;
1025
1026 /* Recalculate the timeout. */
1027 TimeDiff = RTTimeSystemNanoTS() - StartNanoTS;
1028 Timeout.tv_sec = Timeout.tv_sec - (TimeDiff / 1000000);
1029 Timeout.tv_nsec = Timeout.tv_nsec - (TimeDiff % 1000000);
1030 }
1031
1032 /* Check for new elements. */
1033 rc = rtFileAioCtxProcessEvents(pCtxInt);
1034 }
1035 }
1036
1037 *pcReqs = cRequestsCompleted;
1038 Assert(pCtxInt->hThreadWait == RTThreadSelf());
1039 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, NIL_RTTHREAD);
1040
1041 rtFileAioCtxDump(pCtxInt);
1042
1043 return rc;
1044}
1045
1046
1047RTDECL(int) RTFileAioCtxWakeup(RTFILEAIOCTX hAioCtx)
1048{
1049 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
1050 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
1051
1052 /** @todo r=bird: Define the protocol for how to resume work after calling
1053 * this function. */
1054
1055 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUp, true);
1056 if (!fWokenUp)
1057 rtFileAioCtxWakeup(pCtxInt);
1058
1059 return VINF_SUCCESS;
1060}
1061
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette