VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/fileaio-posix.cpp@ 45733

Last change on this file since 45733 was 45678, checked in by vboxsync, 12 years ago

Runtime/aio: Add flags parameter to RTFileAioCtxCreate

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 36.5 KB
Line 
1/* $Id: fileaio-posix.cpp 45678 2013-04-23 11:28:41Z vboxsync $ */
2/** @file
3 * IPRT - File async I/O, native implementation for POSIX compliant host platforms.
4 */
5
6/*
7 * Copyright (C) 2006-2011 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#define LOG_GROUP RTLOGGROUP_DIR
32#include <iprt/asm.h>
33#include <iprt/file.h>
34#include <iprt/mem.h>
35#include <iprt/assert.h>
36#include <iprt/string.h>
37#include <iprt/err.h>
38#include <iprt/log.h>
39#include <iprt/thread.h>
40#include <iprt/semaphore.h>
41#include "internal/fileaio.h"
42
43#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
44# include <sys/types.h>
45# include <sys/sysctl.h> /* for sysctlbyname */
46#endif
47#if defined(RT_OS_FREEBSD)
48# include <fcntl.h> /* O_SYNC */
49#endif
50#include <aio.h>
51#include <errno.h>
52#include <time.h>
53
/*
 * AIO_LISTIO_MAX is not provided by the Linux headers.
 * Fall back to a very large value when it is missing.
 */
#ifndef AIO_LISTIO_MAX
# define AIO_LISTIO_MAX UINT32_MAX
#endif

#if 0 /* Debugging aid only: forces a tiny list size. */
# undef AIO_LISTIO_MAX
# define AIO_LISTIO_MAX 16
#endif

/** Value of RTFILEAIOREQINTERNAL::iWaitingList when the request has no slot
 *  in the waiting array. */
#define RTFILEAIOCTX_WAIT_ENTRY_INVALID (~0U)

/** Builds without logging get an empty statement instead of rtFileAioCtxDump. */
#ifndef LOG_ENABLED
# define rtFileAioCtxDump(pCtxInt) do {} while (0)
#endif
75
76/*******************************************************************************
77* Structures and Typedefs *
78*******************************************************************************/
79/**
80 * Async I/O request state.
81 */
82typedef struct RTFILEAIOREQINTERNAL
83{
84 /** The aio control block. FIRST ELEMENT! */
85 struct aiocb AioCB;
86 /** Next element in the chain. */
87 struct RTFILEAIOREQINTERNAL *pNext;
88 /** Previous element in the chain. */
89 struct RTFILEAIOREQINTERNAL *pPrev;
90 /** Current state the request is in. */
91 RTFILEAIOREQSTATE enmState;
92 /** Flag whether this is a flush request. */
93 bool fFlush;
94 /** Flag indicating if the request was canceled. */
95 volatile bool fCanceled;
96 /** Opaque user data. */
97 void *pvUser;
98 /** Number of bytes actually transferred. */
99 size_t cbTransfered;
100 /** Status code. */
101 int Rc;
102 /** Completion context we are assigned to. */
103 struct RTFILEAIOCTXINTERNAL *pCtxInt;
104 /** Entry in the waiting list the request is in. */
105 unsigned iWaitingList;
106 /** Magic value (RTFILEAIOREQ_MAGIC). */
107 uint32_t u32Magic;
108} RTFILEAIOREQINTERNAL, *PRTFILEAIOREQINTERNAL;
109
110/**
111 * Async I/O completion context state.
112 */
113typedef struct RTFILEAIOCTXINTERNAL
114{
115 /** Current number of requests active on this context. */
116 volatile int32_t cRequests;
117 /** Maximum number of requests this context can handle. */
118 uint32_t cMaxRequests;
119 /** The ID of the thread which is currently waiting for requests. */
120 volatile RTTHREAD hThreadWait;
121 /** Flag whether the thread was woken up. */
122 volatile bool fWokenUp;
123 /** Flag whether the thread is currently waiting in the syscall. */
124 volatile bool fWaiting;
125 /** Flags given during creation. */
126 uint32_t fFlags;
127 /** Magic value (RTFILEAIOCTX_MAGIC). */
128 uint32_t u32Magic;
129 /** Flag whether the thread was woken up due to a internal event. */
130 volatile bool fWokenUpInternal;
131 /** List of new requests which needs to be inserted into apReqs by the
132 * waiting thread. */
133 volatile PRTFILEAIOREQINTERNAL apReqsNewHead[5];
134 /** Special entry for requests which are canceled. Because only one
135 * request can be canceled at a time and the thread canceling the request
136 * has to wait we need only one entry. */
137 volatile PRTFILEAIOREQINTERNAL pReqToCancel;
138 /** Event semaphore the canceling thread is waiting for completion of
139 * the operation. */
140 RTSEMEVENT SemEventCancel;
141 /** Head of submitted elements waiting to get into the array. */
142 PRTFILEAIOREQINTERNAL pReqsWaitHead;
143 /** Tail of submitted elements waiting to get into the array. */
144 PRTFILEAIOREQINTERNAL pReqsWaitTail;
145 /** Maximum number of elements in the waiting array. */
146 unsigned cReqsWaitMax;
147 /** First free slot in the waiting list. */
148 unsigned iFirstFree;
149 /** List of requests we are currently waiting on.
150 * Size depends on cMaxRequests and AIO_LISTIO_MAX. */
151 volatile PRTFILEAIOREQINTERNAL apReqs[1];
152} RTFILEAIOCTXINTERNAL, *PRTFILEAIOCTXINTERNAL;
153
154/**
155 * Internal worker for waking up the waiting thread.
156 */
157static void rtFileAioCtxWakeup(PRTFILEAIOCTXINTERNAL pCtxInt)
158{
159 /*
160 * Read the thread handle before the status flag.
161 * If we read the handle after the flag we might
162 * end up with an invalid handle because the thread
163 * waiting in RTFileAioCtxWakeup() might get scheduled
164 * before we read the flag and returns.
165 * We can ensure that the handle is valid if fWaiting is true
166 * when reading the handle before the status flag.
167 */
168 RTTHREAD hThread;
169 ASMAtomicReadHandle(&pCtxInt->hThreadWait, &hThread);
170 bool fWaiting = ASMAtomicReadBool(&pCtxInt->fWaiting);
171 if (fWaiting)
172 {
173 /*
174 * If a thread waits the handle must be valid.
175 * It is possible that the thread returns from
176 * aio_suspend() before the signal is send.
177 * This is no problem because we already set fWokenUp
178 * to true which will let the thread return VERR_INTERRUPTED
179 * and the next call to RTFileAioCtxWait() will not
180 * return VERR_INTERRUPTED because signals are not saved
181 * and will simply vanish if the destination thread can't
182 * receive it.
183 */
184 Assert(hThread != NIL_RTTHREAD);
185 RTThreadPoke(hThread);
186 }
187}
188
189/**
190 * Internal worker processing events and inserting new requests into the waiting list.
191 */
192static int rtFileAioCtxProcessEvents(PRTFILEAIOCTXINTERNAL pCtxInt)
193{
194 int rc = VINF_SUCCESS;
195
196 /* Process new requests first. */
197 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUpInternal, false);
198 if (fWokenUp)
199 {
200 for (unsigned iSlot = 0; iSlot < RT_ELEMENTS(pCtxInt->apReqsNewHead); iSlot++)
201 {
202 PRTFILEAIOREQINTERNAL pReqHead = ASMAtomicXchgPtrT(&pCtxInt->apReqsNewHead[iSlot], NULL, PRTFILEAIOREQINTERNAL);
203
204 while ( (pCtxInt->iFirstFree < pCtxInt->cReqsWaitMax)
205 && pReqHead)
206 {
207 RTFIELAIOREQ_ASSERT_STATE(pReqHead, SUBMITTED);
208 pCtxInt->apReqs[pCtxInt->iFirstFree] = pReqHead;
209 pReqHead->iWaitingList = pCtxInt->iFirstFree;
210 pReqHead = pReqHead->pNext;
211
212 /* Clear pointer to next and previous element just for safety. */
213 pCtxInt->apReqs[pCtxInt->iFirstFree]->pNext = NULL;
214 pCtxInt->apReqs[pCtxInt->iFirstFree]->pPrev = NULL;
215 pCtxInt->iFirstFree++;
216
217 Assert( (pCtxInt->iFirstFree <= pCtxInt->cMaxRequests)
218 && (pCtxInt->iFirstFree <= pCtxInt->cReqsWaitMax));
219 }
220
221 /* Append the rest to the wait list. */
222 if (pReqHead)
223 {
224 RTFIELAIOREQ_ASSERT_STATE(pReqHead, SUBMITTED);
225 if (!pCtxInt->pReqsWaitHead)
226 {
227 Assert(!pCtxInt->pReqsWaitTail);
228 pCtxInt->pReqsWaitHead = pReqHead;
229 pReqHead->pPrev = NULL;
230 }
231 else
232 {
233 AssertPtr(pCtxInt->pReqsWaitTail);
234
235 pCtxInt->pReqsWaitTail->pNext = pReqHead;
236 pReqHead->pPrev = pCtxInt->pReqsWaitTail;
237 }
238
239 /* Update tail. */
240 while (pReqHead->pNext)
241 {
242 RTFIELAIOREQ_ASSERT_STATE(pReqHead->pNext, SUBMITTED);
243 pReqHead = pReqHead->pNext;
244 }
245
246 pCtxInt->pReqsWaitTail = pReqHead;
247 pCtxInt->pReqsWaitTail->pNext = NULL;
248 }
249 }
250
251 /* Check if a request needs to be canceled. */
252 PRTFILEAIOREQINTERNAL pReqToCancel = ASMAtomicReadPtrT(&pCtxInt->pReqToCancel, PRTFILEAIOREQINTERNAL);
253 if (pReqToCancel)
254 {
255 /* The request can be in the array waiting for completion or still in the list because it is full. */
256 if (pReqToCancel->iWaitingList != RTFILEAIOCTX_WAIT_ENTRY_INVALID)
257 {
258 /* Put it out of the waiting list. */
259 pCtxInt->apReqs[pReqToCancel->iWaitingList] = pCtxInt->apReqs[--pCtxInt->iFirstFree];
260 pCtxInt->apReqs[pReqToCancel->iWaitingList]->iWaitingList = pReqToCancel->iWaitingList;
261 }
262 else
263 {
264 /* Unlink from the waiting list. */
265 PRTFILEAIOREQINTERNAL pPrev = pReqToCancel->pPrev;
266 PRTFILEAIOREQINTERNAL pNext = pReqToCancel->pNext;
267
268 if (pNext)
269 pNext->pPrev = pPrev;
270 else
271 {
272 /* We canceled the tail. */
273 pCtxInt->pReqsWaitTail = pPrev;
274 }
275
276 if (pPrev)
277 pPrev->pNext = pNext;
278 else
279 {
280 /* We canceled the head. */
281 pCtxInt->pReqsWaitHead = pNext;
282 }
283 }
284
285 ASMAtomicDecS32(&pCtxInt->cRequests);
286 AssertMsg(pCtxInt->cRequests >= 0, ("Canceled request not which is not in this context\n"));
287 RTSemEventSignal(pCtxInt->SemEventCancel);
288 }
289 }
290 else
291 {
292 if (ASMAtomicXchgBool(&pCtxInt->fWokenUp, false))
293 rc = VERR_INTERRUPTED;
294 }
295
296 return rc;
297}
298
299RTR3DECL(int) RTFileAioGetLimits(PRTFILEAIOLIMITS pAioLimits)
300{
301 int rcBSD = 0;
302 AssertPtrReturn(pAioLimits, VERR_INVALID_POINTER);
303
304#if defined(RT_OS_DARWIN)
305 int cReqsOutstandingMax = 0;
306 size_t cbParameter = sizeof(int);
307
308 rcBSD = sysctlbyname("kern.aioprocmax", /* name */
309 &cReqsOutstandingMax, /* Where to store the old value. */
310 &cbParameter, /* Size of the memory pointed to. */
311 NULL, /* Where the new value is located. */
312 0); /* Where the size of the new value is stored. */
313 if (rcBSD == -1)
314 return RTErrConvertFromErrno(errno);
315
316 pAioLimits->cReqsOutstandingMax = cReqsOutstandingMax;
317 pAioLimits->cbBufferAlignment = 0;
318#elif defined(RT_OS_FREEBSD)
319 /*
320 * The AIO API is implemented in a kernel module which is not
321 * loaded by default.
322 * If it is loaded there are additional sysctl parameters.
323 */
324 int cReqsOutstandingMax = 0;
325 size_t cbParameter = sizeof(int);
326
327 rcBSD = sysctlbyname("vfs.aio.max_aio_per_proc", /* name */
328 &cReqsOutstandingMax, /* Where to store the old value. */
329 &cbParameter, /* Size of the memory pointed to. */
330 NULL, /* Where the new value is located. */
331 0); /* Where the size of the new value is stored. */
332 if (rcBSD == -1)
333 {
334 /* ENOENT means the value is unknown thus the module is not loaded. */
335 if (errno == ENOENT)
336 return VERR_NOT_SUPPORTED;
337 else
338 return RTErrConvertFromErrno(errno);
339 }
340
341 pAioLimits->cReqsOutstandingMax = cReqsOutstandingMax;
342 pAioLimits->cbBufferAlignment = 0;
343#else
344 pAioLimits->cReqsOutstandingMax = RTFILEAIO_UNLIMITED_REQS;
345 pAioLimits->cbBufferAlignment = 0;
346#endif
347
348 return VINF_SUCCESS;
349}
350
351RTR3DECL(int) RTFileAioReqCreate(PRTFILEAIOREQ phReq)
352{
353 AssertPtrReturn(phReq, VERR_INVALID_POINTER);
354
355 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)RTMemAllocZ(sizeof(RTFILEAIOREQINTERNAL));
356 if (RT_UNLIKELY(!pReqInt))
357 return VERR_NO_MEMORY;
358
359 pReqInt->pCtxInt = NULL;
360 pReqInt->u32Magic = RTFILEAIOREQ_MAGIC;
361 pReqInt->iWaitingList = RTFILEAIOCTX_WAIT_ENTRY_INVALID;
362 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
363
364 *phReq = (RTFILEAIOREQ)pReqInt;
365
366 return VINF_SUCCESS;
367}
368
369
370RTDECL(int) RTFileAioReqDestroy(RTFILEAIOREQ hReq)
371{
372 /*
373 * Validate the handle and ignore nil.
374 */
375 if (hReq == NIL_RTFILEAIOREQ)
376 return VINF_SUCCESS;
377 PRTFILEAIOREQINTERNAL pReqInt = hReq;
378 RTFILEAIOREQ_VALID_RETURN(pReqInt);
379 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
380
381 /*
382 * Trash the magic and free it.
383 */
384 ASMAtomicUoWriteU32(&pReqInt->u32Magic, ~RTFILEAIOREQ_MAGIC);
385 RTMemFree(pReqInt);
386 return VINF_SUCCESS;
387}
388
389/**
390 * Worker setting up the request.
391 */
392DECLINLINE(int) rtFileAioReqPrepareTransfer(RTFILEAIOREQ hReq, RTFILE hFile,
393 unsigned uTransferDirection,
394 RTFOFF off, void *pvBuf, size_t cbTransfer,
395 void *pvUser)
396{
397 /*
398 * Validate the input.
399 */
400 PRTFILEAIOREQINTERNAL pReqInt = hReq;
401 RTFILEAIOREQ_VALID_RETURN(pReqInt);
402 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
403 Assert(hFile != NIL_RTFILE);
404 AssertPtr(pvBuf);
405 Assert(off >= 0);
406 Assert(cbTransfer > 0);
407
408 memset(&pReqInt->AioCB, 0, sizeof(struct aiocb));
409 pReqInt->fFlush = false;
410 pReqInt->AioCB.aio_lio_opcode = uTransferDirection;
411 pReqInt->AioCB.aio_fildes = RTFileToNative(hFile);
412 pReqInt->AioCB.aio_offset = off;
413 pReqInt->AioCB.aio_nbytes = cbTransfer;
414 pReqInt->AioCB.aio_buf = pvBuf;
415 pReqInt->pvUser = pvUser;
416 pReqInt->pCtxInt = NULL;
417 pReqInt->Rc = VERR_FILE_AIO_IN_PROGRESS;
418 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
419
420 return VINF_SUCCESS;
421}
422
423
424RTDECL(int) RTFileAioReqPrepareRead(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
425 void *pvBuf, size_t cbRead, void *pvUser)
426{
427 return rtFileAioReqPrepareTransfer(hReq, hFile, LIO_READ,
428 off, pvBuf, cbRead, pvUser);
429}
430
431
432RTDECL(int) RTFileAioReqPrepareWrite(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
433 void const *pvBuf, size_t cbWrite, void *pvUser)
434{
435 return rtFileAioReqPrepareTransfer(hReq, hFile, LIO_WRITE,
436 off, (void *)pvBuf, cbWrite, pvUser);
437}
438
439
440RTDECL(int) RTFileAioReqPrepareFlush(RTFILEAIOREQ hReq, RTFILE hFile, void *pvUser)
441{
442 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)hReq;
443
444 RTFILEAIOREQ_VALID_RETURN(pReqInt);
445 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
446 Assert(hFile != NIL_RTFILE);
447
448 pReqInt->fFlush = true;
449 pReqInt->AioCB.aio_fildes = RTFileToNative(hFile);
450 pReqInt->AioCB.aio_offset = 0;
451 pReqInt->AioCB.aio_nbytes = 0;
452 pReqInt->AioCB.aio_buf = NULL;
453 pReqInt->pvUser = pvUser;
454 pReqInt->Rc = VERR_FILE_AIO_IN_PROGRESS;
455 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
456
457 return VINF_SUCCESS;
458}
459
460
461RTDECL(void *) RTFileAioReqGetUser(RTFILEAIOREQ hReq)
462{
463 PRTFILEAIOREQINTERNAL pReqInt = hReq;
464 RTFILEAIOREQ_VALID_RETURN_RC(pReqInt, NULL);
465
466 return pReqInt->pvUser;
467}
468
469
470RTDECL(int) RTFileAioReqCancel(RTFILEAIOREQ hReq)
471{
472 PRTFILEAIOREQINTERNAL pReqInt = hReq;
473 RTFILEAIOREQ_VALID_RETURN(pReqInt);
474 RTFILEAIOREQ_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_NOT_SUBMITTED);
475
476 ASMAtomicXchgBool(&pReqInt->fCanceled, true);
477
478 int rcPosix = aio_cancel(pReqInt->AioCB.aio_fildes, &pReqInt->AioCB);
479
480 if (rcPosix == AIO_CANCELED)
481 {
482 PRTFILEAIOCTXINTERNAL pCtxInt = pReqInt->pCtxInt;
483 /*
484 * Notify the waiting thread that the request was canceled.
485 */
486 AssertMsg(VALID_PTR(pCtxInt),
487 ("Invalid state. Request was canceled but wasn't submitted\n"));
488
489 Assert(!pCtxInt->pReqToCancel);
490 ASMAtomicWritePtr(&pCtxInt->pReqToCancel, pReqInt);
491 rtFileAioCtxWakeup(pCtxInt);
492
493 /* Wait for acknowledge. */
494 int rc = RTSemEventWait(pCtxInt->SemEventCancel, RT_INDEFINITE_WAIT);
495 AssertRC(rc);
496
497 ASMAtomicWriteNullPtr(&pCtxInt->pReqToCancel);
498 pReqInt->Rc = VERR_FILE_AIO_CANCELED;
499 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
500 return VINF_SUCCESS;
501 }
502 else if (rcPosix == AIO_ALLDONE)
503 return VERR_FILE_AIO_COMPLETED;
504 else if (rcPosix == AIO_NOTCANCELED)
505 return VERR_FILE_AIO_IN_PROGRESS;
506 else
507 return RTErrConvertFromErrno(errno);
508}
509
510
511RTDECL(int) RTFileAioReqGetRC(RTFILEAIOREQ hReq, size_t *pcbTransfered)
512{
513 PRTFILEAIOREQINTERNAL pReqInt = hReq;
514 RTFILEAIOREQ_VALID_RETURN(pReqInt);
515 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
516 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, PREPARED, VERR_FILE_AIO_NOT_SUBMITTED);
517 AssertPtrNull(pcbTransfered);
518
519 if ( (RT_SUCCESS(pReqInt->Rc))
520 && (pcbTransfered))
521 *pcbTransfered = pReqInt->cbTransfered;
522
523 return pReqInt->Rc;
524}
525
526
527RTDECL(int) RTFileAioCtxCreate(PRTFILEAIOCTX phAioCtx, uint32_t cAioReqsMax,
528 uint32_t fFlags)
529{
530 PRTFILEAIOCTXINTERNAL pCtxInt;
531 unsigned cReqsWaitMax;
532
533 AssertPtrReturn(phAioCtx, VERR_INVALID_POINTER);
534 AssertReturn(!(fFlags & ~RTFILEAIOCTX_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
535
536 if (cAioReqsMax == RTFILEAIO_UNLIMITED_REQS)
537 return VERR_OUT_OF_RANGE;
538
539 cReqsWaitMax = RT_MIN(cAioReqsMax, AIO_LISTIO_MAX);
540
541 pCtxInt = (PRTFILEAIOCTXINTERNAL)RTMemAllocZ( sizeof(RTFILEAIOCTXINTERNAL)
542 + cReqsWaitMax * sizeof(PRTFILEAIOREQINTERNAL));
543 if (RT_UNLIKELY(!pCtxInt))
544 return VERR_NO_MEMORY;
545
546 /* Create event semaphore. */
547 int rc = RTSemEventCreate(&pCtxInt->SemEventCancel);
548 if (RT_FAILURE(rc))
549 {
550 RTMemFree(pCtxInt);
551 return rc;
552 }
553
554 pCtxInt->u32Magic = RTFILEAIOCTX_MAGIC;
555 pCtxInt->cMaxRequests = cAioReqsMax;
556 pCtxInt->cReqsWaitMax = cReqsWaitMax;
557 pCtxInt->fFlags = fFlags;
558 *phAioCtx = (RTFILEAIOCTX)pCtxInt;
559
560 return VINF_SUCCESS;
561}
562
563
564RTDECL(int) RTFileAioCtxDestroy(RTFILEAIOCTX hAioCtx)
565{
566 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
567
568 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
569
570 if (RT_UNLIKELY(pCtxInt->cRequests))
571 return VERR_FILE_AIO_BUSY;
572
573 RTSemEventDestroy(pCtxInt->SemEventCancel);
574 RTMemFree(pCtxInt);
575
576 return VINF_SUCCESS;
577}
578
579
580RTDECL(uint32_t) RTFileAioCtxGetMaxReqCount(RTFILEAIOCTX hAioCtx)
581{
582 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
583
584 if (hAioCtx == NIL_RTFILEAIOCTX)
585 return RTFILEAIO_UNLIMITED_REQS;
586 return pCtxInt->cMaxRequests;
587}
588
589RTDECL(int) RTFileAioCtxAssociateWithFile(RTFILEAIOCTX hAioCtx, RTFILE hFile)
590{
591 NOREF(hAioCtx); NOREF(hFile);
592 return VINF_SUCCESS;
593}
594
#ifdef LOG_ENABLED
/**
 * Writes the complete state of a context to the flow log.
 *
 * Only compiled when logging is enabled.
 *
 * @param   pCtxInt     The context to dump.
 */
static void rtFileAioCtxDump(PRTFILEAIOCTXINTERNAL pCtxInt)
{
    unsigned idx;

    /* Counters, waiter handle and the wake-up flags. */
    LogFlow(("cRequests=%d\n", pCtxInt->cRequests));
    LogFlow(("cMaxRequests=%u\n", pCtxInt->cMaxRequests));
    LogFlow(("hThreadWait=%#p\n", pCtxInt->hThreadWait));
    LogFlow(("fWokenUp=%RTbool\n", pCtxInt->fWokenUp));
    LogFlow(("fWaiting=%RTbool\n", pCtxInt->fWaiting));
    LogFlow(("fWokenUpInternal=%RTbool\n", pCtxInt->fWokenUpInternal));

    /* The hand-over slots. */
    for (idx = 0; idx < RT_ELEMENTS(pCtxInt->apReqsNewHead); idx++)
        LogFlow(("apReqsNewHead[%u]=%#p\n", idx, pCtxInt->apReqsNewHead[idx]));

    /* Cancel slot and overflow wait list. */
    LogFlow(("pReqToCancel=%#p\n", pCtxInt->pReqToCancel));
    LogFlow(("pReqsWaitHead=%#p\n", pCtxInt->pReqsWaitHead));
    LogFlow(("pReqsWaitTail=%#p\n", pCtxInt->pReqsWaitTail));

    /* The waiting array, all of its entries. */
    LogFlow(("cReqsWaitMax=%u\n", pCtxInt->cReqsWaitMax));
    LogFlow(("iFirstFree=%u\n", pCtxInt->iFirstFree));
    for (idx = 0; idx < pCtxInt->cReqsWaitMax; idx++)
        LogFlow(("apReqs[%u]=%#p\n", idx, pCtxInt->apReqs[idx]));
}
#endif
618
619RTDECL(int) RTFileAioCtxSubmit(RTFILEAIOCTX hAioCtx, PRTFILEAIOREQ pahReqs, size_t cReqs)
620{
621 int rc = VINF_SUCCESS;
622 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
623
624 /* Parameter checks */
625 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
626 AssertReturn(cReqs != 0, VERR_INVALID_POINTER);
627 AssertPtrReturn(pahReqs, VERR_INVALID_PARAMETER);
628
629 rtFileAioCtxDump(pCtxInt);
630
631 /* Check that we don't exceed the limit */
632 if (ASMAtomicUoReadS32(&pCtxInt->cRequests) + cReqs > pCtxInt->cMaxRequests)
633 return VERR_FILE_AIO_LIMIT_EXCEEDED;
634
635 PRTFILEAIOREQINTERNAL pHead = NULL;
636
637 do
638 {
639 int rcPosix = 0;
640 size_t cReqsSubmit = 0;
641 size_t i = 0;
642 PRTFILEAIOREQINTERNAL pReqInt;
643
644 while ( (i < cReqs)
645 && (i < AIO_LISTIO_MAX))
646 {
647 pReqInt = pahReqs[i];
648 if (RTFILEAIOREQ_IS_NOT_VALID(pReqInt))
649 {
650 /* Undo everything and stop submitting. */
651 for (size_t iUndo = 0; iUndo < i; iUndo++)
652 {
653 pReqInt = pahReqs[iUndo];
654 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
655 pReqInt->pCtxInt = NULL;
656
657 /* Unlink from the list again. */
658 PRTFILEAIOREQINTERNAL pNext, pPrev;
659 pNext = pReqInt->pNext;
660 pPrev = pReqInt->pPrev;
661 if (pNext)
662 pNext->pPrev = pPrev;
663 if (pPrev)
664 pPrev->pNext = pNext;
665 else
666 pHead = pNext;
667 }
668 rc = VERR_INVALID_HANDLE;
669 break;
670 }
671
672 pReqInt->pCtxInt = pCtxInt;
673
674 if (pReqInt->fFlush)
675 break;
676
677 /* Link them together. */
678 pReqInt->pNext = pHead;
679 if (pHead)
680 pHead->pPrev = pReqInt;
681 pReqInt->pPrev = NULL;
682 pHead = pReqInt;
683 RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
684
685 cReqsSubmit++;
686 i++;
687 }
688
689 if (cReqsSubmit)
690 {
691 rcPosix = lio_listio(LIO_NOWAIT, (struct aiocb **)pahReqs, cReqsSubmit, NULL);
692 if (RT_UNLIKELY(rcPosix < 0))
693 {
694 size_t cReqsSubmitted = cReqsSubmit;
695
696 if (errno == EAGAIN)
697 rc = VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
698 else
699 rc = RTErrConvertFromErrno(errno);
700
701 /* Check which ones were not submitted. */
702 for (i = 0; i < cReqsSubmit; i++)
703 {
704 pReqInt = pahReqs[i];
705
706 rcPosix = aio_error(&pReqInt->AioCB);
707
708 if ((rcPosix != EINPROGRESS) && (rcPosix != 0))
709 {
710 cReqsSubmitted--;
711
712#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
713 if (errno == EINVAL)
714#else
715 if (rcPosix == EINVAL)
716#endif
717 {
718 /* Was not submitted. */
719 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
720 }
721 else
722 {
723 /* An error occurred. */
724 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
725
726 /*
727 * Looks like Apple and glibc interpret the standard in different ways.
728 * glibc returns the error code which would be in errno but Apple returns
729 * -1 and sets errno to the appropriate value
730 */
731#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
732 Assert(rcPosix == -1);
733 pReqInt->Rc = RTErrConvertFromErrno(errno);
734#elif defined(RT_OS_LINUX)
735 pReqInt->Rc = RTErrConvertFromErrno(rcPosix);
736#endif
737 pReqInt->cbTransfered = 0;
738 }
739 /* Unlink from the list. */
740 PRTFILEAIOREQINTERNAL pNext, pPrev;
741 pNext = pReqInt->pNext;
742 pPrev = pReqInt->pPrev;
743 if (pNext)
744 pNext->pPrev = pPrev;
745 if (pPrev)
746 pPrev->pNext = pNext;
747 else
748 pHead = pNext;
749
750 pReqInt->pNext = NULL;
751 pReqInt->pPrev = NULL;
752 }
753 }
754 ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmitted);
755 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
756 break;
757 }
758
759 ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmit);
760 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
761 cReqs -= cReqsSubmit;
762 pahReqs += cReqsSubmit;
763 }
764
765 /*
766 * Check if we have a flush request now.
767 * If not we hit the AIO_LISTIO_MAX limit
768 * and will continue submitting requests
769 * above.
770 */
771 if (cReqs && RT_SUCCESS_NP(rc))
772 {
773 pReqInt = pahReqs[0];
774
775 if (pReqInt->fFlush)
776 {
777 /*
778 * lio_listio does not work with flush requests so
779 * we have to use aio_fsync directly.
780 */
781 rcPosix = aio_fsync(O_SYNC, &pReqInt->AioCB);
782 if (RT_UNLIKELY(rcPosix < 0))
783 {
784 if (errno == EAGAIN)
785 {
786 rc = VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
787 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
788 }
789 else
790 {
791 rc = RTErrConvertFromErrno(errno);
792 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
793 pReqInt->Rc = rc;
794 }
795 pReqInt->cbTransfered = 0;
796 break;
797 }
798
799 /* Link them together. */
800 pReqInt->pNext = pHead;
801 if (pHead)
802 pHead->pPrev = pReqInt;
803 pReqInt->pPrev = NULL;
804 pHead = pReqInt;
805 RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
806
807 ASMAtomicIncS32(&pCtxInt->cRequests);
808 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
809 cReqs--;
810 pahReqs++;
811 }
812 }
813 } while ( cReqs
814 && RT_SUCCESS_NP(rc));
815
816 if (pHead)
817 {
818 /*
819 * Forward successfully submitted requests to the thread waiting for requests.
820 * We search for a free slot first and if we don't find one
821 * we will grab the first one and append our list to the existing entries.
822 */
823 unsigned iSlot = 0;
824 while ( (iSlot < RT_ELEMENTS(pCtxInt->apReqsNewHead))
825 && !ASMAtomicCmpXchgPtr(&pCtxInt->apReqsNewHead[iSlot], pHead, NULL))
826 iSlot++;
827
828 if (iSlot == RT_ELEMENTS(pCtxInt->apReqsNewHead))
829 {
830 /* Nothing found. */
831 PRTFILEAIOREQINTERNAL pOldHead = ASMAtomicXchgPtrT(&pCtxInt->apReqsNewHead[0], NULL, PRTFILEAIOREQINTERNAL);
832
833 /* Find the end of the current head and link the old list to the current. */
834 PRTFILEAIOREQINTERNAL pTail = pHead;
835 while (pTail->pNext)
836 pTail = pTail->pNext;
837
838 pTail->pNext = pOldHead;
839
840 ASMAtomicWritePtr(&pCtxInt->apReqsNewHead[0], pHead);
841 }
842
843 /* Set the internal wakeup flag and wakeup the thread if possible. */
844 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUpInternal, true);
845 if (!fWokenUp)
846 rtFileAioCtxWakeup(pCtxInt);
847 }
848
849 rtFileAioCtxDump(pCtxInt);
850
851 return rc;
852}
853
854
855RTDECL(int) RTFileAioCtxWait(RTFILEAIOCTX hAioCtx, size_t cMinReqs, RTMSINTERVAL cMillies,
856 PRTFILEAIOREQ pahReqs, size_t cReqs, uint32_t *pcReqs)
857{
858 int rc = VINF_SUCCESS;
859 int cRequestsCompleted = 0;
860 PRTFILEAIOCTXINTERNAL pCtxInt = (PRTFILEAIOCTXINTERNAL)hAioCtx;
861 struct timespec Timeout;
862 struct timespec *pTimeout = NULL;
863 uint64_t StartNanoTS = 0;
864
865 LogFlowFunc(("hAioCtx=%#p cMinReqs=%zu cMillies=%u pahReqs=%#p cReqs=%zu pcbReqs=%#p\n",
866 hAioCtx, cMinReqs, cMillies, pahReqs, cReqs, pcReqs));
867
868 /* Check parameters. */
869 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
870 AssertPtrReturn(pcReqs, VERR_INVALID_POINTER);
871 AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
872 AssertReturn(cReqs != 0, VERR_INVALID_PARAMETER);
873 AssertReturn(cReqs >= cMinReqs, VERR_OUT_OF_RANGE);
874
875 rtFileAioCtxDump(pCtxInt);
876
877 int32_t cRequestsWaiting = ASMAtomicReadS32(&pCtxInt->cRequests);
878
879 if ( RT_UNLIKELY(cRequestsWaiting <= 0)
880 && !(pCtxInt->fFlags & RTFILEAIOCTX_FLAGS_WAIT_WITHOUT_PENDING_REQUESTS))
881 return VERR_FILE_AIO_NO_REQUEST;
882
883 if (RT_UNLIKELY(cMinReqs > (uint32_t)cRequestsWaiting))
884 return VERR_INVALID_PARAMETER;
885
886 if (cMillies != RT_INDEFINITE_WAIT)
887 {
888 Timeout.tv_sec = cMillies / 1000;
889 Timeout.tv_nsec = (cMillies % 1000) * 1000000;
890 pTimeout = &Timeout;
891 StartNanoTS = RTTimeNanoTS();
892 }
893
894 /* Wait for at least one. */
895 if (!cMinReqs)
896 cMinReqs = 1;
897
898 /* For the wakeup call. */
899 Assert(pCtxInt->hThreadWait == NIL_RTTHREAD);
900 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, RTThreadSelf());
901
902 /* Update the waiting list once before we enter the loop. */
903 rc = rtFileAioCtxProcessEvents(pCtxInt);
904
905 while ( cMinReqs
906 && RT_SUCCESS_NP(rc))
907 {
908#ifdef RT_STRICT
909 if (RT_UNLIKELY(!pCtxInt->iFirstFree))
910 {
911 for (unsigned i = 0; i < pCtxInt->cReqsWaitMax; i++)
912 RTAssertMsg2Weak("wait[%d] = %#p\n", i, pCtxInt->apReqs[i]);
913
914 AssertMsgFailed(("No request to wait for. pReqsWaitHead=%#p pReqsWaitTail=%#p\n",
915 pCtxInt->pReqsWaitHead, pCtxInt->pReqsWaitTail));
916 }
917#endif
918
919 LogFlow(("Waiting for %d requests to complete\n", pCtxInt->iFirstFree));
920 rtFileAioCtxDump(pCtxInt);
921
922 ASMAtomicXchgBool(&pCtxInt->fWaiting, true);
923 int rcPosix = aio_suspend((const struct aiocb * const *)pCtxInt->apReqs,
924 pCtxInt->iFirstFree, pTimeout);
925 ASMAtomicXchgBool(&pCtxInt->fWaiting, false);
926 if (rcPosix < 0)
927 {
928 LogFlow(("aio_suspend failed %d nent=%u\n", errno, pCtxInt->iFirstFree));
929 /* Check that this is an external wakeup event. */
930 if (errno == EINTR)
931 rc = rtFileAioCtxProcessEvents(pCtxInt);
932 else
933 rc = RTErrConvertFromErrno(errno);
934 }
935 else
936 {
937 /* Requests finished. */
938 unsigned iReqCurr = 0;
939 unsigned cDone = 0;
940
941 /* Remove completed requests from the waiting list. */
942 while ( (iReqCurr < pCtxInt->iFirstFree)
943 && (cDone < cReqs))
944 {
945 PRTFILEAIOREQINTERNAL pReq = pCtxInt->apReqs[iReqCurr];
946 int rcReq = aio_error(&pReq->AioCB);
947
948 if (rcReq != EINPROGRESS)
949 {
950 /* Completed store the return code. */
951 if (rcReq == 0)
952 {
953 pReq->Rc = VINF_SUCCESS;
954 /* Call aio_return() to free resources. */
955 pReq->cbTransfered = aio_return(&pReq->AioCB);
956 }
957 else
958 {
959#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
960 pReq->Rc = RTErrConvertFromErrno(errno);
961#else
962 pReq->Rc = RTErrConvertFromErrno(rcReq);
963#endif
964 }
965
966 /* Mark the request as finished. */
967 RTFILEAIOREQ_SET_STATE(pReq, COMPLETED);
968 cDone++;
969
970 /* If there are other entries waiting put the head into the now free entry. */
971 if (pCtxInt->pReqsWaitHead)
972 {
973 PRTFILEAIOREQINTERNAL pReqInsert = pCtxInt->pReqsWaitHead;
974
975 pCtxInt->pReqsWaitHead = pReqInsert->pNext;
976 if (!pCtxInt->pReqsWaitHead)
977 {
978 /* List is empty now. Clear tail too. */
979 pCtxInt->pReqsWaitTail = NULL;
980 }
981
982 pReqInsert->iWaitingList = pReq->iWaitingList;
983 pCtxInt->apReqs[pReqInsert->iWaitingList] = pReqInsert;
984 iReqCurr++;
985 }
986 else
987 {
988 /*
989 * Move the last entry into the current position to avoid holes
990 * but only if it is not the last element already.
991 */
992 if (pReq->iWaitingList < pCtxInt->iFirstFree - 1)
993 {
994 pCtxInt->apReqs[pReq->iWaitingList] = pCtxInt->apReqs[--pCtxInt->iFirstFree];
995 pCtxInt->apReqs[pReq->iWaitingList]->iWaitingList = pReq->iWaitingList;
996 }
997 else
998 pCtxInt->iFirstFree--;
999
1000 pCtxInt->apReqs[pCtxInt->iFirstFree] = NULL;
1001 }
1002
1003 /* Put the request into the completed list. */
1004 pahReqs[cRequestsCompleted++] = pReq;
1005 pReq->iWaitingList = RTFILEAIOCTX_WAIT_ENTRY_INVALID;
1006 }
1007 else
1008 iReqCurr++;
1009 }
1010
1011 AssertMsg((cDone <= cReqs), ("Overflow cReqs=%u cMinReqs=%u cDone=%u\n",
1012 cReqs, cDone));
1013 cReqs -= cDone;
1014 cMinReqs = RT_MAX(cMinReqs, cDone) - cDone;
1015 ASMAtomicSubS32(&pCtxInt->cRequests, cDone);
1016
1017 AssertMsg(pCtxInt->cRequests >= 0, ("Finished more requests than currently active\n"));
1018
1019 if (!cMinReqs)
1020 break;
1021
1022 if (cMillies != RT_INDEFINITE_WAIT)
1023 {
1024 uint64_t TimeDiff;
1025
1026 /* Recalculate the timeout. */
1027 TimeDiff = RTTimeSystemNanoTS() - StartNanoTS;
1028 Timeout.tv_sec = Timeout.tv_sec - (TimeDiff / 1000000);
1029 Timeout.tv_nsec = Timeout.tv_nsec - (TimeDiff % 1000000);
1030 }
1031
1032 /* Check for new elements. */
1033 rc = rtFileAioCtxProcessEvents(pCtxInt);
1034 }
1035 }
1036
1037 *pcReqs = cRequestsCompleted;
1038 Assert(pCtxInt->hThreadWait == RTThreadSelf());
1039 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, NIL_RTTHREAD);
1040
1041 rtFileAioCtxDump(pCtxInt);
1042
1043 return rc;
1044}
1045
1046
1047RTDECL(int) RTFileAioCtxWakeup(RTFILEAIOCTX hAioCtx)
1048{
1049 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
1050 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
1051
1052 /** @todo r=bird: Define the protocol for how to resume work after calling
1053 * this function. */
1054
1055 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUp, true);
1056 if (!fWokenUp)
1057 rtFileAioCtxWakeup(pCtxInt);
1058
1059 return VINF_SUCCESS;
1060}
1061
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette