VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp@ 99863

Last change on this file since 99863 was 99739, checked in by vboxsync, 19 months ago

*: doxygen corrections (mostly about removing @returns from functions returning void).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 67.1 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 99739 2023-05-11 01:01:08Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Async File I/O manager.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
33#include <iprt/types.h>
34#include <iprt/asm.h>
35#include <iprt/file.h>
36#include <iprt/mem.h>
37#include <iprt/string.h>
38#include <iprt/assert.h>
39#include <VBox/log.h>
40
41#include "PDMAsyncCompletionFileInternal.h"
42
43
44/*********************************************************************************************************************************
45* Defined Constants And Macros *
46*********************************************************************************************************************************/
47/** The update period for the I/O load statistics in ms. */
48#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
49/** Initial limit of active requests for a manager and the step by which the limit is raised when the manager grows. */
50#define PDMACEPFILEMGR_REQS_STEP 64
51
52
53/*********************************************************************************************************************************
54* Internal functions *
55*********************************************************************************************************************************/
56static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
57 PPDMACEPFILEMGR pAioMgr,
58 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
59
60static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
61 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
62 PPDMACFILERANGELOCK pRangeLock);
63
64static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
65 int rc, size_t cbTransfered);
66
67
68int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
69{
70 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
71
72 int rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
73 if (rc == VERR_OUT_OF_RANGE)
74 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
75
76 if (RT_SUCCESS(rc))
77 {
78 /* Initialize request handle array. */
79 pAioMgr->iFreeEntry = 0;
80 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
81 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
82
83 if (pAioMgr->pahReqsFree)
84 {
85 /* Create the range lock memcache. */
86 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
87 0, UINT32_MAX, NULL, NULL, NULL, 0);
88 if (RT_SUCCESS(rc))
89 return VINF_SUCCESS;
90
91 RTMemFree(pAioMgr->pahReqsFree);
92 }
93 else
94 {
95 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
96 rc = VERR_NO_MEMORY;
97 }
98 }
99
100 return rc;
101}
102
103void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
104{
105 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
106
107 while (pAioMgr->iFreeEntry > 0)
108 {
109 pAioMgr->iFreeEntry--;
110 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
111 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
112 }
113
114 RTMemFree(pAioMgr->pahReqsFree);
115 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
116}
117
118#if 0 /* currently unused */
119/**
120 * Sorts the endpoint list with insertion sort.
121 */
122static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
123{
124 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
125
126 pEpPrev = pAioMgr->pEndpointsHead;
127 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
128
129 while (pEpCurr)
130 {
131 /* Remember the next element to sort because the list might change. */
132 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
133
134 /* Unlink the current element from the list. */
135 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
136 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
137
138 if (pPrev)
139 pPrev->AioMgr.pEndpointNext = pNext;
140 else
141 pAioMgr->pEndpointsHead = pNext;
142
143 if (pNext)
144 pNext->AioMgr.pEndpointPrev = pPrev;
145
146 /* Go back until we reached the place to insert the current endpoint into. */
147 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
148 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
149
150 /* Link the endpoint into the list. */
151 if (pEpPrev)
152 pNext = pEpPrev->AioMgr.pEndpointNext;
153 else
154 pNext = pAioMgr->pEndpointsHead;
155
156 pEpCurr->AioMgr.pEndpointNext = pNext;
157 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
158
159 if (pNext)
160 pNext->AioMgr.pEndpointPrev = pEpCurr;
161
162 if (pEpPrev)
163 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
164 else
165 pAioMgr->pEndpointsHead = pEpCurr;
166
167 pEpCurr = pEpNextToSort;
168 }
169
170#ifdef DEBUG
171 /* Validate sorting algorithm */
172 unsigned cEndpoints = 0;
173 pEpCurr = pAioMgr->pEndpointsHead;
174
175 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
176 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
177
178 while (pEpCurr)
179 {
180 cEndpoints++;
181
182 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
183 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
184
185 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
186 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
187
188 pEpCurr = pNext;
189 }
190
191 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
192
193#endif
194}
195#endif /* currently unused */
196
197/**
198 * Removes an endpoint from the currently assigned manager.
199 *
200 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
201 * FALSE otherwise.
202 * @param pEndpointRemove The endpoint to remove.
203 */
204static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
205{
206 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
207 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
208 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
209
210 pAioMgr->cEndpoints--;
211
212 if (pPrev)
213 pPrev->AioMgr.pEndpointNext = pNext;
214 else
215 pAioMgr->pEndpointsHead = pNext;
216
217 if (pNext)
218 pNext->AioMgr.pEndpointPrev = pPrev;
219
220 /* Make sure that there is no request pending on this manager for the endpoint. */
221 if (!pEndpointRemove->AioMgr.cRequestsActive)
222 {
223 Assert(!pEndpointRemove->pFlushReq);
224
225 /* Reopen the file so that the new endpoint can re-associate with the file */
226 RTFileClose(pEndpointRemove->hFile);
227 int rc = RTFileOpen(&pEndpointRemove->hFile, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
228 AssertRC(rc);
229 return false;
230 }
231
232 return true;
233}
234
235#if 0 /* currently unused */
236
237static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
238{
239 /* Balancing doesn't make sense with only one endpoint. */
240 if (pAioMgr->cEndpoints == 1)
241 return false;
242
243 /* Doesn't make sens to move endpoints if only one produces the whole load */
244 unsigned cEndpointsWithLoad = 0;
245
246 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
247
248 while (pCurr)
249 {
250 if (pCurr->AioMgr.cReqsPerSec)
251 cEndpointsWithLoad++;
252
253 pCurr = pCurr->AioMgr.pEndpointNext;
254 }
255
256 return (cEndpointsWithLoad > 1);
257}
258
259/**
260 * Creates a new I/O manager and spreads the I/O load of the endpoints
261 * between the given I/O manager and the new one.
262 *
263 * @param pAioMgr The I/O manager with high I/O load.
264 */
265static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
266{
267 /*
268 * Check if balancing would improve the situation.
269 */
270 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
271 {
272 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
273 PPDMACEPFILEMGR pAioMgrNew = NULL;
274
275 int rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
276 if (RT_SUCCESS(rc))
277 {
278 /* We will sort the list by request count per second. */
279 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
280
281 /* Now move some endpoints to the new manager. */
282 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
283 unsigned cReqsOther = 0;
284 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
285
286 while (pCurr)
287 {
288 if (cReqsHere <= cReqsOther)
289 {
290 /*
291 * The other manager has more requests to handle now.
292 * We will keep the current endpoint.
293 */
294 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
295 cReqsHere += pCurr->AioMgr.cReqsPerSec;
296 pCurr = pCurr->AioMgr.pEndpointNext;
297 }
298 else
299 {
300 /* Move to other endpoint. */
301 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
302 cReqsOther += pCurr->AioMgr.cReqsPerSec;
303
304 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
305
306 pCurr = pCurr->AioMgr.pEndpointNext;
307
308 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
309
310 if (fReqsPending)
311 {
312 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
313 pMove->AioMgr.fMoving = true;
314 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
315 }
316 else
317 {
318 pMove->AioMgr.fMoving = false;
319 pMove->AioMgr.pAioMgrDst = NULL;
320 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
321 }
322 }
323 }
324 }
325 else
326 {
327 /* Don't process further but leave a log entry about reduced performance. */
328 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
329 }
330 }
331 else
332 Log(("AIOMgr: Load balancing would not improve anything\n"));
333}
334
335#endif /* unused */
336
337/**
338 * Increase the maximum number of active requests for the given I/O manager.
339 *
340 * @returns VBox status code.
341 * @param pAioMgr The I/O manager to grow.
342 */
343static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
344{
345 LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));
346
347 AssertMsg( pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
348 && !pAioMgr->cRequestsActive,
349 ("Invalid state of the I/O manager\n"));
350
351#ifdef RT_OS_WINDOWS
352 /*
353 * Reopen the files of all assigned endpoints first so we can assign them to the new
354 * I/O context.
355 */
356 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
357
358 while (pCurr)
359 {
360 RTFileClose(pCurr->hFile);
361 int rc2 = RTFileOpen(&pCurr->hFile, pCurr->Core.pszUri, pCurr->fFlags); AssertRC(rc2);
362
363 pCurr = pCurr->AioMgr.pEndpointNext;
364 }
365#endif
366
367 /* Create the new bigger context. */
368 pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;
369
370 RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
371 int rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
372 if (rc == VERR_OUT_OF_RANGE)
373 rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
374
375 if (RT_SUCCESS(rc))
376 {
377 /* Close the old context. */
378 rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
379 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
380
381 pAioMgr->hAioCtx = hAioCtxNew;
382
383 /* Create a new I/O task handle array */
384 uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
385 RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));
386
387 if (pahReqNew)
388 {
389 /* Copy the cached request handles. */
390 for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
391 pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];
392
393 RTMemFree(pAioMgr->pahReqsFree);
394 pAioMgr->pahReqsFree = pahReqNew;
395 pAioMgr->cReqEntries = cReqEntriesNew;
396 LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
397 pAioMgr->cRequestsActiveMax));
398 }
399 else
400 rc = VERR_NO_MEMORY;
401 }
402
403#ifdef RT_OS_WINDOWS
404 /* Assign the file to the new context. */
405 pCurr = pAioMgr->pEndpointsHead;
406 while (pCurr)
407 {
408 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->hFile);
409 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
410
411 pCurr = pCurr->AioMgr.pEndpointNext;
412 }
413#endif
414
415 if (RT_FAILURE(rc))
416 {
417 LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
418 pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
419 }
420
421 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
422 LogFlowFunc(("returns rc=%Rrc\n", rc));
423
424 return rc;
425}
426
427/**
428 * Checks if a given status code is fatal.
429 * Non fatal errors can be fixed by migrating the endpoint to a
430 * failsafe manager.
431 *
432 * @returns true If the error is fatal and migrating to a failsafe manager doesn't help
433 * false If the error can be fixed by a migration. (image on NFS disk for example)
434 * @param rcReq The status code to check.
435 */
436DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
437{
438 return rcReq == VERR_DEV_IO_ERROR
439 || rcReq == VERR_FILE_IO_ERROR
440 || rcReq == VERR_DISK_IO_ERROR
441 || rcReq == VERR_DISK_FULL
442 || rcReq == VERR_FILE_TOO_BIG;
443}
444
445/**
446 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
447 *
448 * @returns VBox status code
449 * @param pAioMgr The I/O manager the error occurred on.
450 * @param rc The error code.
451 * @param SRC_POS The source location of the error (use RT_SRC_POS).
452 */
453static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
454{
455 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
456 pAioMgr, rc));
457 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
458 LogRel(("AIOMgr: Please contact the product vendor\n"));
459
460 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
461
462 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
463 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
464
465 AssertMsgFailed(("Implement\n"));
466 return VINF_SUCCESS;
467}
468
469/**
470 * Put a list of tasks in the pending request list of an endpoint.
471 */
472DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
473{
474 /* Add the rest of the tasks to the pending list */
475 if (!pEndpoint->AioMgr.pReqsPendingHead)
476 {
477 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
478 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
479 }
480 else
481 {
482 Assert(pEndpoint->AioMgr.pReqsPendingTail);
483 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
484 }
485
486 /* Update the tail. */
487 while (pTaskHead->pNext)
488 pTaskHead = pTaskHead->pNext;
489
490 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
491 pTaskHead->pNext = NULL;
492}
493
494/**
495 * Put one task in the pending request list of an endpoint.
496 */
497DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
498{
499 /* Add the rest of the tasks to the pending list */
500 if (!pEndpoint->AioMgr.pReqsPendingHead)
501 {
502 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
503 pEndpoint->AioMgr.pReqsPendingHead = pTask;
504 }
505 else
506 {
507 Assert(pEndpoint->AioMgr.pReqsPendingTail);
508 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
509 }
510
511 pEndpoint->AioMgr.pReqsPendingTail = pTask;
512 pTask->pNext = NULL;
513}
514
515/**
516 * Allocates a async I/O request.
517 *
518 * @returns Handle to the request.
519 * @param pAioMgr The I/O manager.
520 */
521static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
522{
523 /* Get a request handle. */
524 RTFILEAIOREQ hReq;
525 if (pAioMgr->iFreeEntry > 0)
526 {
527 pAioMgr->iFreeEntry--;
528 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
529 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
530 Assert(hReq != NIL_RTFILEAIOREQ);
531 }
532 else
533 {
534 int rc = RTFileAioReqCreate(&hReq);
535 AssertRCReturn(rc, NIL_RTFILEAIOREQ);
536 }
537
538 return hReq;
539}
540
541/**
542 * Frees a async I/O request handle.
543 *
544 * @param pAioMgr The I/O manager.
545 * @param hReq The I/O request handle to free.
546 */
547static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
548{
549 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
550 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
551
552 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
553 pAioMgr->iFreeEntry++;
554}
555
556/**
557 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
558 */
559static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
560 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
561 PRTFILEAIOREQ pahReqs, unsigned cReqs)
562{
563 pAioMgr->cRequestsActive += cReqs;
564 pEndpoint->AioMgr.cRequestsActive += cReqs;
565
566 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
567 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
568
569 int rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
570 if (RT_FAILURE(rc))
571 {
572 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
573 {
574 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
575
576 /* Append any not submitted task to the waiting list. */
577 for (size_t i = 0; i < cReqs; i++)
578 {
579 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
580
581 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
582 {
583 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
584
585 Assert(pTask->hReq == pahReqs[i]);
586 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
587 pAioMgr->cRequestsActive--;
588 pEndpoint->AioMgr.cRequestsActive--;
589
590 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
591 {
592 /* Clear the pending flush */
593 Assert(pEndpoint->pFlushReq == pTask);
594 pEndpoint->pFlushReq = NULL;
595 }
596 }
597 }
598
599 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
600
601 /* Print an entry in the release log */
602 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
603 {
604 pEpClass->fOutOfResourcesWarningPrinted = true;
605 LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
606 pAioMgr->cRequestsActive));
607 }
608
609 LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
610 LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
611 rc = VINF_SUCCESS;
612 }
613 else /* Another kind of error happened (full disk, ...) */
614 {
615 /* An error happened. Find out which one caused the error and resubmit all other tasks. */
616 for (size_t i = 0; i < cReqs; i++)
617 {
618 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
619
620 if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED)
621 {
622 /* We call ourself again to do any error handling which might come up now. */
623 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1);
624 AssertRC(rc);
625 }
626 else if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
627 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0);
628 }
629
630
631 if ( pEndpoint->pFlushReq
632 && !pAioMgr->cRequestsActive
633 && !pEndpoint->fAsyncFlushSupported)
634 {
635 /*
636 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
637 * the async flush API.
638 * Happens only if this we just noticed that this is not supported
639 * and the only active request was a flush.
640 */
641 PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
642 pEndpoint->pFlushReq = NULL;
643 pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
644 pdmacFileTaskFree(pEndpoint, pFlush);
645 }
646 }
647 }
648
649 return VINF_SUCCESS;
650}
651
652static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
653 RTFOFF offStart, size_t cbRange,
654 PPDMACTASKFILE pTask, bool fAlignedReq)
655{
656 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
657 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
658 ("Invalid task type %d\n", pTask->enmTransferType));
659
660 /*
661 * If there is no unaligned request active and the current one is aligned
662 * just pass it through.
663 */
664 if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
665 return false;
666
667 PPDMACFILERANGELOCK pRangeLock;
668 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
669 if (!pRangeLock)
670 {
671 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
672 /* Check if we intersect with the range. */
673 if ( !pRangeLock
674 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
675 && (pRangeLock->Core.KeyLast) >= offStart))
676 {
677 pRangeLock = NULL; /* False alarm */
678 }
679 }
680
681 /* Check whether we have one of the situations explained below */
682 if (pRangeLock)
683 {
684 /* Add to the list. */
685 pTask->pNext = NULL;
686
687 if (!pRangeLock->pWaitingTasksHead)
688 {
689 Assert(!pRangeLock->pWaitingTasksTail);
690 pRangeLock->pWaitingTasksHead = pTask;
691 pRangeLock->pWaitingTasksTail = pTask;
692 }
693 else
694 {
695 AssertPtr(pRangeLock->pWaitingTasksTail);
696 pRangeLock->pWaitingTasksTail->pNext = pTask;
697 pRangeLock->pWaitingTasksTail = pTask;
698 }
699 return true;
700 }
701
702 return false;
703}
704
705static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
706 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
707 RTFOFF offStart, size_t cbRange,
708 PPDMACTASKFILE pTask, bool fAlignedReq)
709{
710 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p offStart=%RTfoff cbRange=%zu pTask=%#p\n",
711 pAioMgr, pEndpoint, offStart, cbRange, pTask));
712
713 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask, fAlignedReq),
714 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
715 offStart, cbRange));
716
717 /*
718 * If there is no unaligned request active and the current one is aligned
719 * just don't use the lock.
720 */
721 if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
722 {
723 pTask->pRangeLock = NULL;
724 return VINF_SUCCESS;
725 }
726
727 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
728 if (!pRangeLock)
729 return VERR_NO_MEMORY;
730
731 /* Init the lock. */
732 pRangeLock->Core.Key = offStart;
733 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
734 pRangeLock->cRefs = 1;
735 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
736 pRangeLock->pWaitingTasksHead = NULL;
737 pRangeLock->pWaitingTasksTail = NULL;
738
739 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
740 AssertMsg(fInserted, ("Range lock was not inserted!\n")); NOREF(fInserted);
741
742 /* Let the task point to its lock. */
743 pTask->pRangeLock = pRangeLock;
744 pEndpoint->AioMgr.cLockedReqsActive++;
745
746 return VINF_SUCCESS;
747}
748
749static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
750 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
751 PPDMACFILERANGELOCK pRangeLock)
752{
753 PPDMACTASKFILE pTasksWaitingHead;
754
755 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p pRangeLock=%#p\n",
756 pAioMgr, pEndpoint, pRangeLock));
757
758 /* pRangeLock can be NULL if there was no lock assigned with the task. */
759 if (!pRangeLock)
760 return NULL;
761
762 Assert(pRangeLock->cRefs == 1);
763
764 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
765 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
766 pRangeLock->pWaitingTasksHead = NULL;
767 pRangeLock->pWaitingTasksTail = NULL;
768 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
769 pEndpoint->AioMgr.cLockedReqsActive--;
770
771 return pTasksWaitingHead;
772}
773
774static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
775 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
776 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
777{
778 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
779 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
780 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
781 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
782
783 pTask->fPrefetch = false;
784 pTask->cbBounceBuffer = 0;
785
786 /*
787 * Before we start to setup the request we have to check whether there is a task
788 * already active which range intersects with ours. We have to defer execution
789 * of this task in two cases:
790 * - The pending task is a write and the current is either read or write
791 * - The pending task is a read and the current task is a write task.
792 *
793 * To check whether a range is currently "locked" we use the AVL tree where every pending task
794 * is stored by its file offset range. The current task will be added to the active task
795 * and will be executed when the active one completes. (The method below
796 * which checks whether a range is already used will add the task)
797 *
798 * This is necessary because of the requirement to align all requests to a 512 boundary
799 * which is enforced by the host OS (Linux and Windows atm). It is possible that
800 * we have to process unaligned tasks and need to align them using bounce buffers.
801 * While the data is fetched from the file another request might arrive writing to
802 * the same range. This will result in data corruption if both are executed concurrently.
803 */
804 int rc = VINF_SUCCESS;
805 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask,
806 true /* fAlignedReq */);
807 if (!fLocked)
808 {
809 /* Get a request handle. */
810 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
811 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
812
813 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
814 {
815 /* Grow the file if needed. */
816 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
817 {
818 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
819 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
820 }
821
822 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
823 pTask->Off, pTask->DataSeg.pvSeg,
824 pTask->DataSeg.cbSeg, pTask);
825 }
826 else
827 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
828 pTask->Off, pTask->DataSeg.pvSeg,
829 pTask->DataSeg.cbSeg, pTask);
830 AssertRC(rc);
831
832 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
833 pTask->DataSeg.cbSeg,
834 pTask, true /* fAlignedReq */);
835
836 if (RT_SUCCESS(rc))
837 {
838 pTask->hReq = hReq;
839 *phReq = hReq;
840 }
841 }
842 else
843 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
844
845 return rc;
846}
847
848static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
849 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
850 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
851{
852 /*
853 * Check if the alignment requirements are met.
854 * Offset, transfer size and buffer address
855 * need to be on a 512 boundary.
856 */
857 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
858 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
859 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
860 bool fAlignedReq = cbToTransfer == pTask->DataSeg.cbSeg
861 && offStart == pTask->Off;
862
863 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
864 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
865 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
866 offStart, cbToTransfer, pEndpoint->cbFile));
867
868 pTask->fPrefetch = false;
869
870 /*
871 * Before we start to setup the request we have to check whether there is a task
872 * already active which range intersects with ours. We have to defer execution
873 * of this task in two cases:
874 * - The pending task is a write and the current is either read or write
875 * - The pending task is a read and the current task is a write task.
876 *
877 * To check whether a range is currently "locked" we use the AVL tree where every pending task
878 * is stored by its file offset range. The current task will be added to the active task
879 * and will be executed when the active one completes. (The method below
880 * which checks whether a range is already used will add the task)
881 *
882 * This is necessary because of the requirement to align all requests to a 512 boundary
883 * which is enforced by the host OS (Linux and Windows atm). It is possible that
884 * we have to process unaligned tasks and need to align them using bounce buffers.
885 * While the data is fetched from the file another request might arrive writing to
886 * the same range. This will result in data corruption if both are executed concurrently.
887 */
888 int rc = VINF_SUCCESS;
889 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
890 if (!fLocked)
891 {
892 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
893 void *pvBuf = pTask->DataSeg.pvSeg;
894
895 /* Get a request handle. */
896 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
897 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
898
899 if ( !fAlignedReq
900 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
901 {
902 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
903 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
904
905 /* Create bounce buffer. */
906 pTask->cbBounceBuffer = cbToTransfer;
907
908 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
909 pTask->Off, offStart));
910 pTask->offBounceBuffer = pTask->Off - offStart;
911
912 /** @todo I think we need something like a RTMemAllocAligned method here.
913 * Current assumption is that the maximum alignment is 4096byte
914 * (GPT disk on Windows)
915 * so we can use RTMemPageAlloc here.
916 */
917 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
918 if (RT_LIKELY(pTask->pvBounceBuffer))
919 {
920 pvBuf = pTask->pvBounceBuffer;
921
922 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
923 {
924 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
925 || RT_UNLIKELY(offStart != pTask->Off))
926 {
927 /* We have to fill the buffer first before we can update the data. */
928 LogFlow(("Prefetching data for task %#p\n", pTask));
929 pTask->fPrefetch = true;
930 enmTransferType = PDMACTASKFILETRANSFER_READ;
931 }
932 else
933 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
934 }
935 }
936 else
937 rc = VERR_NO_MEMORY;
938 }
939 else
940 pTask->cbBounceBuffer = 0;
941
942 if (RT_SUCCESS(rc))
943 {
944 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
945 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
946
947 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
948 {
949 /* Grow the file if needed. */
950 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
951 {
952 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
953 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
954 }
955
956 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
957 offStart, pvBuf, cbToTransfer, pTask);
958 }
959 else
960 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
961 offStart, pvBuf, cbToTransfer, pTask);
962 AssertRC(rc);
963
964 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
965 if (RT_SUCCESS(rc))
966 {
967 pTask->hReq = hReq;
968 *phReq = hReq;
969 }
970 else
971 {
972 /* Cleanup */
973 if (pTask->cbBounceBuffer)
974 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
975 }
976 }
977 }
978 else
979 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
980
981 return rc;
982}
983
984static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
985 PPDMACEPFILEMGR pAioMgr,
986 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
987{
988 RTFILEAIOREQ apReqs[20];
989 unsigned cRequests = 0;
990 int rc = VINF_SUCCESS;
991
992 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
993 ("Trying to process request lists of a non active endpoint!\n"));
994
995 /* Go through the list and queue the requests until we get a flush request */
996 while ( pTaskHead
997 && !pEndpoint->pFlushReq
998 && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
999 && RT_SUCCESS(rc))
1000 {
1001 RTMSINTERVAL msWhenNext;
1002 PPDMACTASKFILE pCurr = pTaskHead;
1003
1004 if (!pdmacEpIsTransferAllowed(&pEndpoint->Core, (uint32_t)pCurr->DataSeg.cbSeg, &msWhenNext))
1005 {
1006 pAioMgr->msBwLimitExpired = RT_MIN(pAioMgr->msBwLimitExpired, msWhenNext);
1007 break;
1008 }
1009
1010 pTaskHead = pTaskHead->pNext;
1011
1012 pCurr->pNext = NULL;
1013
1014 AssertMsg(RT_VALID_PTR(pCurr->pEndpoint) && pCurr->pEndpoint == pEndpoint,
1015 ("Endpoints do not match\n"));
1016
1017 switch (pCurr->enmTransferType)
1018 {
1019 case PDMACTASKFILETRANSFER_FLUSH:
1020 {
1021 /* If there is no data transfer request this flush request finished immediately. */
1022 if (pEndpoint->fAsyncFlushSupported)
1023 {
1024 /* Issue a flush to the host. */
1025 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
1026 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
1027
1028 LogFlow(("Flush request %#p\n", hReq));
1029
1030 rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->hFile, pCurr);
1031 if (RT_FAILURE(rc))
1032 {
1033 if (rc == VERR_NOT_SUPPORTED)
1034 LogRel(("AIOMgr: Async flushes not supported\n"));
1035 else
1036 LogRel(("AIOMgr: Preparing flush failed with %Rrc, disabling async flushes\n", rc));
1037 pEndpoint->fAsyncFlushSupported = false;
1038 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1039 rc = VINF_SUCCESS; /* Fake success */
1040 }
1041 else
1042 {
1043 pCurr->hReq = hReq;
1044 apReqs[cRequests] = hReq;
1045 pEndpoint->AioMgr.cReqsProcessed++;
1046 cRequests++;
1047 }
1048 }
1049
1050 if ( !pEndpoint->AioMgr.cRequestsActive
1051 && !pEndpoint->fAsyncFlushSupported)
1052 {
1053 pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
1054 pdmacFileTaskFree(pEndpoint, pCurr);
1055 }
1056 else
1057 {
1058 Assert(!pEndpoint->pFlushReq);
1059 pEndpoint->pFlushReq = pCurr;
1060 }
1061 break;
1062 }
1063 case PDMACTASKFILETRANSFER_READ:
1064 case PDMACTASKFILETRANSFER_WRITE:
1065 {
1066 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
1067
1068 if (pCurr->hReq == NIL_RTFILEAIOREQ)
1069 {
1070 if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
1071 rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1072 else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
1073 rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1074 else
1075 AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));
1076
1077 AssertRC(rc);
1078 }
1079 else
1080 {
1081 LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
1082 hReq = pCurr->hReq;
1083 }
1084
1085 LogFlow(("Read/Write request %#p\n", hReq));
1086
1087 if (hReq != NIL_RTFILEAIOREQ)
1088 {
1089 apReqs[cRequests] = hReq;
1090 cRequests++;
1091 }
1092 break;
1093 }
1094 default:
1095 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
1096 } /* switch transfer type */
1097
1098 /* Queue the requests if the array is full. */
1099 if (cRequests == RT_ELEMENTS(apReqs))
1100 {
1101 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1102 cRequests = 0;
1103 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1104 ("Unexpected return code\n"));
1105 }
1106 }
1107
1108 if (cRequests)
1109 {
1110 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1111 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1112 ("Unexpected return code rc=%Rrc\n", rc));
1113 }
1114
1115 if (pTaskHead)
1116 {
1117 /* Add the rest of the tasks to the pending list */
1118 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
1119
1120 if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
1121 && !pEndpoint->pFlushReq))
1122 {
1123#if 0
1124 /*
1125 * The I/O manager has no room left for more requests
1126 * but there are still requests to process.
1127 * Create a new I/O manager and let it handle some endpoints.
1128 */
1129 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
1130#else
1131 /* Grow the I/O manager */
1132 pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
1133#endif
1134 }
1135 }
1136
1137 /* Insufficient resources are not fatal. */
1138 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
1139 rc = VINF_SUCCESS;
1140
1141 return rc;
1142}
1143
1144/**
1145 * Adds all pending requests for the given endpoint
1146 * until a flush request is encountered or there is no
1147 * request anymore.
1148 *
1149 * @returns VBox status code.
1150 * @param pAioMgr The async I/O manager for the endpoint
1151 * @param pEndpoint The endpoint to get the requests from.
1152 */
1153static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1154 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1155{
1156 int rc = VINF_SUCCESS;
1157 PPDMACTASKFILE pTasksHead = NULL;
1158
1159 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1160 ("Trying to process request lists of a non active endpoint!\n"));
1161
1162 Assert(!pEndpoint->pFlushReq);
1163
1164 /* Check the pending list first */
1165 if (pEndpoint->AioMgr.pReqsPendingHead)
1166 {
1167 LogFlow(("Queuing pending requests first\n"));
1168
1169 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1170 /*
1171 * Clear the list as the processing routine will insert them into the list
1172 * again if it gets a flush request.
1173 */
1174 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1175 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1176 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1177 AssertRC(rc); /** @todo r=bird: status code potentially overwritten. */
1178 }
1179
1180 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1181 {
1182 /* Now the request queue. */
1183 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1184 if (pTasksHead)
1185 {
1186 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1187 AssertRC(rc);
1188 }
1189 }
1190
1191 return rc;
1192}
1193
1194static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
1195{
1196 int rc = VINF_SUCCESS;
1197 bool fNotifyWaiter = false;
1198
1199 LogFlowFunc((": Enter\n"));
1200
1201 Assert(pAioMgr->fBlockingEventPending);
1202
1203 switch (pAioMgr->enmBlockingEvent)
1204 {
1205 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
1206 {
1207 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1208 AssertMsg(RT_VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
1209
1210 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1211
1212 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
1213 pEndpointNew->AioMgr.pEndpointPrev = NULL;
1214 if (pAioMgr->pEndpointsHead)
1215 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
1216 pAioMgr->pEndpointsHead = pEndpointNew;
1217
1218 /* Assign the completion point to this file. */
1219 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->hFile);
1220 fNotifyWaiter = true;
1221 pAioMgr->cEndpoints++;
1222 break;
1223 }
1224 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
1225 {
1226 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1227 AssertMsg(RT_VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
1228
1229 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
1230 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
1231 break;
1232 }
1233 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
1234 {
1235 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1236 AssertMsg(RT_VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
1237
1238 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1239 {
1240 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
1241
1242 /* Make sure all tasks finished. Process the queues a last time first. */
1243 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
1244 AssertRC(rc);
1245
1246 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
1247 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
1248 }
1249 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
1250 && (!pEndpointClose->AioMgr.cRequestsActive))
1251 fNotifyWaiter = true;
1252 break;
1253 }
1254 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
1255 {
1256 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
1257 if (!pAioMgr->cRequestsActive)
1258 fNotifyWaiter = true;
1259 break;
1260 }
1261 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
1262 {
1263 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
1264 break;
1265 }
1266 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
1267 {
1268 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
1269 fNotifyWaiter = true;
1270 break;
1271 }
1272 default:
1273 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
1274 }
1275
1276 if (fNotifyWaiter)
1277 {
1278 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1279 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
1280
1281 /* Release the waiting thread. */
1282 LogFlow(("Signalling waiter\n"));
1283 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1284 AssertRC(rc);
1285 }
1286
1287 LogFlowFunc((": Leave\n"));
1288 return rc;
1289}
1290
1291/**
1292 * Checks all endpoints for pending events or new requests.
1293 *
1294 * @returns VBox status code.
1295 * @param pAioMgr The I/O manager handle.
1296 */
1297static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1298{
1299 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1300 int rc = VINF_SUCCESS;
1301 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1302
1303 pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT;
1304
1305 while (pEndpoint)
1306 {
1307 if (!pEndpoint->pFlushReq
1308 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1309 && !pEndpoint->AioMgr.fMoving)
1310 {
1311 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1312 if (RT_FAILURE(rc))
1313 return rc;
1314 }
1315 else if ( !pEndpoint->AioMgr.cRequestsActive
1316 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1317 {
1318 /* Reopen the file so that the new endpoint can re-associate with the file */
1319 RTFileClose(pEndpoint->hFile);
1320 rc = RTFileOpen(&pEndpoint->hFile, pEndpoint->Core.pszUri, pEndpoint->fFlags);
1321 AssertRC(rc);
1322
1323 if (pEndpoint->AioMgr.fMoving)
1324 {
1325 pEndpoint->AioMgr.fMoving = false;
1326 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1327 }
1328 else
1329 {
1330 Assert(pAioMgr->fBlockingEventPending);
1331 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1332
1333 /* Release the waiting thread. */
1334 LogFlow(("Signalling waiter\n"));
1335 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1336 AssertRC(rc);
1337 }
1338 }
1339
1340 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1341 }
1342
1343 return rc;
1344}
1345
1346/**
1347 * Wrapper around pdmacFileAioMgrNormalReqCompleteRc().
1348 */
1349static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1350{
1351 size_t cbTransfered = 0;
1352 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1353
1354 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered);
1355}
1356
1357static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
1358 int rcReq, size_t cbTransfered)
1359{
1360 int rc = VINF_SUCCESS;
1361 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1362 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
1363 PPDMACTASKFILE pTasksWaiting;
1364
1365 LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));
1366
1367 pEndpoint = pTask->pEndpoint;
1368
1369 pTask->hReq = NIL_RTFILEAIOREQ;
1370
1371 pAioMgr->cRequestsActive--;
1372 pEndpoint->AioMgr.cRequestsActive--;
1373 pEndpoint->AioMgr.cReqsProcessed++;
1374
1375 /*
1376 * It is possible that the request failed on Linux with kernels < 2.6.23
1377 * if the passed buffer was allocated with remap_pfn_range or if the file
1378 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1379 * The endpoint will be migrated to a failsafe manager in case a request fails.
1380 */
1381 if (RT_FAILURE(rcReq))
1382 {
1383 /* Free bounce buffers and the IPRT request. */
1384 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1385
1386 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1387 {
1388 LogRel(("AIOMgr: Flush failed with %Rrc, disabling async flushes\n", rcReq));
1389 pEndpoint->fAsyncFlushSupported = false;
1390 AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
1391 /* The other method will take over now. */
1392
1393 pEndpoint->pFlushReq = NULL;
1394 /* Call completion callback */
1395 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, VINF_SUCCESS));
1396 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1397 pdmacFileTaskFree(pEndpoint, pTask);
1398 }
1399 else
1400 {
1401 /* Free the lock and process pending tasks if necessary */
1402 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1403 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1404 AssertRC(rc);
1405
1406 if (pTask->cbBounceBuffer)
1407 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1408
1409 /*
1410 * Fatal errors are reported to the guest and non-fatal errors
1411 * will cause a migration to the failsafe manager in the hope
1412 * that the error disappears.
1413 */
1414 if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
1415 {
1416 /* Queue the request on the pending list. */
1417 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1418 pEndpoint->AioMgr.pReqsPendingHead = pTask;
1419
1420 /* Create a new failsafe manager if necessary. */
1421 if (!pEndpoint->AioMgr.fMoving)
1422 {
1423 PPDMACEPFILEMGR pAioMgrFailsafe;
1424
1425 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1426 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
1427
1428 pEndpoint->AioMgr.fMoving = true;
1429
1430 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1431 &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
1432 AssertRC(rc);
1433
1434 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
1435
1436 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1437 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1438 }
1439
1440 /* If this was the last request for the endpoint migrate it to the new manager. */
1441 if (!pEndpoint->AioMgr.cRequestsActive)
1442 {
1443 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1444 Assert(!fReqsPending); NOREF(fReqsPending);
1445
1446 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1447 AssertRC(rc);
1448 }
1449 }
1450 else
1451 {
1452 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1453 pdmacFileTaskFree(pEndpoint, pTask);
1454 }
1455 }
1456 }
1457 else
1458 {
1459 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1460 {
1461 /* Clear pending flush */
1462 AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
1463 pEndpoint->pFlushReq = NULL;
1464 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1465
1466 /* Call completion callback */
1467 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
1468 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1469 pdmacFileTaskFree(pEndpoint, pTask);
1470 }
1471 else
1472 {
1473 /*
1474 * Restart an incomplete transfer.
1475 * This usually means that the request will return an error now
1476 * but to get the cause of the error (disk full, file too big, I/O error, ...)
1477 * the transfer needs to be continued.
1478 */
1479 pTask->cbTransfered += cbTransfered;
1480
1481 if (RT_UNLIKELY( pTask->cbTransfered < pTask->DataSeg.cbSeg
1482 || ( pTask->cbBounceBuffer
1483 && pTask->cbTransfered < pTask->cbBounceBuffer)))
1484 {
1485 RTFOFF offStart;
1486 size_t cbToTransfer;
1487 uint8_t *pbBuf = NULL;
1488
1489 LogFlow(("Restarting incomplete transfer %#p (%zu bytes transferred)\n",
1490 pTask, cbTransfered));
1491 Assert(cbTransfered % 512 == 0);
1492
1493 if (pTask->cbBounceBuffer)
1494 {
1495 AssertPtr(pTask->pvBounceBuffer);
1496 offStart = (pTask->Off & ~((RTFOFF)512-1)) + pTask->cbTransfered;
1497 cbToTransfer = pTask->cbBounceBuffer - pTask->cbTransfered;
1498 pbBuf = (uint8_t *)pTask->pvBounceBuffer + pTask->cbTransfered;
1499 }
1500 else
1501 {
1502 Assert(!pTask->pvBounceBuffer);
1503 offStart = pTask->Off + pTask->cbTransfered;
1504 cbToTransfer = pTask->DataSeg.cbSeg - pTask->cbTransfered;
1505 pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + pTask->cbTransfered;
1506 }
1507
1508 if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1509 {
1510 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile, offStart,
1511 pbBuf, cbToTransfer, pTask);
1512 }
1513 else
1514 {
1515 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
1516 ("Invalid transfer type\n"));
1517 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, offStart,
1518 pbBuf, cbToTransfer, pTask);
1519 }
1520 AssertRC(rc);
1521
1522 pTask->hReq = hReq;
1523 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1524 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1525 ("Unexpected return code rc=%Rrc\n", rc));
1526 }
1527 else if (pTask->fPrefetch)
1528 {
1529 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1530 Assert(pTask->cbBounceBuffer);
1531
1532 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1533 pTask->DataSeg.pvSeg,
1534 pTask->DataSeg.cbSeg);
1535
1536 /* Write it now. */
1537 pTask->fPrefetch = false;
1538 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
1539 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
1540
1541 pTask->cbTransfered = 0;
1542
1543 /* Grow the file if needed. */
1544 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1545 {
1546 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
1547 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
1548 }
1549
1550 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
1551 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1552 AssertRC(rc);
1553 pTask->hReq = hReq;
1554 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1555 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1556 ("Unexpected return code rc=%Rrc\n", rc));
1557 }
1558 else
1559 {
1560 if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
1561 {
1562 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1563 memcpy(pTask->DataSeg.pvSeg,
1564 ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1565 pTask->DataSeg.cbSeg);
1566
1567 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1568 }
1569
1570 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1571
1572 /* Free the lock and process pending tasks if necessary */
1573 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1574 if (pTasksWaiting)
1575 {
1576 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1577 AssertRC(rc);
1578 }
1579
1580 /* Call completion callback */
1581 LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
1582 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1583 pdmacFileTaskFree(pEndpoint, pTask);
1584
1585 /*
1586 * If there is no request left on the endpoint but a flush request is set
1587 * it completed now and we notify the owner.
1588 * Furthermore we look for new requests and continue.
1589 */
1590 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1591 {
1592 /* Call completion callback */
1593 pTask = pEndpoint->pFlushReq;
1594 pEndpoint->pFlushReq = NULL;
1595
1596 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
1597
1598 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1599 pdmacFileTaskFree(pEndpoint, pTask);
1600 }
1601 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1602 {
1603 /* If the endpoint is about to be migrated do it now. */
1604 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1605 Assert(!fReqsPending); NOREF(fReqsPending);
1606
1607 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1608 AssertRC(rc);
1609 }
1610 }
1611 } /* Not a flush request */
1612 } /* request completed successfully */
1613}
1614
/**
 * Helper macro for checking for error codes.
 *
 * If @a rc is a failure status the error handler is invoked and the status
 * it returns is returned from the enclosing function.  The body is wrapped
 * in do { } while (0) so that the macro acts as a single statement (safe in
 * an unbraced if/else), and @a rc is evaluated exactly once.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        int const rcChecked_ = (rc); \
        if (RT_FAILURE(rcChecked_)) \
        { \
            int rcHandler_ = pdmacFileAioMgrNormalErrorHandler((pAioMgr), rcChecked_, RT_SRC_POS); \
            return rcHandler_; \
        } \
    } while (0)
1622
1623/**
1624 * The normal I/O manager using the RTFileAio* API
1625 *
1626 * @returns VBox status code.
1627 * @param hThreadSelf Handle of the thread.
1628 * @param pvUser Opaque user data.
1629 */
1630DECLCALLBACK(int) pdmacFileAioMgrNormal(RTTHREAD hThreadSelf, void *pvUser)
1631{
1632 int rc = VINF_SUCCESS;
1633 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1634 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1635 NOREF(hThreadSelf);
1636
1637 while ( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1638 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING
1639 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1640 {
1641 if (!pAioMgr->cRequestsActive)
1642 {
1643 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1644 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1645 rc = RTSemEventWait(pAioMgr->EventSem, pAioMgr->msBwLimitExpired);
1646 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1647 Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT);
1648
1649 LogFlow(("Got woken up\n"));
1650 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1651 }
1652
1653 /* Check for an external blocking event first. */
1654 if (pAioMgr->fBlockingEventPending)
1655 {
1656 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1657 CHECK_RC(pAioMgr, rc);
1658 }
1659
1660 if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1661 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1662 {
1663 /* We got woken up because an endpoint issued new requests. Queue them. */
1664 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1665 CHECK_RC(pAioMgr, rc);
1666
1667 while (pAioMgr->cRequestsActive)
1668 {
1669 RTFILEAIOREQ apReqs[20];
1670 uint32_t cReqsCompleted = 0;
1671 size_t cReqsWait;
1672
1673 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1674 cReqsWait = RT_ELEMENTS(apReqs);
1675 else
1676 cReqsWait = pAioMgr->cRequestsActive;
1677
1678 LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));
1679
1680 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1681 1,
1682 RT_INDEFINITE_WAIT, apReqs,
1683 cReqsWait, &cReqsCompleted);
1684 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1685 CHECK_RC(pAioMgr, rc);
1686
1687 LogFlow(("%d tasks completed\n", cReqsCompleted));
1688
1689 for (uint32_t i = 0; i < cReqsCompleted; i++)
1690 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1691
1692 /* Check for an external blocking event before we go to sleep again. */
1693 if (pAioMgr->fBlockingEventPending)
1694 {
1695 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1696 CHECK_RC(pAioMgr, rc);
1697 }
1698
1699 /* Update load statistics. */
1700 uint64_t uMillisCurr = RTTimeMilliTS();
1701 if (uMillisCurr > uMillisEnd)
1702 {
1703 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1704
1705 /* Calculate timespan. */
1706 uMillisCurr -= uMillisEnd;
1707
1708 while (pEndpointCurr)
1709 {
1710 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1711 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1712 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1713 }
1714
1715 /* Set new update interval */
1716 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1717 }
1718
1719 /* Check endpoints for new requests. */
1720 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1721 {
1722 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1723 CHECK_RC(pAioMgr, rc);
1724 }
1725 } /* while requests are active. */
1726
1727 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1728 {
1729 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1730 AssertRC(rc);
1731 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
1732
1733 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1734 CHECK_RC(pAioMgr, rc);
1735 }
1736 } /* if still running */
1737 } /* while running */
1738
1739 LogFlowFunc(("rc=%Rrc\n", rc));
1740 return rc;
1741}
1742
1743#undef CHECK_RC
1744
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette