VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp @ 57394

Last change on this file since 57394 was 57358, checked in by vboxsync, 9 years ago

*: scm cleanup run.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 66.0 KB
1/* $Id: PDMAsyncCompletionFileNormal.cpp 57358 2015-08-14 15:16:38Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Async File I/O manager.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#include <iprt/types.h>
24#include <iprt/asm.h>
25#include <iprt/file.h>
26#include <iprt/mem.h>
27#include <iprt/string.h>
28#include <iprt/assert.h>
29#include <VBox/log.h>
30
31#include "PDMAsyncCompletionFileInternal.h"
32
33/** The update period for the I/O load statistics in ms. */
34#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
35/** Grow step (and initial value) for the maximum number of requests a manager will handle. */
36#define PDMACEPFILEMGR_REQS_STEP 64
37
38
39/*********************************************************************************************************************************
40* Internal functions *
41*********************************************************************************************************************************/
42static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
43 PPDMACEPFILEMGR pAioMgr,
44 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
45
46static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
47 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
48 PPDMACFILERANGELOCK pRangeLock);
49
50static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
51 int rc, size_t cbTransfered);
52
53
54int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
55{
56 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
57
58 int rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
59 if (rc == VERR_OUT_OF_RANGE)
60 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
61
62 if (RT_SUCCESS(rc))
63 {
64 /* Initialize request handle array. */
65 pAioMgr->iFreeEntry = 0;
66 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
67 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
68
69 if (pAioMgr->pahReqsFree)
70 {
71 /* Create the range lock memcache. */
72 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
73 0, UINT32_MAX, NULL, NULL, NULL, 0);
74 if (RT_SUCCESS(rc))
75 return VINF_SUCCESS;
76
77 RTMemFree(pAioMgr->pahReqsFree);
78 }
79 else
80 {
81 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
82 rc = VERR_NO_MEMORY;
83 }
84 }
85
86 return rc;
87}
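/*
 * A minimal stand-alone sketch of how the init/destroy pair above could be
 * driven. In the real subsystem managers are created through
 * pdmacFileAioMgrCreate(); this direct pairing, the stack-allocated manager
 * and the RT_ZERO() initialisation are illustrative assumptions only.
 */
#if 0 /* illustrative sketch */
static int exampleAioMgrLifecycle(void)
{
    PDMACEPFILEMGR AioMgr;
    RT_ZERO(AioMgr);

    int rc = pdmacFileAioMgrNormalInit(&AioMgr);
    if (RT_SUCCESS(rc))
    {
        /* ... run the manager, queue requests, wait for completions ... */
        pdmacFileAioMgrNormalDestroy(&AioMgr);
    }
    return rc;
}
#endif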
88
89void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
90{
91 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
92
93 while (pAioMgr->iFreeEntry > 0)
94 {
95 pAioMgr->iFreeEntry--;
96 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
97 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
98 }
99
100 RTMemFree(pAioMgr->pahReqsFree);
101 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
102}
103
104#if 0 /* currently unused */
105/**
106 * Sorts the endpoint list with insertion sort.
107 */
108static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
109{
110 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
111
112 pEpPrev = pAioMgr->pEndpointsHead;
113 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
114
115 while (pEpCurr)
116 {
117 /* Remember the next element to sort because the list might change. */
118 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
119
120 /* Unlink the current element from the list. */
121 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
122 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
123
124 if (pPrev)
125 pPrev->AioMgr.pEndpointNext = pNext;
126 else
127 pAioMgr->pEndpointsHead = pNext;
128
129 if (pNext)
130 pNext->AioMgr.pEndpointPrev = pPrev;
131
132 /* Go back until we reach the place to insert the current endpoint. */
133 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
134 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
135
136 /* Link the endpoint into the list. */
137 if (pEpPrev)
138 pNext = pEpPrev->AioMgr.pEndpointNext;
139 else
140 pNext = pAioMgr->pEndpointsHead;
141
142 pEpCurr->AioMgr.pEndpointNext = pNext;
143 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
144
145 if (pNext)
146 pNext->AioMgr.pEndpointPrev = pEpCurr;
147
148 if (pEpPrev)
149 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
150 else
151 pAioMgr->pEndpointsHead = pEpCurr;
152
153 pEpCurr = pEpNextToSort;
154 }
155
156#ifdef DEBUG
157 /* Validate sorting algorithm */
158 unsigned cEndpoints = 0;
159 pEpCurr = pAioMgr->pEndpointsHead;
160
161 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
162 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
163
164 while (pEpCurr)
165 {
166 cEndpoints++;
167
168 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
169 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
170
171 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
172 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
173
174 pEpCurr = pNext;
175 }
176
177 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
178
179#endif
180}
181#endif /* currently unused */
182
183/**
184 * Removes an endpoint from the currently assigned manager.
185 *
186 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
187 * FALSE otherwise.
188 * @param pEndpointRemove The endpoint to remove.
189 */
190static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
191{
192 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
193 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
194 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
195
196 pAioMgr->cEndpoints--;
197
198 if (pPrev)
199 pPrev->AioMgr.pEndpointNext = pNext;
200 else
201 pAioMgr->pEndpointsHead = pNext;
202
203 if (pNext)
204 pNext->AioMgr.pEndpointPrev = pPrev;
205
206 /* Make sure that there is no request pending on this manager for the endpoint. */
207 if (!pEndpointRemove->AioMgr.cRequestsActive)
208 {
209 Assert(!pEndpointRemove->pFlushReq);
210
211 /* Reopen the file so that the new endpoint can re-associate with the file */
212 RTFileClose(pEndpointRemove->hFile);
213 int rc = RTFileOpen(&pEndpointRemove->hFile, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
214 AssertRC(rc);
215 return false;
216 }
217
218 return true;
219}
220
221#if 0 /* currently unused */
222
223static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
224{
225 /* Balancing doesn't make sense with only one endpoint. */
226 if (pAioMgr->cEndpoints == 1)
227 return false;
228
229 /* Doesn't make sense to move endpoints if only one produces the whole load */
230 unsigned cEndpointsWithLoad = 0;
231
232 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
233
234 while (pCurr)
235 {
236 if (pCurr->AioMgr.cReqsPerSec)
237 cEndpointsWithLoad++;
238
239 pCurr = pCurr->AioMgr.pEndpointNext;
240 }
241
242 return (cEndpointsWithLoad > 1);
243}
244
245/**
246 * Creates a new I/O manager and spreads the I/O load of the endpoints
247 * between the given I/O manager and the new one.
248 *
249 * @returns nothing.
250 * @param pAioMgr The I/O manager with high I/O load.
251 */
252static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
253{
254 /*
255 * Check if balancing would improve the situation.
256 */
257 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
258 {
259 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
260 PPDMACEPFILEMGR pAioMgrNew = NULL;
261
262 int rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
263 if (RT_SUCCESS(rc))
264 {
265 /* We will sort the list by request count per second. */
266 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
267
268 /* Now move some endpoints to the new manager. */
269 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
270 unsigned cReqsOther = 0;
271 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
272
273 while (pCurr)
274 {
275 if (cReqsHere <= cReqsOther)
276 {
277 /*
278 * The other manager has more requests to handle now.
279 * We will keep the current endpoint.
280 */
281 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
282 cReqsHere += pCurr->AioMgr.cReqsPerSec;
283 pCurr = pCurr->AioMgr.pEndpointNext;
284 }
285 else
286 {
287 /* Move to the other manager. */
288 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
289 cReqsOther += pCurr->AioMgr.cReqsPerSec;
290
291 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
292
293 pCurr = pCurr->AioMgr.pEndpointNext;
294
295 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
296
297 if (fReqsPending)
298 {
299 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
300 pMove->AioMgr.fMoving = true;
301 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
302 }
303 else
304 {
305 pMove->AioMgr.fMoving = false;
306 pMove->AioMgr.pAioMgrDst = NULL;
307 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
308 }
309 }
310 }
311 }
312 else
313 {
314 /* Don't process further but leave a log entry about reduced performance. */
315 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
316 }
317 }
318 else
319 Log(("AIOMgr: Load balancing would not improve anything\n"));
320}
321
322#endif /* unused */
323
324/**
325 * Increase the maximum number of active requests for the given I/O manager.
326 *
327 * @returns VBox status code.
328 * @param pAioMgr The I/O manager to grow.
329 */
330static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
331{
332 LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));
333
334 AssertMsg( pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
335 && !pAioMgr->cRequestsActive,
336 ("Invalid state of the I/O manager\n"));
337
338#ifdef RT_OS_WINDOWS
339 /*
340 * Reopen the files of all assigned endpoints first so we can assign them to the new
341 * I/O context.
342 */
343 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
344
345 while (pCurr)
346 {
347 RTFileClose(pCurr->hFile);
348 int rc2 = RTFileOpen(&pCurr->hFile, pCurr->Core.pszUri, pCurr->fFlags); AssertRC(rc2);
349
350 pCurr = pCurr->AioMgr.pEndpointNext;
351 }
352#endif
353
354 /* Create the new bigger context. */
355 pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;
356
357 RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
358 int rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
359 if (rc == VERR_OUT_OF_RANGE)
360 rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
361
362 if (RT_SUCCESS(rc))
363 {
364 /* Close the old context. */
365 rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
366 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
367
368 pAioMgr->hAioCtx = hAioCtxNew;
369
370 /* Create a new I/O task handle array */
371 uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
372 RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));
373
374 if (pahReqNew)
375 {
376 /* Copy the cached request handles. */
377 for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
378 pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];
379
380 RTMemFree(pAioMgr->pahReqsFree);
381 pAioMgr->pahReqsFree = pahReqNew;
382 pAioMgr->cReqEntries = cReqEntriesNew;
383 LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
384 pAioMgr->cRequestsActiveMax));
385 }
386 else
387 rc = VERR_NO_MEMORY;
388 }
389
390#ifdef RT_OS_WINDOWS
391 /* Assign the file to the new context. */
392 pCurr = pAioMgr->pEndpointsHead;
393 while (pCurr)
394 {
395 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->hFile);
396 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
397
398 pCurr = pCurr->AioMgr.pEndpointNext;
399 }
400#endif
401
402 if (RT_FAILURE(rc))
403 {
404 LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
405 pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
406 }
407
408 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
409 LogFlowFunc(("returns rc=%Rrc\n", rc));
410
411 return rc;
412}
413
414/**
415 * Checks if a given status code is fatal.
416 * Non-fatal errors can be fixed by migrating the endpoint to a
417 * failsafe manager.
418 *
419 * @returns true if the error is fatal and migrating to a failsafe manager doesn't help,
420 * false if the error can be fixed by a migration (an image on an NFS share, for example).
421 * @param rcReq The status code to check.
422 */
423DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
424{
425 return rcReq == VERR_DEV_IO_ERROR
426 || rcReq == VERR_FILE_IO_ERROR
427 || rcReq == VERR_DISK_IO_ERROR
428 || rcReq == VERR_DISK_FULL
429 || rcReq == VERR_FILE_TOO_BIG;
430}
431
432/**
433 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
434 *
435 * @returns VBox status code
436 * @param pAioMgr The I/O manager the error occurred on.
437 * @param rc The error code.
438 */
439static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
440{
441 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
442 pAioMgr, rc));
443 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
444 LogRel(("AIOMgr: Please contact the product vendor\n"));
445
446 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
447
448 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
449 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
450
451 AssertMsgFailed(("Implement\n"));
452 return VINF_SUCCESS;
453}
454
455/**
456 * Put a list of tasks in the pending request list of an endpoint.
457 */
458DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
459{
460 /* Add the rest of the tasks to the pending list */
461 if (!pEndpoint->AioMgr.pReqsPendingHead)
462 {
463 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
464 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
465 }
466 else
467 {
468 Assert(pEndpoint->AioMgr.pReqsPendingTail);
469 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
470 }
471
472 /* Update the tail. */
473 while (pTaskHead->pNext)
474 pTaskHead = pTaskHead->pNext;
475
476 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
477 pTaskHead->pNext = NULL;
478}
479
480/**
481 * Put one task in the pending request list of an endpoint.
482 */
483DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
484{
485 /* Add the task to the pending list */
486 if (!pEndpoint->AioMgr.pReqsPendingHead)
487 {
488 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
489 pEndpoint->AioMgr.pReqsPendingHead = pTask;
490 }
491 else
492 {
493 Assert(pEndpoint->AioMgr.pReqsPendingTail);
494 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
495 }
496
497 pEndpoint->AioMgr.pReqsPendingTail = pTask;
498 pTask->pNext = NULL;
499}
500
501/**
502 * Allocates an async I/O request.
503 *
504 * @returns Handle to the request.
505 * @param pAioMgr The I/O manager.
506 */
507static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
508{
509 /* Get a request handle. */
510 RTFILEAIOREQ hReq;
511 if (pAioMgr->iFreeEntry > 0)
512 {
513 pAioMgr->iFreeEntry--;
514 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
515 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
516 Assert(hReq != NIL_RTFILEAIOREQ);
517 }
518 else
519 {
520 int rc = RTFileAioReqCreate(&hReq);
521 AssertRCReturn(rc, NIL_RTFILEAIOREQ);
522 }
523
524 return hReq;
525}
526
527/**
528 * Frees an async I/O request handle.
529 *
530 * @returns nothing.
531 * @param pAioMgr The I/O manager.
532 * @param hReq The I/O request handle to free.
533 */
534static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
535{
536 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
537 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
538
539 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
540 pAioMgr->iFreeEntry++;
541}
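/*
 * A minimal stand-alone sketch of the LIFO free-list caching used by the two
 * helpers above: completed request handles are pushed onto a stack and popped
 * for reuse, so RTFileAioReqCreate()/RTFileAioReqDestroy() are only hit when
 * the cache is empty or full. The EXAMPLEHANDLECACHE type and the fixed
 * capacity are illustrative assumptions, not part of the PDM API.
 */
#if 0 /* illustrative sketch */
typedef struct EXAMPLEHANDLECACHE
{
    RTFILEAIOREQ ahFree[64]; /* Cached handles. */
    unsigned     iFree;      /* Number of cached entries (next free slot). */
} EXAMPLEHANDLECACHE;

static RTFILEAIOREQ exampleCacheAlloc(EXAMPLEHANDLECACHE *pCache)
{
    if (pCache->iFree > 0)
        return pCache->ahFree[--pCache->iFree]; /* Pop a cached handle. */

    RTFILEAIOREQ hReq;
    int rc = RTFileAioReqCreate(&hReq);
    return RT_SUCCESS(rc) ? hReq : NIL_RTFILEAIOREQ;
}

static void exampleCacheFree(EXAMPLEHANDLECACHE *pCache, RTFILEAIOREQ hReq)
{
    if (pCache->iFree < RT_ELEMENTS(pCache->ahFree))
        pCache->ahFree[pCache->iFree++] = hReq; /* Push back for reuse. */
    else
        RTFileAioReqDestroy(hReq);              /* Cache is full, release it. */
}
#endif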
542
543/**
544 * Wrapper around RTFileAioCtxSubmit() which also does error handling.
545 */
546static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
547 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
548 PRTFILEAIOREQ pahReqs, unsigned cReqs)
549{
550 pAioMgr->cRequestsActive += cReqs;
551 pEndpoint->AioMgr.cRequestsActive += cReqs;
552
553 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
554 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
555
556 int rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
557 if (RT_FAILURE(rc))
558 {
559 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
560 {
561 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
562
563 /* Append any task that was not submitted to the waiting list. */
564 for (size_t i = 0; i < cReqs; i++)
565 {
566 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
567
568 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
569 {
570 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
571
572 Assert(pTask->hReq == pahReqs[i]);
573 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
574 pAioMgr->cRequestsActive--;
575 pEndpoint->AioMgr.cRequestsActive--;
576
577 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
578 {
579 /* Clear the pending flush */
580 Assert(pEndpoint->pFlushReq == pTask);
581 pEndpoint->pFlushReq = NULL;
582 }
583 }
584 }
585
586 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
587
588 /* Print an entry in the release log */
589 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
590 {
591 pEpClass->fOutOfResourcesWarningPrinted = true;
592 LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
593 pAioMgr->cRequestsActive));
594 }
595
596 LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
597 LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
598 rc = VINF_SUCCESS;
599 }
600 else /* Another kind of error happened (full disk, ...) */
601 {
602 /* An error happened. Find out which one caused the error and resubmit all other tasks. */
603 for (size_t i = 0; i < cReqs; i++)
604 {
605 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
606
607 if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED)
608 {
609 /* We call ourselves again to do any error handling which might come up now. */
610 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1);
611 AssertRC(rc);
612 }
613 else if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
614 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0);
615 }
616
617
618 if ( pEndpoint->pFlushReq
619 && !pAioMgr->cRequestsActive
620 && !pEndpoint->fAsyncFlushSupported)
621 {
622 /*
623 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
624 * the async flush API.
625 * This happens only if we just noticed that async flushes are not supported
626 * and the only active request was a flush.
627 */
628 PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
629 pEndpoint->pFlushReq = NULL;
630 pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
631 pdmacFileTaskFree(pEndpoint, pFlush);
632 }
633 }
634 }
635
636 return VINF_SUCCESS;
637}
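/*
 * The recovery pattern used above, reduced to its essentials: when
 * RTFileAioCtxSubmit() fails, every handle in the batch is probed
 * individually with RTFileAioReqGetRC(). VERR_FILE_AIO_IN_PROGRESS means the
 * host accepted that request; anything else means it was not submitted and
 * must be parked for a retry or completed with the error. The function name
 * is an illustrative assumption.
 */
#if 0 /* illustrative sketch */
static void exampleHandlePartialSubmit(RTFILEAIOCTX hAioCtx, RTFILEAIOREQ *pahReqs, size_t cReqs)
{
    int rc = RTFileAioCtxSubmit(hAioCtx, pahReqs, cReqs);
    if (RT_FAILURE(rc))
    {
        for (size_t i = 0; i < cReqs; i++)
        {
            int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
            if (rcReq == VERR_FILE_AIO_IN_PROGRESS)
                continue; /* Accepted by the host, will complete normally. */
            /* Not submitted: park the task on a retry list or fail it with rcReq. */
        }
    }
}
#endif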
638
639static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
640 RTFOFF offStart, size_t cbRange,
641 PPDMACTASKFILE pTask, bool fAlignedReq)
642{
643 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
644 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
645 ("Invalid task type %d\n", pTask->enmTransferType));
646
647 /*
648 * If there is no unaligned request active and the current one is aligned
649 * just pass it through.
650 */
651 if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
652 return false;
653
654 PPDMACFILERANGELOCK pRangeLock;
655 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
656 if (!pRangeLock)
657 {
658 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
659 /* Check if we intersect with the range. */
660 if ( !pRangeLock
661 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
662 && (pRangeLock->Core.KeyLast) >= offStart))
663 {
664 pRangeLock = NULL; /* False alarm */
665 }
666 }
667
668 /* Check whether we have one of the situations explained below */
669 if (pRangeLock)
670 {
671 /* Add to the list. */
672 pTask->pNext = NULL;
673
674 if (!pRangeLock->pWaitingTasksHead)
675 {
676 Assert(!pRangeLock->pWaitingTasksTail);
677 pRangeLock->pWaitingTasksHead = pTask;
678 pRangeLock->pWaitingTasksTail = pTask;
679 }
680 else
681 {
682 AssertPtr(pRangeLock->pWaitingTasksTail);
683 pRangeLock->pWaitingTasksTail->pNext = pTask;
684 pRangeLock->pWaitingTasksTail = pTask;
685 }
686 return true;
687 }
688
689 return false;
690}
691
692static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
693 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
694 RTFOFF offStart, size_t cbRange,
695 PPDMACTASKFILE pTask, bool fAlignedReq)
696{
697 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p offStart=%RTfoff cbRange=%zu pTask=%#p\n",
698 pAioMgr, pEndpoint, offStart, cbRange, pTask));
699
700 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask, fAlignedReq),
701 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
702 offStart, cbRange));
703
704 /*
705 * If there is no unaligned request active and the current one is aligned
706 * just don't use the lock.
707 */
708 if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
709 {
710 pTask->pRangeLock = NULL;
711 return VINF_SUCCESS;
712 }
713
714 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
715 if (!pRangeLock)
716 return VERR_NO_MEMORY;
717
718 /* Init the lock. */
719 pRangeLock->Core.Key = offStart;
720 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
721 pRangeLock->cRefs = 1;
722 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
723 pRangeLock->pWaitingTasksHead = NULL;
724 pRangeLock->pWaitingTasksTail = NULL;
725
726 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
727 AssertMsg(fInserted, ("Range lock was not inserted!\n"));
728
729 /* Let the task point to its lock. */
730 pTask->pRangeLock = pRangeLock;
731 pEndpoint->AioMgr.cLockedReqsActive++;
732
733 return VINF_SUCCESS;
734}
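/*
 * The closed-interval overlap test behind the range-lock lookup above: two
 * ranges [offA, offLastA] and [offB, offLastB] intersect exactly when each
 * one starts no later than the other one ends. The helper name is an
 * illustrative assumption.
 */
#if 0 /* illustrative sketch */
static bool exampleRangesOverlap(RTFOFF offA, RTFOFF offLastA, RTFOFF offB, RTFOFF offLastB)
{
    return offA <= offLastB
        && offLastA >= offB;
}
/* Example: a write locking [0, 511] overlaps a read at [256, 767],
 * so the read has to wait on the lock's task list until the write completes. */
#endif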
735
736static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
737 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
738 PPDMACFILERANGELOCK pRangeLock)
739{
740 PPDMACTASKFILE pTasksWaitingHead;
741
742 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p pRangeLock=%#p\n",
743 pAioMgr, pEndpoint, pRangeLock));
744
745 /* pRangeLock can be NULL if there was no lock assigned to the task. */
746 if (!pRangeLock)
747 return NULL;
748
749 Assert(pRangeLock->cRefs == 1);
750
751 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
752 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
753 pRangeLock->pWaitingTasksHead = NULL;
754 pRangeLock->pWaitingTasksTail = NULL;
755 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
756 pEndpoint->AioMgr.cLockedReqsActive--;
757
758 return pTasksWaitingHead;
759}
760
761static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
762 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
763 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
764{
765 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
766 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
767 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
768 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
769
770 pTask->fPrefetch = false;
771 pTask->cbBounceBuffer = 0;
772
773 /*
774 * Before we start to set up the request we have to check whether there is a task
775 * already active whose range intersects with ours. We have to defer execution
776 * of this task in two cases:
777 * - The pending task is a write and the current one is either a read or a write.
778 * - The pending task is a read and the current task is a write.
779 *
780 * To check whether a range is currently "locked" we use the AVL tree where every pending task
781 * is stored by its file offset range. The current task will be added to the waiting list
782 * of the active task and will be executed when the active one completes. (The method below
783 * which checks whether a range is already used will add the task.)
784 *
785 * This is necessary because of the requirement to align all requests to a 512 byte boundary
786 * which is enforced by the host OS (Linux and Windows atm). It is possible that
787 * we have to process unaligned tasks and need to align them using bounce buffers.
788 * While the data is fetched from the file another request might arrive writing to
789 * the same range. This would result in data corruption if both were executed concurrently.
790 */
791 int rc = VINF_SUCCESS;
792 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask,
793 true /* fAlignedReq */);
794 if (!fLocked)
795 {
796 /* Get a request handle. */
797 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
798 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
799
800 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
801 {
802 /* Grow the file if needed. */
803 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
804 {
805 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
806 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
807 }
808
809 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
810 pTask->Off, pTask->DataSeg.pvSeg,
811 pTask->DataSeg.cbSeg, pTask);
812 }
813 else
814 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
815 pTask->Off, pTask->DataSeg.pvSeg,
816 pTask->DataSeg.cbSeg, pTask);
817 AssertRC(rc);
818
819 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
820 pTask->DataSeg.cbSeg,
821 pTask, true /* fAlignedReq */);
822
823 if (RT_SUCCESS(rc))
824 {
825 pTask->hReq = hReq;
826 *phReq = hReq;
827 }
828 }
829 else
830 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
831
832 return rc;
833}
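/*
 * The deferral rule from the comment block above as a predicate: on an
 * overlapping range only read-after-read is safe, every combination involving
 * a write must wait. Note that pdmacFileAioMgrNormalIsRangeLocked() above is
 * even more conservative and defers on any intersection with a locked range.
 * The helper name is an illustrative assumption.
 */
#if 0 /* illustrative sketch */
static bool exampleMustDefer(bool fActiveIsWrite, bool fNewIsWrite)
{
    return fActiveIsWrite || fNewIsWrite; /* read + read is the only safe pair */
}
#endif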
834
835static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
836 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
837 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
838{
839 /*
840 * Check if the alignment requirements are met.
841 * Offset, transfer size and buffer address
842 * need to be on a 512 boundary.
843 */
844 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
845 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
846 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
847 bool fAlignedReq = cbToTransfer == pTask->DataSeg.cbSeg
848 && offStart == pTask->Off;
849
850 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
851 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
852 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
853 offStart, cbToTransfer, pEndpoint->cbFile));
854
855 pTask->fPrefetch = false;
856
857 /*
858 * Before we start to set up the request we have to check whether there is a task
859 * already active whose range intersects with ours. We have to defer execution
860 * of this task in two cases:
861 * - The pending task is a write and the current one is either a read or a write.
862 * - The pending task is a read and the current task is a write.
863 *
864 * To check whether a range is currently "locked" we use the AVL tree where every pending task
865 * is stored by its file offset range. The current task will be added to the waiting list
866 * of the active task and will be executed when the active one completes. (The method below
867 * which checks whether a range is already used will add the task.)
868 *
869 * This is necessary because of the requirement to align all requests to a 512 byte boundary
870 * which is enforced by the host OS (Linux and Windows atm). It is possible that
871 * we have to process unaligned tasks and need to align them using bounce buffers.
872 * While the data is fetched from the file another request might arrive writing to
873 * the same range. This would result in data corruption if both were executed concurrently.
874 */
875 int rc = VINF_SUCCESS;
876 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
877 if (!fLocked)
878 {
879 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
880 void *pvBuf = pTask->DataSeg.pvSeg;
881
882 /* Get a request handle. */
883 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
884 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
885
886 if ( !fAlignedReq
887 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
888 {
889 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
890 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
891
892 /* Create bounce buffer. */
893 pTask->cbBounceBuffer = cbToTransfer;
894
895 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
896 pTask->Off, offStart));
897 pTask->offBounceBuffer = pTask->Off - offStart;
898
899 /** @todo I think we need something like an RTMemAllocAligned method here.
900 * Current assumption is that the maximum alignment is 4096 bytes
901 * (GPT disk on Windows),
902 * so we can use RTMemPageAlloc here.
903 */
904 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
905 if (RT_LIKELY(pTask->pvBounceBuffer))
906 {
907 pvBuf = pTask->pvBounceBuffer;
908
909 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
910 {
911 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
912 || RT_UNLIKELY(offStart != pTask->Off))
913 {
914 /* We have to fill the buffer first before we can update the data. */
915 LogFlow(("Prefetching data for task %#p\n", pTask));
916 pTask->fPrefetch = true;
917 enmTransferType = PDMACTASKFILETRANSFER_READ;
918 }
919 else
920 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
921 }
922 }
923 else
924 rc = VERR_NO_MEMORY;
925 }
926 else
927 pTask->cbBounceBuffer = 0;
928
929 if (RT_SUCCESS(rc))
930 {
931 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
932 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
933
934 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
935 {
936 /* Grow the file if needed. */
937 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
938 {
939 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
940 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
941 }
942
943 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
944 offStart, pvBuf, cbToTransfer, pTask);
945 }
946 else
947 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
948 offStart, pvBuf, cbToTransfer, pTask);
949 AssertRC(rc);
950
951 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
952 if (RT_SUCCESS(rc))
953 {
954 pTask->hReq = hReq;
955 *phReq = hReq;
956 }
957 else
958 {
959 /* Cleanup */
960 if (pTask->cbBounceBuffer)
961 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
962 }
963 }
964 }
965 else
966 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
967
968 return rc;
969}
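/*
 * The 512 byte alignment math used above, with a worked example. For a
 * request at offset 700 with 1000 bytes of data:
 *   offStart     = 700 & ~511                          = 512
 *   cbToTransfer = RT_ALIGN_Z(1000 + (700 - 512), 512) = 1536
 * so the bounce buffer covers file range [512, 2047] and the caller's data
 * sits at offBounceBuffer = 700 - 512 = 188 within it. The helper name is an
 * illustrative assumption.
 */
#if 0 /* illustrative sketch */
static void exampleAlignRequest(RTFOFF off, size_t cbSeg, RTFOFF *poffStart, size_t *pcbToTransfer)
{
    *poffStart     = off & ~(RTFOFF)(512 - 1);                    /* Round down to sector start. */
    *pcbToTransfer = RT_ALIGN_Z(cbSeg + (off - *poffStart), 512); /* Round the length up. */
}
#endif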
970
971static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
972 PPDMACEPFILEMGR pAioMgr,
973 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
974{
975 RTFILEAIOREQ apReqs[20];
976 unsigned cRequests = 0;
977 int rc = VINF_SUCCESS;
978
979 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
980 ("Trying to process request lists of a non active endpoint!\n"));
981
982 /* Go through the list and queue the requests until we get a flush request */
983 while ( pTaskHead
984 && !pEndpoint->pFlushReq
985 && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
986 && RT_SUCCESS(rc))
987 {
988 RTMSINTERVAL msWhenNext;
989 PPDMACTASKFILE pCurr = pTaskHead;
990
991 if (!pdmacEpIsTransferAllowed(&pEndpoint->Core, (uint32_t)pCurr->DataSeg.cbSeg, &msWhenNext))
992 {
993 pAioMgr->msBwLimitExpired = RT_MIN(pAioMgr->msBwLimitExpired, msWhenNext);
994 break;
995 }
996
997 pTaskHead = pTaskHead->pNext;
998
999 pCurr->pNext = NULL;
1000
1001 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
1002 ("Endpoints do not match\n"));
1003
1004 switch (pCurr->enmTransferType)
1005 {
1006 case PDMACTASKFILETRANSFER_FLUSH:
1007 {
1008 /* If there is no data transfer request pending, this flush request finishes immediately. */
1009 if (pEndpoint->fAsyncFlushSupported)
1010 {
1011 /* Issue a flush to the host. */
1012 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
1013 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
1014
1015 LogFlow(("Flush request %#p\n", hReq));
1016
1017 rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->hFile, pCurr);
1018 if (RT_FAILURE(rc))
1019 {
1020 LogRel(("AIOMgr: Preparing flush failed with %Rrc, disabling async flushes\n", rc));
1021 pEndpoint->fAsyncFlushSupported = false;
1022 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1023 rc = VINF_SUCCESS; /* Fake success */
1024 }
1025 else
1026 {
1027 pCurr->hReq = hReq;
1028 apReqs[cRequests] = hReq;
1029 pEndpoint->AioMgr.cReqsProcessed++;
1030 cRequests++;
1031 }
1032 }
1033
1034 if ( !pEndpoint->AioMgr.cRequestsActive
1035 && !pEndpoint->fAsyncFlushSupported)
1036 {
1037 pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
1038 pdmacFileTaskFree(pEndpoint, pCurr);
1039 }
1040 else
1041 {
1042 Assert(!pEndpoint->pFlushReq);
1043 pEndpoint->pFlushReq = pCurr;
1044 }
1045 break;
1046 }
1047 case PDMACTASKFILETRANSFER_READ:
1048 case PDMACTASKFILETRANSFER_WRITE:
1049 {
1050 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
1051
1052 if (pCurr->hReq == NIL_RTFILEAIOREQ)
1053 {
1054 if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
1055 rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1056 else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
1057 rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1058 else
1059 AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));
1060
1061 AssertRC(rc);
1062 }
1063 else
1064 {
1065 LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
1066 hReq = pCurr->hReq;
1067 }
1068
1069 LogFlow(("Read/Write request %#p\n", hReq));
1070
1071 if (hReq != NIL_RTFILEAIOREQ)
1072 {
1073 apReqs[cRequests] = hReq;
1074 cRequests++;
1075 }
1076 break;
1077 }
1078 default:
1079 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
1080 } /* switch transfer type */
1081
1082 /* Queue the requests if the array is full. */
1083 if (cRequests == RT_ELEMENTS(apReqs))
1084 {
1085 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1086 cRequests = 0;
1087 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1088 ("Unexpected return code\n"));
1089 }
1090 }
1091
1092 if (cRequests)
1093 {
1094 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1095 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1096 ("Unexpected return code rc=%Rrc\n", rc));
1097 }
1098
1099 if (pTaskHead)
1100 {
1101 /* Add the rest of the tasks to the pending list */
1102 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
1103
1104 if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
1105 && !pEndpoint->pFlushReq))
1106 {
1107#if 0
1108 /*
1109 * The I/O manager has no room left for more requests
1110 * but there are still requests to process.
1111 * Create a new I/O manager and let it handle some endpoints.
1112 */
1113 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
1114#else
1115 /* Grow the I/O manager */
1116 pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
1117#endif
1118 }
1119 }
1120
1121 /* Insufficient resources are not fatal. */
1122 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
1123 rc = VINF_SUCCESS;
1124
1125 return rc;
1126}
1127
1128/**
1129 * Adds all pending requests for the given endpoint
1130 * until a flush request is encountered or there are no
1131 * requests left.
1132 *
1133 * @returns VBox status code.
1134 * @param pAioMgr The async I/O manager for the endpoint
1135 * @param pEndpoint The endpoint to get the requests from.
1136 */
1137static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1138 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1139{
1140 int rc = VINF_SUCCESS;
1141 PPDMACTASKFILE pTasksHead = NULL;
1142
1143 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1144 ("Trying to process request lists of a non active endpoint!\n"));
1145
1146 Assert(!pEndpoint->pFlushReq);
1147
1148 /* Check the pending list first */
1149 if (pEndpoint->AioMgr.pReqsPendingHead)
1150 {
1151 LogFlow(("Queuing pending requests first\n"));
1152
1153 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1154 /*
1155 * Clear the list as the processing routine will insert them into the list
1156 * again if it gets a flush request.
1157 */
1158 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1159 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1160 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1161 AssertRC(rc); /** @todo r=bird: status code potentially overwritten. */
1162 }
1163
1164 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1165 {
1166 /* Now the request queue. */
1167 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1168 if (pTasksHead)
1169 {
1170 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1171 AssertRC(rc);
1172 }
1173 }
1174
1175 return rc;
1176}
1177
1178static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
1179{
1180 int rc = VINF_SUCCESS;
1181 bool fNotifyWaiter = false;
1182
1183 LogFlowFunc((": Enter\n"));
1184
1185 Assert(pAioMgr->fBlockingEventPending);
1186
1187 switch (pAioMgr->enmBlockingEvent)
1188 {
1189 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
1190 {
1191 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1192 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without an endpoint to add\n"));
1193
1194 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1195
1196 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
1197 pEndpointNew->AioMgr.pEndpointPrev = NULL;
1198 if (pAioMgr->pEndpointsHead)
1199 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
1200 pAioMgr->pEndpointsHead = pEndpointNew;
1201
1202 /* Assign the completion point to this file. */
1203 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->hFile);
1204 fNotifyWaiter = true;
1205 pAioMgr->cEndpoints++;
1206 break;
1207 }
1208 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
1209 {
1210 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1211 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without an endpoint to remove\n"));
1212
1213 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
1214 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
1215 break;
1216 }
1217 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
1218 {
1219 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1220 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without an endpoint to close\n"));
1221
1222 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1223 {
1224 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
1225
1226 /* Make sure all tasks finished. Process the queues one last time first. */
1227 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
1228 AssertRC(rc);
1229
1230 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
1231 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
1232 }
1233 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
1234 && (!pEndpointClose->AioMgr.cRequestsActive))
1235 fNotifyWaiter = true;
1236 break;
1237 }
1238 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
1239 {
1240 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
1241 if (!pAioMgr->cRequestsActive)
1242 fNotifyWaiter = true;
1243 break;
1244 }
1245 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
1246 {
1247 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
1248 break;
1249 }
1250 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
1251 {
1252 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
1253 fNotifyWaiter = true;
1254 break;
1255 }
1256 default:
1257 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
1258 }
1259
1260 if (fNotifyWaiter)
1261 {
1262 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1263 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
1264
1265 /* Release the waiting thread. */
1266 LogFlow(("Signalling waiter\n"));
1267 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1268 AssertRC(rc);
1269 }
1270
1271 LogFlowFunc((": Leave\n"));
1272 return rc;
1273}
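/*
 * The blocking-event handshake seen above, reduced to the flag + semaphore
 * pattern: the worker acknowledges the pending flag before signalling the
 * semaphore the producer blocks on. The producer side lives elsewhere in this
 * subsystem; the helper name and parameters are illustrative assumptions.
 */
#if 0 /* illustrative sketch */
static int exampleAckBlockingEvent(volatile bool *pfEventPending, RTSEMEVENT hSemBlock)
{
    /* ... act on the event data first ... */
    ASMAtomicWriteBool(pfEventPending, false); /* Clear the flag before waking the producer. */
    return RTSemEventSignal(hSemBlock);        /* Release the waiting thread. */
}
#endif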
1274
1275/**
1276 * Checks all endpoints for pending events or new requests.
1277 *
1278 * @returns VBox status code.
1279 * @param pAioMgr The I/O manager handle.
1280 */
1281static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1282{
1283 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1284 int rc = VINF_SUCCESS;
1285 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1286
1287 pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT;
1288
1289 while (pEndpoint)
1290 {
1291 if (!pEndpoint->pFlushReq
1292 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1293 && !pEndpoint->AioMgr.fMoving)
1294 {
1295 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1296 if (RT_FAILURE(rc))
1297 return rc;
1298 }
1299 else if ( !pEndpoint->AioMgr.cRequestsActive
1300 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1301 {
1302 /* Reopen the file so that the new endpoint can re-associate with the file */
1303 RTFileClose(pEndpoint->hFile);
1304 rc = RTFileOpen(&pEndpoint->hFile, pEndpoint->Core.pszUri, pEndpoint->fFlags);
1305 AssertRC(rc);
1306
1307 if (pEndpoint->AioMgr.fMoving)
1308 {
1309 pEndpoint->AioMgr.fMoving = false;
1310 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1311 }
1312 else
1313 {
1314 Assert(pAioMgr->fBlockingEventPending);
1315 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1316
1317 /* Release the waiting thread. */
1318 LogFlow(("Signalling waiter\n"));
1319 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1320 AssertRC(rc);
1321 }
1322 }
1323
1324 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1325 }
1326
1327 return rc;
1328}
1329
1330/**
1331 * Wrapper around pdmacFileAioMgrNormalReqCompleteRc().
1332 */
1333static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1334{
1335 size_t cbTransfered = 0;
1336 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1337
1338 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered);
1339}
1340
1341static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
1342 int rcReq, size_t cbTransfered)
1343{
1344 int rc = VINF_SUCCESS;
1345 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1346 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
1347 PPDMACTASKFILE pTasksWaiting;
1348
1349 LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));
1350
1351 pEndpoint = pTask->pEndpoint;
1352
1353 pTask->hReq = NIL_RTFILEAIOREQ;
1354
1355 pAioMgr->cRequestsActive--;
1356 pEndpoint->AioMgr.cRequestsActive--;
1357 pEndpoint->AioMgr.cReqsProcessed++;
1358
1359 /*
1360 * It is possible that the request failed on Linux with kernels < 2.6.23
1361 * if the passed buffer was allocated with remap_pfn_range or if the file
1362 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1363 * The endpoint will be migrated to a failsafe manager in case a request fails.
1364 */
1365 if (RT_FAILURE(rcReq))
1366 {
1367 /* Free bounce buffers and the IPRT request. */
1368 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1369
1370 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1371 {
1372 LogRel(("AIOMgr: Flush failed with %Rrc, disabling async flushes\n", rcReq));
1373 pEndpoint->fAsyncFlushSupported = false;
1374 AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
1375 /* The other method will take over now. */
1376
1377 pEndpoint->pFlushReq = NULL;
1378 /* Call completion callback */
1379 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, VINF_SUCCESS));
1380 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1381 pdmacFileTaskFree(pEndpoint, pTask);
1382 }
1383 else
1384 {
1385 /* Free the lock and process pending tasks if necessary */
1386 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1387 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1388 AssertRC(rc);
1389
1390 if (pTask->cbBounceBuffer)
1391 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1392
1393 /*
1394 * Fatal errors are reported to the guest and non-fatal errors
1395 * will cause a migration to the failsafe manager in the hope
1396 * that the error disappears.
1397 */
1398 if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
1399 {
1400 /* Queue the request on the pending list. */
1401 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1402 pEndpoint->AioMgr.pReqsPendingHead = pTask;
1403
1404 /* Create a new failsafe manager if necessary. */
1405 if (!pEndpoint->AioMgr.fMoving)
1406 {
1407 PPDMACEPFILEMGR pAioMgrFailsafe;
1408
1409 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1410 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
1411
1412 pEndpoint->AioMgr.fMoving = true;
1413
1414 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1415 &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
1416 AssertRC(rc);
1417
1418 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
1419
1420 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1421 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1422 }
1423
1424 /* If this was the last request for the endpoint migrate it to the new manager. */
1425 if (!pEndpoint->AioMgr.cRequestsActive)
1426 {
1427 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1428 Assert(!fReqsPending);
1429
1430 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1431 AssertRC(rc);
1432 }
1433 }
1434 else
1435 {
1436 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1437 pdmacFileTaskFree(pEndpoint, pTask);
1438 }
1439 }
1440 }
1441 else
1442 {
1443 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1444 {
1445 /* Clear pending flush */
1446 AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
1447 pEndpoint->pFlushReq = NULL;
1448 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1449
1450 /* Call completion callback */
1451 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
1452 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1453 pdmacFileTaskFree(pEndpoint, pTask);
1454 }
1455 else
1456 {
1457 /*
1458 * Restart an incomplete transfer.
1459 * This usually means that the request will return an error now
1460 * but to get the cause of the error (disk full, file too big, I/O error, ...)
1461 * the transfer needs to be continued.
1462 */
1463 if (RT_UNLIKELY( cbTransfered < pTask->DataSeg.cbSeg
1464 || ( pTask->cbBounceBuffer
1465 && cbTransfered < pTask->cbBounceBuffer)))
1466 {
1467 RTFOFF offStart;
1468 size_t cbToTransfer;
1469 uint8_t *pbBuf = NULL;
1470
1471 LogFlow(("Restarting incomplete transfer %#p (%zu bytes transferred)\n",
1472 pTask, cbTransfered));
1473 Assert(cbTransfered % 512 == 0);
1474
1475 if (pTask->cbBounceBuffer)
1476 {
1477 AssertPtr(pTask->pvBounceBuffer);
1478 offStart = (pTask->Off & ~((RTFOFF)512-1)) + cbTransfered;
1479 cbToTransfer = pTask->cbBounceBuffer - cbTransfered;
1480 pbBuf = (uint8_t *)pTask->pvBounceBuffer + cbTransfered;
1481 }
1482 else
1483 {
1484 Assert(!pTask->pvBounceBuffer);
1485 offStart = pTask->Off + cbTransfered;
1486 cbToTransfer = pTask->DataSeg.cbSeg - cbTransfered;
1487 pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + cbTransfered;
1488 }
1489
1490 if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1491 {
1492 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile, offStart,
1493 pbBuf, cbToTransfer, pTask);
1494 }
1495 else
1496 {
1497 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
1498 ("Invalid transfer type\n"));
1499 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, offStart,
1500 pbBuf, cbToTransfer, pTask);
1501 }
1502 AssertRC(rc);
1503
1504 pTask->hReq = hReq;
1505 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1506 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1507 ("Unexpected return code rc=%Rrc\n", rc));
1508 }
1509 else if (pTask->fPrefetch)
1510 {
1511 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1512 Assert(pTask->cbBounceBuffer);
1513
1514 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1515 pTask->DataSeg.pvSeg,
1516 pTask->DataSeg.cbSeg);
1517
1518 /* Write it now. */
1519 pTask->fPrefetch = false;
1520 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
1521 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
1522
1523 /* Grow the file if needed. */
1524 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1525 {
1526 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
1527 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
1528 }
1529
1530 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
1531 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1532 AssertRC(rc);
1533 pTask->hReq = hReq;
1534 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1535 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1536 ("Unexpected return code rc=%Rrc\n", rc));
1537 }
1538 else
1539 {
1540 if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
1541 {
1542 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1543 memcpy(pTask->DataSeg.pvSeg,
1544 ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1545 pTask->DataSeg.cbSeg);
1546
1547 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1548 }
1549
1550 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1551
1552 /* Free the lock and process pending tasks if necessary */
1553 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1554 if (pTasksWaiting)
1555 {
1556 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1557 AssertRC(rc);
1558 }
1559
1560 /* Call completion callback */
1561 LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
1562 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1563 pdmacFileTaskFree(pEndpoint, pTask);
1564
1565 /*
1566 * If there are no requests left on the endpoint but a flush request is set,
1567 * it has completed now and we notify the owner.
1568 * Furthermore we look for new requests and continue.
1569 */
1570 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1571 {
1572 /* Call completion callback */
1573 pTask = pEndpoint->pFlushReq;
1574 pEndpoint->pFlushReq = NULL;
1575
1576 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
1577
1578 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1579 pdmacFileTaskFree(pEndpoint, pTask);
1580 }
1581 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1582 {
1583 /* If the endpoint is about to be migrated do it now. */
1584 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1585 Assert(!fReqsPending);
1586
1587 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1588 AssertRC(rc);
1589 }
1590 }
1591 } /* Not a flush request */
1592 } /* request completed successfully */
1593}
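/*
 * The restart math for incomplete transfers handled above: cbTransfered
 * counts from the start of the submitted (aligned) request, so the
 * continuation simply advances offset and buffer pointer by that amount.
 * Example without a bounce buffer: Off=4096, cbSeg=8192 and 4608 bytes done
 * restarts at offset 4096 + 4608 = 8704 for the remaining 3584 bytes. The
 * helper name is an illustrative assumption.
 */
#if 0 /* illustrative sketch */
static void exampleRestartPoint(RTFOFF off, size_t cbSeg, size_t cbTransfered,
                                RTFOFF *poffNext, size_t *pcbLeft)
{
    *poffNext = off + (RTFOFF)cbTransfered; /* Continue right after the completed part. */
    *pcbLeft  = cbSeg - cbTransfered;       /* Bytes still outstanding. */
}
#endif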
1594
1595/** Helper macro for checking for error codes. */
1596#define CHECK_RC(pAioMgr, rc) \
1597 if (RT_FAILURE(rc)) \
1598 {\
1599 int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS);\
1600 return rc2;\
1601 }
1602
1603/**
1604 * The normal I/O manager using the RTFileAio* API
1605 *
1606 * @returns VBox status code.
1607 * @param hThreadSelf Handle of the thread.
1608 * @param pvUser Opaque user data.
1609 */
1610DECLCALLBACK(int) pdmacFileAioMgrNormal(RTTHREAD hThreadSelf, void *pvUser)
1611{
1612 int rc = VINF_SUCCESS;
1613 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1614 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1615 NOREF(hThreadSelf);
1616
1617 while ( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1618 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING
1619 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1620 {
1621 if (!pAioMgr->cRequestsActive)
1622 {
1623 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1624 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1625 rc = RTSemEventWait(pAioMgr->EventSem, pAioMgr->msBwLimitExpired);
1626 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1627 Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT);
1628
1629 LogFlow(("Got woken up\n"));
1630 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1631 }
1632
1633 /* Check for an external blocking event first. */
1634 if (pAioMgr->fBlockingEventPending)
1635 {
1636 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1637 CHECK_RC(pAioMgr, rc);
1638 }
1639
1640 if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1641 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1642 {
1643 /* We got woken up because an endpoint issued new requests. Queue them. */
1644 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1645 CHECK_RC(pAioMgr, rc);
1646
1647 while (pAioMgr->cRequestsActive)
1648 {
1649 RTFILEAIOREQ apReqs[20];
1650 uint32_t cReqsCompleted = 0;
1651 size_t cReqsWait;
1652
1653 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1654 cReqsWait = RT_ELEMENTS(apReqs);
1655 else
1656 cReqsWait = pAioMgr->cRequestsActive;
1657
1658 LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));
1659
1660 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1661 1,
1662 RT_INDEFINITE_WAIT, apReqs,
1663 cReqsWait, &cReqsCompleted);
1664 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1665 CHECK_RC(pAioMgr, rc);
1666
1667 LogFlow(("%d tasks completed\n", cReqsCompleted));
1668
1669 for (uint32_t i = 0; i < cReqsCompleted; i++)
1670 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1671
1672 /* Check for an external blocking event before we go to sleep again. */
1673 if (pAioMgr->fBlockingEventPending)
1674 {
1675 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1676 CHECK_RC(pAioMgr, rc);
1677 }
1678
1679 /* Update load statistics. */
1680 uint64_t uMillisCurr = RTTimeMilliTS();
1681 if (uMillisCurr > uMillisEnd)
1682 {
1683 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1684
1685 /* Calculate timespan. */
1686 uMillisCurr -= uMillisEnd;
1687
1688 while (pEndpointCurr)
1689 {
1690 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1691 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1692 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1693 }
1694
1695 /* Set new update interval */
1696 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1697 }
1698
1699 /* Check endpoints for new requests. */
1700 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1701 {
1702 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1703 CHECK_RC(pAioMgr, rc);
1704 }
1705 } /* while requests are active. */
1706
1707 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1708 {
1709 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1710 AssertRC(rc);
1711 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
1712
1713 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1714 CHECK_RC(pAioMgr, rc);
1715 }
1716 } /* if still running */
1717 } /* while running */
1718
1719 LogFlowFunc(("rc=%Rrc\n", rc));
1720 return rc;
1721}
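/*
 * The completion loop at the heart of the worker thread above, reduced to a
 * stand-alone form: wait for at least one completion (up to the size of the
 * batch array), then reap the status and byte count of each returned request.
 * The function name is an illustrative assumption.
 */
#if 0 /* illustrative sketch */
static int exampleReapCompletions(RTFILEAIOCTX hAioCtx, uint32_t cRequestsActive)
{
    RTFILEAIOREQ ahReqs[20];
    uint32_t     cCompleted = 0;
    size_t       cReqsWait  = RT_MIN(cRequestsActive, RT_ELEMENTS(ahReqs));

    int rc = RTFileAioCtxWait(hAioCtx, 1 /* cMinReqs */, RT_INDEFINITE_WAIT,
                              ahReqs, cReqsWait, &cCompleted);
    if (RT_SUCCESS(rc))
    {
        for (uint32_t i = 0; i < cCompleted; i++)
        {
            size_t cbTransfered = 0;
            int rcReq = RTFileAioReqGetRC(ahReqs[i], &cbTransfered);
            /* ... dispatch the completion using rcReq and cbTransfered ... */
            NOREF(rcReq); NOREF(cbTransfered);
        }
    }
    return rc;
}
#endif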
1722
1723#undef CHECK_RC
1724