VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 35205

Last change on this file since 35205 was 35205, checked in by vboxsync, 14 years ago

AsyncCompletion: Don't waste CPU cycles when all endpoints have reached the bandwidth limit and go to sleep

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 64.3 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 35205 2010-12-16 18:35:02Z vboxsync $ */
2/** @file
 * PDM Async I/O - Transports data asynchronously in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Oracle Corporation
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 */
18#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
19#include <iprt/types.h>
20#include <iprt/asm.h>
21#include <iprt/file.h>
22#include <iprt/mem.h>
23#include <iprt/string.h>
24#include <iprt/assert.h>
25#include <VBox/log.h>
26
27#include "PDMAsyncCompletionFileInternal.h"
28
29/** The update period for the I/O load statistics in ms. */
30#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
31/** Maximum number of requests a manager will handle. */
32#define PDMACEPFILEMGR_REQS_STEP 512
33
34/*******************************************************************************
35* Internal functions *
36*******************************************************************************/
37static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
38 PPDMACEPFILEMGR pAioMgr,
39 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
40
41static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
42 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
43 PPDMACFILERANGELOCK pRangeLock);
44
45static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
46 int rc, size_t cbTransfered);
47
48int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
49{
50 int rc = VINF_SUCCESS;
51
52 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
53
54 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
55 if (rc == VERR_OUT_OF_RANGE)
56 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax);
57
58 if (RT_SUCCESS(rc))
59 {
60 /* Initialize request handle array. */
61 pAioMgr->iFreeEntry = 0;
62 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
63 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
64 pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT;
65
66 if (pAioMgr->pahReqsFree)
67 {
68 /* Create the range lock memcache. */
69 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
70 0, UINT32_MAX, NULL, NULL, NULL, 0);
71 if (RT_SUCCESS(rc))
72 return VINF_SUCCESS;
73
74 RTMemFree(pAioMgr->pahReqsFree);
75 }
76 else
77 {
78 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
79 rc = VERR_NO_MEMORY;
80 }
81 }
82
83 return rc;
84}
85
86void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
87{
88 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
89
90 while (pAioMgr->iFreeEntry > 0)
91 {
92 pAioMgr->iFreeEntry--;
93 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
94 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
95 }
96
97 RTMemFree(pAioMgr->pahReqsFree);
98 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
99}
100
/**
 * Sorts the endpoint list with insertion sort.
 *
 * Orders the manager's doubly linked endpoint list by descending request
 * rate (AioMgr.cReqsPerSec), so the busiest endpoint ends up at the head.
 * In DEBUG builds the resulting order and element count are re-validated.
 */
static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
{
    PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;

    /* Start at the second element; a one-element prefix is trivially sorted. */
    pEpPrev = pAioMgr->pEndpointsHead;
    pEpCurr = pEpPrev->AioMgr.pEndpointNext;

    while (pEpCurr)
    {
        /* Remember the next element to sort because the list might change. */
        pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;

        /* Unlink the current element from the list. */
        PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
        PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;

        if (pPrev)
            pPrev->AioMgr.pEndpointNext = pNext;
        else
            pAioMgr->pEndpointsHead = pNext;

        if (pNext)
            pNext->AioMgr.pEndpointPrev = pPrev;

        /* Go back until we reached the place to insert the current endpoint into. */
        /* NOTE(review): pEpPrev is carried over from the previous iteration rather
         * than re-anchored at the element preceding pEpCurr - confirm this matches
         * the intended insertion-sort invariant. */
        while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
            pEpPrev = pEpPrev->AioMgr.pEndpointPrev;

        /* Link the endpoint into the list after pEpPrev (or at the head). */
        if (pEpPrev)
            pNext = pEpPrev->AioMgr.pEndpointNext;
        else
            pNext = pAioMgr->pEndpointsHead;

        pEpCurr->AioMgr.pEndpointNext = pNext;
        pEpCurr->AioMgr.pEndpointPrev = pEpPrev;

        if (pNext)
            pNext->AioMgr.pEndpointPrev = pEpCurr;

        if (pEpPrev)
            pEpPrev->AioMgr.pEndpointNext = pEpCurr;
        else
            pAioMgr->pEndpointsHead = pEpCurr;

        pEpCurr = pEpNextToSort;
    }

#ifdef DEBUG
    /* Validate sorting algorithm: walk the list, check descending order and count. */
    unsigned cEndpoints = 0;
    pEpCurr = pAioMgr->pEndpointsHead;

    AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
    AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));

    while (pEpCurr)
    {
        cEndpoints++;

        PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
        PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;

        Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
        Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);

        pEpCurr = pNext;
    }

    AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));

#endif
}
177
178/**
179 * Removes an endpoint from the currently assigned manager.
180 *
181 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
182 * FALSE otherwise.
183 * @param pEndpointRemove The endpoint to remove.
184 */
185static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
186{
187 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
188 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
189 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
190
191 pAioMgr->cEndpoints--;
192
193 if (pPrev)
194 pPrev->AioMgr.pEndpointNext = pNext;
195 else
196 pAioMgr->pEndpointsHead = pNext;
197
198 if (pNext)
199 pNext->AioMgr.pEndpointPrev = pPrev;
200
201 /* Make sure that there is no request pending on this manager for the endpoint. */
202 if (!pEndpointRemove->AioMgr.cRequestsActive)
203 {
204 Assert(!pEndpointRemove->pFlushReq);
205
206 /* Reopen the file so that the new endpoint can re-associate with the file */
207 RTFileClose(pEndpointRemove->File);
208 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
209 AssertRC(rc);
210 return false;
211 }
212
213 return true;
214}
215
216static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
217{
218 /* Balancing doesn't make sense with only one endpoint. */
219 if (pAioMgr->cEndpoints == 1)
220 return false;
221
222 /* Doesn't make sens to move endpoints if only one produces the whole load */
223 unsigned cEndpointsWithLoad = 0;
224
225 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
226
227 while (pCurr)
228 {
229 if (pCurr->AioMgr.cReqsPerSec)
230 cEndpointsWithLoad++;
231
232 pCurr = pCurr->AioMgr.pEndpointNext;
233 }
234
235 return (cEndpointsWithLoad > 1);
236}
237
238/**
239 * Creates a new I/O manager and spreads the I/O load of the endpoints
240 * between the given I/O manager and the new one.
241 *
242 * @returns nothing.
243 * @param pAioMgr The I/O manager with high I/O load.
244 */
245static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
246{
247 PPDMACEPFILEMGR pAioMgrNew = NULL;
248 int rc = VINF_SUCCESS;
249
250 /*
251 * Check if balancing would improve the situation.
252 */
253 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
254 {
255 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
256
257 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
258 if (RT_SUCCESS(rc))
259 {
260 /* We will sort the list by request count per second. */
261 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
262
263 /* Now move some endpoints to the new manager. */
264 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
265 unsigned cReqsOther = 0;
266 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
267
268 while (pCurr)
269 {
270 if (cReqsHere <= cReqsOther)
271 {
272 /*
273 * The other manager has more requests to handle now.
274 * We will keep the current endpoint.
275 */
276 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
277 cReqsHere += pCurr->AioMgr.cReqsPerSec;
278 pCurr = pCurr->AioMgr.pEndpointNext;
279 }
280 else
281 {
282 /* Move to other endpoint. */
283 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
284 cReqsOther += pCurr->AioMgr.cReqsPerSec;
285
286 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
287
288 pCurr = pCurr->AioMgr.pEndpointNext;
289
290 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
291
292 if (fReqsPending)
293 {
294 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
295 pMove->AioMgr.fMoving = true;
296 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
297 }
298 else
299 {
300 pMove->AioMgr.fMoving = false;
301 pMove->AioMgr.pAioMgrDst = NULL;
302 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
303 }
304 }
305 }
306 }
307 else
308 {
309 /* Don't process further but leave a log entry about reduced performance. */
310 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
311 }
312 }
313 else
314 Log(("AIOMgr: Load balancing would not improve anything\n"));
315}
316
/**
 * Increase the maximum number of active requests for the given I/O manager.
 *
 * Grows the manager by PDMACEPFILEMGR_REQS_STEP: creates a bigger AIO
 * context, swaps it in and enlarges the free request handle cache.  On
 * Windows the endpoint files are reopened first and re-associated with
 * the new context afterwards.  Must only be called while the manager is
 * in the GROWING state and has no active requests.
 *
 * @returns VBox status code.
 * @param   pAioMgr    The I/O manager to grow.
 */
static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
{
    int rc = VINF_SUCCESS;
    RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;

    LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));

    AssertMsg(    pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
              && !pAioMgr->cRequestsActive,
              ("Invalid state of the I/O manager\n"));

#ifdef RT_OS_WINDOWS
    /*
     * Reopen the files of all assigned endpoints first so we can assign them to the new
     * I/O context.
     */
    PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;

    while (pCurr)
    {
        RTFileClose(pCurr->File);
        rc = RTFileOpen(&pCurr->File, pCurr->Core.pszUri, pCurr->fFlags);
        AssertRC(rc);

        pCurr = pCurr->AioMgr.pEndpointNext;
    }
#endif

    /* Create the new bigger context. */
    pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;

    /* Prefer an unlimited context; fall back to the new maximum if the host caps it. */
    rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS);
    if (rc == VERR_OUT_OF_RANGE)
        rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax);

    if (RT_SUCCESS(rc))
    {
        /* Close the old context. */
        rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
        AssertRC(rc);

        pAioMgr->hAioCtx = hAioCtxNew;

        /* Create a new I/O task handle array */
        /* NOTE(review): the new cache holds cRequestsActiveMax + 1 entries while
         * pdmacFileAioMgrNormalInit() allocates exactly cRequestsActiveMax -
         * confirm whether the +1 is intentional. */
        uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
        RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));

        if (pahReqNew)
        {
            /* Copy the cached request handles. */
            for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
                pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];

            RTMemFree(pAioMgr->pahReqsFree);
            pAioMgr->pahReqsFree = pahReqNew;
            pAioMgr->cReqEntries = cReqEntriesNew;
            LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
                         pAioMgr->cRequestsActiveMax));
        }
        else
            rc = VERR_NO_MEMORY;
    }

#ifdef RT_OS_WINDOWS
    /* Assign the file to the new context. */
    pCurr = pAioMgr->pEndpointsHead;

    while (pCurr)
    {
        rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->File);
        AssertRC(rc);

        pCurr = pCurr->AioMgr.pEndpointNext;
    }
#endif

    if (RT_FAILURE(rc))
    {
        /* Roll the maximum back so the manager keeps working at the old size. */
        LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
        pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
    }

    pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
    LogFlowFunc(("returns rc=%Rrc\n", rc));

    return rc;
}
410
411/**
412 * Checks if a given status code is fatal.
413 * Non fatal errors can be fixed by migrating the endpoint to a
414 * failsafe manager.
415 *
416 * @returns true If the error is fatal and migrating to a failsafe manager doesn't help
417 * false If the error can be fixed by a migration. (image on NFS disk for example)
418 * @param rcReq The status code to check.
419 */
420DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
421{
422 return rcReq == VERR_DEV_IO_ERROR
423 || rcReq == VERR_FILE_IO_ERROR
424 || rcReq == VERR_DISK_IO_ERROR
425 || rcReq == VERR_DISK_FULL
426 || rcReq == VERR_FILE_TOO_BIG;
427}
428
429/**
430 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
431 *
432 * @returns VBox status code
433 * @param pAioMgr The I/O manager the error occurred on.
434 * @param rc The error code.
435 */
436static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
437{
438 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
439 pAioMgr, rc));
440 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
441 LogRel(("AIOMgr: Please contact the product vendor\n"));
442
443 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
444
445 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
446 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
447
448 AssertMsgFailed(("Implement\n"));
449 return VINF_SUCCESS;
450}
451
452/**
453 * Put a list of tasks in the pending request list of an endpoint.
454 */
455DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
456{
457 /* Add the rest of the tasks to the pending list */
458 if (!pEndpoint->AioMgr.pReqsPendingHead)
459 {
460 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
461 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
462 }
463 else
464 {
465 Assert(pEndpoint->AioMgr.pReqsPendingTail);
466 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
467 }
468
469 /* Update the tail. */
470 while (pTaskHead->pNext)
471 pTaskHead = pTaskHead->pNext;
472
473 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
474 pTaskHead->pNext = NULL;
475}
476
477/**
478 * Put one task in the pending request list of an endpoint.
479 */
480DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
481{
482 /* Add the rest of the tasks to the pending list */
483 if (!pEndpoint->AioMgr.pReqsPendingHead)
484 {
485 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
486 pEndpoint->AioMgr.pReqsPendingHead = pTask;
487 }
488 else
489 {
490 Assert(pEndpoint->AioMgr.pReqsPendingTail);
491 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
492 }
493
494 pEndpoint->AioMgr.pReqsPendingTail = pTask;
495 pTask->pNext = NULL;
496}
497
498/**
499 * Allocates a async I/O request.
500 *
501 * @returns Handle to the request.
502 * @param pAioMgr The I/O manager.
503 */
504static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
505{
506 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
507
508 /* Get a request handle. */
509 if (pAioMgr->iFreeEntry > 0)
510 {
511 pAioMgr->iFreeEntry--;
512 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
513 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
514 Assert(hReq != NIL_RTFILEAIOREQ);
515 }
516 else
517 {
518 int rc = RTFileAioReqCreate(&hReq);
519 AssertRC(rc);
520 }
521
522 return hReq;
523}
524
525/**
526 * Frees a async I/O request handle.
527 *
528 * @returns nothing.
529 * @param pAioMgr The I/O manager.
530 * @param hReq The I/O request handle to free.
531 */
532static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
533{
534 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
535 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
536
537 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
538 pAioMgr->iFreeEntry++;
539}
540
541/**
542 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
543 */
544static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
545 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
546 PRTFILEAIOREQ pahReqs, unsigned cReqs)
547{
548 int rc;
549
550 pAioMgr->cRequestsActive += cReqs;
551 pEndpoint->AioMgr.cRequestsActive += cReqs;
552
553 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
554 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
555
556 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
557 if (RT_FAILURE(rc))
558 {
559 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
560 {
561 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
562
563 /* Append any not submitted task to the waiting list. */
564 for (size_t i = 0; i < cReqs; i++)
565 {
566 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
567
568 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
569 {
570 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
571
572 Assert(pTask->hReq == pahReqs[i]);
573 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
574 pAioMgr->cRequestsActive--;
575 pEndpoint->AioMgr.cRequestsActive--;
576
577 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
578 {
579 /* Clear the pending flush */
580 Assert(pEndpoint->pFlushReq == pTask);
581 pEndpoint->pFlushReq = NULL;
582 }
583 }
584 }
585
586 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
587
588 /* Print an entry in the release log */
589 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
590 {
591 pEpClass->fOutOfResourcesWarningPrinted = true;
592 LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
593 pAioMgr->cRequestsActive));
594 }
595
596 LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
597 LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
598 rc = VINF_SUCCESS;
599 }
600 else /* Another kind of error happened (full disk, ...) */
601 {
602 /* An error happened. Find out which one caused the error and resubmit all other tasks. */
603 for (size_t i = 0; i < cReqs; i++)
604 {
605 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
606
607 if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED)
608 {
609 /* We call ourself again to do any error handling which might come up now. */
610 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1);
611 AssertRC(rc);
612 }
613 else if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
614 {
615 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
616
617 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0);
618 }
619 }
620
621
622 if ( pEndpoint->pFlushReq
623 && !pAioMgr->cRequestsActive
624 && !pEndpoint->fAsyncFlushSupported)
625 {
626 /*
627 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
628 * the async flush API.
629 * Happens only if this we just noticed that this is not supported
630 * and the only active request was a flush.
631 */
632 PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
633 pEndpoint->pFlushReq = NULL;
634 pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
635 pdmacFileTaskFree(pEndpoint, pFlush);
636 }
637 }
638 }
639
640 return VINF_SUCCESS;
641}
642
/**
 * Checks whether a task's transfer range intersects an already locked range
 * and, if so, queues the task on that lock for deferred processing.
 *
 * @returns true if the task was deferred (range is locked),
 *          false if the range is free and the task may be submitted now.
 * @param   pEndpoint    The endpoint owning the range lock tree.
 * @param   offStart     Start offset of the transfer.
 * @param   cbRange      Size of the transfer in bytes.
 * @param   pTask        The read or write task to check and possibly queue.
 */
static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
                                               RTFOFF offStart, size_t cbRange,
                                               PPDMACTASKFILE pTask)
{
    PPDMACFILERANGELOCK pRangeLock = NULL; /**< Range lock */

    AssertMsg(   pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
              || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
               ("Invalid task type %d\n", pTask->enmTransferType));

    /* First try an exact range hit on the start offset... */
    pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
    if (!pRangeLock)
    {
        /* ...otherwise fetch the nearest range above and test for overlap. */
        pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
        /* Check if we intersect with the range. */
        if (   !pRangeLock
            || !(   (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
                 && (pRangeLock->Core.KeyLast) >= offStart))
        {
            pRangeLock = NULL; /* False alarm */
        }
    }

    /* Check whether we have one of the situations explained below */
    if (   pRangeLock
#if 0 /** @todo: later. For now we will just block all requests if they interfere */
        && (   (pRangeLock->fReadLock && pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
            || (!pRangeLock->fReadLock)
#endif
        )
    {
        /* Range is locked: append the task to the lock's waiting list (FIFO). */
        pTask->pNext = NULL;

        if (!pRangeLock->pWaitingTasksHead)
        {
            Assert(!pRangeLock->pWaitingTasksTail);
            pRangeLock->pWaitingTasksHead = pTask;
            pRangeLock->pWaitingTasksTail = pTask;
        }
        else
        {
            AssertPtr(pRangeLock->pWaitingTasksTail);
            pRangeLock->pWaitingTasksTail->pNext = pTask;
            pRangeLock->pWaitingTasksTail = pTask;
        }
        return true;
    }

    return false;
}
694
695static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
696 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
697 RTFOFF offStart, size_t cbRange,
698 PPDMACTASKFILE pTask)
699{
700 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask),
701 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
702 offStart, cbRange));
703
704 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
705 if (!pRangeLock)
706 return VERR_NO_MEMORY;
707
708 /* Init the lock. */
709 pRangeLock->Core.Key = offStart;
710 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
711 pRangeLock->cRefs = 1;
712 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
713 pRangeLock->pWaitingTasksHead = NULL;
714 pRangeLock->pWaitingTasksTail = NULL;
715
716 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
717 AssertMsg(fInserted, ("Range lock was not inserted!\n"));
718
719 /* Let the task point to its lock. */
720 pTask->pRangeLock = pRangeLock;
721
722 return VINF_SUCCESS;
723}
724
725static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
726 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
727 PPDMACFILERANGELOCK pRangeLock)
728{
729 PPDMACTASKFILE pTasksWaitingHead;
730
731 AssertPtr(pRangeLock);
732 Assert(pRangeLock->cRefs == 1);
733
734 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
735 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
736 pRangeLock->pWaitingTasksHead = NULL;
737 pRangeLock->pWaitingTasksTail = NULL;
738 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
739
740 return pTasksWaitingHead;
741}
742
743static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
744 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
745 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
746{
747 int rc = VINF_SUCCESS;
748 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
749 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
750 void *pvBuf = pTask->DataSeg.pvSeg;
751
752 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
753 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
754 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
755 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
756
757 pTask->fPrefetch = false;
758 pTask->cbBounceBuffer = 0;
759
760 /*
761 * Before we start to setup the request we have to check whether there is a task
762 * already active which range intersects with ours. We have to defer execution
763 * of this task in two cases:
764 * - The pending task is a write and the current is either read or write
765 * - The pending task is a read and the current task is a write task.
766 *
767 * To check whether a range is currently "locked" we use the AVL tree where every pending task
768 * is stored by its file offset range. The current task will be added to the active task
769 * and will be executed when the active one completes. (The method below
770 * which checks whether a range is already used will add the task)
771 *
772 * This is necessary because of the requirement to align all requests to a 512 boundary
773 * which is enforced by the host OS (Linux and Windows atm). It is possible that
774 * we have to process unaligned tasks and need to align them using bounce buffers.
775 * While the data is fetched from the file another request might arrive writing to
776 * the same range. This will result in data corruption if both are executed concurrently.
777 */
778 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask);
779
780 if (!fLocked)
781 {
782 /* Get a request handle. */
783 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
784 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
785
786 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
787 {
788 /* Grow the file if needed. */
789 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
790 {
791 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
792 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
793 }
794
795 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
796 pTask->Off, pTask->DataSeg.pvSeg,
797 pTask->DataSeg.cbSeg, pTask);
798 }
799 else
800 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
801 pTask->Off, pTask->DataSeg.pvSeg,
802 pTask->DataSeg.cbSeg, pTask);
803 AssertRC(rc);
804
805 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
806 pTask->DataSeg.cbSeg,
807 pTask);
808
809 if (RT_SUCCESS(rc))
810 {
811 pTask->hReq = hReq;
812 *phReq = hReq;
813 }
814 }
815 else
816 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
817
818 return rc;
819}
820
/**
 * Prepares a non-buffered (host-aligned) read or write request for submission.
 *
 * Aligns offset and size to the 512 byte boundary required by the host and,
 * when the caller's buffer or range is unaligned, sets up a page-allocated
 * bounce buffer (reading the surrounding data first for partial writes,
 * see fPrefetch).
 *
 * @returns VBox status code.
 * @param   pAioMgr      The I/O manager.
 * @param   pEndpoint    The endpoint the task belongs to.
 * @param   pTask        The read/write task to prepare.
 * @param   phReq        Where to store the prepared request handle on success.
 */
static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
                                                       PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
                                                       PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
{
    int rc = VINF_SUCCESS;
    RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
    PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
    void *pvBuf = pTask->DataSeg.pvSeg;

    /*
     * Check if the alignment requirements are met.
     * Offset, transfer size and buffer address
     * need to be on a 512 boundary.
     */
    RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
    size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
    PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;

    AssertMsg(   pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
              || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
              ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
               offStart, cbToTransfer, pEndpoint->cbFile));

    pTask->fPrefetch = false;

    /*
     * Before we start to setup the request we have to check whether there is a task
     * already active which range intersects with ours. We have to defer execution
     * of this task in two cases:
     *     - The pending task is a write and the current is either read or write
     *     - The pending task is a read and the current task is a write task.
     *
     * To check whether a range is currently "locked" we use the AVL tree where every pending task
     * is stored by its file offset range. The current task will be added to the active task
     * and will be executed when the active one completes. (The method below
     * which checks whether a range is already used will add the task)
     *
     * This is necessary because of the requirement to align all requests to a 512 boundary
     * which is enforced by the host OS (Linux and Windows atm). It is possible that
     * we have to process unaligned tasks and need to align them using bounce buffers.
     * While the data is fetched from the file another request might arrive writing to
     * the same range. This will result in data corruption if both are executed concurrently.
     */
    bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask);

    if (!fLocked)
    {
        /* Get a request handle. */
        hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
        AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));

        /* A bounce buffer is needed if size, offset or buffer address is unaligned. */
        if (   RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
            || RT_UNLIKELY(offStart != pTask->Off)
            || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
        {
            LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
                     pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));

            /* Create bounce buffer. */
            pTask->cbBounceBuffer = cbToTransfer;

            AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
                      pTask->Off, offStart));
            pTask->offBounceBuffer = pTask->Off - offStart;

            /** @todo: I think we need something like a RTMemAllocAligned method here.
             * Current assumption is that the maximum alignment is 4096byte
             * (GPT disk on Windows)
             * so we can use RTMemPageAlloc here.
             */
            pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
            if (RT_LIKELY(pTask->pvBounceBuffer))
            {
                pvBuf = pTask->pvBounceBuffer;

                if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
                {
                    if (   RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
                        || RT_UNLIKELY(offStart != pTask->Off))
                    {
                        /* We have to fill the buffer first before we can update the data. */
                        LogFlow(("Prefetching data for task %#p\n", pTask));
                        pTask->fPrefetch = true;
                        enmTransferType = PDMACTASKFILETRANSFER_READ;
                    }
                    else
                        memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
                }
            }
            else
                /* NOTE(review): hReq appears to be neither freed nor returned to the
                 * cache on this path - confirm whether this leaks a request handle. */
                rc = VERR_NO_MEMORY;
        }
        else
            pTask->cbBounceBuffer = 0;

        if (RT_SUCCESS(rc))
        {
            AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
                      ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));

            if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
            {
                /* Grow the file if needed. */
                if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
                {
                    ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
                    RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
                }

                rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
                                              offStart, pvBuf, cbToTransfer, pTask);
            }
            else
                rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
                                             offStart, pvBuf, cbToTransfer, pTask);
            AssertRC(rc);

            rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask);

            if (RT_SUCCESS(rc))
            {
                pTask->hReq = hReq;
                *phReq = hReq;
            }
            else
            {
                /* Cleanup */
                /* NOTE(review): only the bounce buffer is released here, not hReq -
                 * confirm whether the request handle leaks on range-lock failure. */
                if (pTask->cbBounceBuffer)
                    RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
            }
        }
    }
    else
        LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));

    return rc;
}
958
/**
 * Walks the given task list and submits the tasks as host AIO requests.
 *
 * Processing stops when a flush request blocks the endpoint, the endpoint
 * reaches its bandwidth limit, the manager runs out of request slots or a
 * preparation step fails.  Remaining tasks are put back on the endpoint's
 * pending list and the manager is marked for growing if it is saturated.
 *
 * @returns VBox status code.  VERR_FILE_AIO_INSUFFICIENT_RESSOURCES is
 *          masked to VINF_SUCCESS because it is recoverable.
 * @param   pTaskHead  Head of the singly linked list of tasks to process.
 * @param   pAioMgr    The I/O manager instance.
 * @param   pEndpoint  The endpoint the tasks belong to (must be active).
 */
static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
                                                PPDMACEPFILEMGR pAioMgr,
                                                PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
{
    RTFILEAIOREQ apReqs[20];
    unsigned cRequests = 0;
    /* NOTE(review): cMaxRequests is computed but never referenced below --
     * the loop condition recomputes the headroom itself; candidate for removal. */
    unsigned cMaxRequests = pAioMgr->cRequestsActiveMax - pAioMgr->cRequestsActive;
    int rc = VINF_SUCCESS;

    AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
              ("Trying to process request lists of a non active endpoint!\n"));

    /* Go through the list and queue the requests until we get a flush request */
    while (   pTaskHead
           && !pEndpoint->pFlushReq
           && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
           && RT_SUCCESS(rc))
    {
        RTMSINTERVAL msWhenNext;
        PPDMACTASKFILE pCurr = pTaskHead;

        /* Honour the bandwidth limit: remember the earliest expiry so the
         * manager thread can sleep exactly until transfers are allowed again. */
        if (!pdmacEpIsTransferAllowed(&pEndpoint->Core, (uint32_t)pCurr->DataSeg.cbSeg, &msWhenNext))
        {
            pAioMgr->msBwLimitExpired = RT_MIN(pAioMgr->msBwLimitExpired, msWhenNext);
            break;
        }

        pTaskHead = pTaskHead->pNext;

        pCurr->pNext = NULL;

        AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
                  ("Endpoints do not match\n"));

        switch (pCurr->enmTransferType)
        {
            case PDMACTASKFILETRANSFER_FLUSH:
            {
                /* If there is no data transfer request this flush request finished immediately. */
                if (pEndpoint->fAsyncFlushSupported)
                {
                    /* Issue a flush to the host. */
                    RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
                    AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));

                    LogFlow(("Flush request %#p\n", hReq));

                    rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->File, pCurr);
                    if (RT_FAILURE(rc))
                    {
                        /* Host rejected async flushes; fall back permanently to
                         * the "wait until no requests are active" method below. */
                        pEndpoint->fAsyncFlushSupported = false;
                        pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
                        rc = VINF_SUCCESS; /* Fake success */
                    }
                    else
                    {
                        pCurr->hReq = hReq;
                        apReqs[cRequests] = hReq;
                        pEndpoint->AioMgr.cReqsProcessed++;
                        cRequests++;
                    }
                }

                if (   !pEndpoint->AioMgr.cRequestsActive
                    && !pEndpoint->fAsyncFlushSupported)
                {
                    /* Nothing in flight -> the flush is trivially complete. */
                    pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
                    pdmacFileTaskFree(pEndpoint, pCurr);
                }
                else
                {
                    /* Park the flush; it completes once the last active request finishes. */
                    Assert(!pEndpoint->pFlushReq);
                    pEndpoint->pFlushReq = pCurr;
                }
                break;
            }
            case PDMACTASKFILETRANSFER_READ:
            case PDMACTASKFILETRANSFER_WRITE:
            {
                RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;

                if (pCurr->hReq == NIL_RTFILEAIOREQ)
                {
                    /* Prepare a fresh host request according to the caching mode. */
                    if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
                        rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
                    else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
                        rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
                    else
                        AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));

                    AssertRC(rc);
                }
                else
                {
                    /* The task was deferred earlier and already carries a prepared request. */
                    LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
                    hReq = pCurr->hReq;
                }

                LogFlow(("Read/Write request %#p\n", hReq));

                /* hReq stays NIL when the task was deferred (e.g. range lock conflict). */
                if (hReq != NIL_RTFILEAIOREQ)
                {
                    apReqs[cRequests] = hReq;
                    cRequests++;
                }
                break;
            }
            default:
                AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
        } /* switch transfer type */

        /* Queue the requests if the array is full. */
        if (cRequests == RT_ELEMENTS(apReqs))
        {
            rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
            cRequests = 0;
            AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                      ("Unexpected return code\n"));
        }
    }

    /* Submit the remaining partial batch. */
    if (cRequests)
    {
        rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
        AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                  ("Unexpected return code rc=%Rrc\n", rc));
    }

    if (pTaskHead)
    {
        /* Add the rest of the tasks to the pending list */
        pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);

        if (RT_UNLIKELY(   pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
                        && !pEndpoint->pFlushReq))
        {
#if 0
            /*
             * The I/O manager has no room left for more requests
             * but there are still requests to process.
             * Create a new I/O manager and let it handle some endpoints.
             */
            pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
#else
            /* Grow the I/O manager */
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
#endif
        }
    }

    /* Insufficient resources are not fatal. */
    if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
        rc = VINF_SUCCESS;

    return rc;
}
1115
1116/**
1117 * Adds all pending requests for the given endpoint
1118 * until a flush request is encountered or there is no
1119 * request anymore.
1120 *
1121 * @returns VBox status code.
1122 * @param pAioMgr The async I/O manager for the endpoint
1123 * @param pEndpoint The endpoint to get the requests from.
1124 */
1125static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1126 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1127{
1128 int rc = VINF_SUCCESS;
1129 PPDMACTASKFILE pTasksHead = NULL;
1130
1131 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1132 ("Trying to process request lists of a non active endpoint!\n"));
1133
1134 Assert(!pEndpoint->pFlushReq);
1135
1136 /* Check the pending list first */
1137 if (pEndpoint->AioMgr.pReqsPendingHead)
1138 {
1139 LogFlow(("Queuing pending requests first\n"));
1140
1141 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1142 /*
1143 * Clear the list as the processing routine will insert them into the list
1144 * again if it gets a flush request.
1145 */
1146 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1147 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1148 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1149 AssertRC(rc);
1150 }
1151
1152 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1153 {
1154 /* Now the request queue. */
1155 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1156 if (pTasksHead)
1157 {
1158 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1159 AssertRC(rc);
1160 }
1161 }
1162
1163 return rc;
1164}
1165
/**
 * Handles the blocking event currently posted to the I/O manager
 * (add/remove/close endpoint, shutdown, suspend or resume) and signals
 * the thread waiting on EventSemBlock once the event is fully handled.
 *
 * Some events (remove/close with requests in flight, shutdown with active
 * requests) defer the notification until the last request has completed.
 *
 * @returns VBox status code.
 * @param   pAioMgr    The I/O manager.
 */
static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
{
    int rc = VINF_SUCCESS;
    bool fNotifyWaiter = false;

    LogFlowFunc((": Enter\n"));

    Assert(pAioMgr->fBlockingEventPending);

    switch (pAioMgr->enmBlockingEvent)
    {
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
            AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));

            pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;

            /* Link the new endpoint at the head of the manager's endpoint list. */
            pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
            pEndpointNew->AioMgr.pEndpointPrev = NULL;
            if (pAioMgr->pEndpointsHead)
                pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
            pAioMgr->pEndpointsHead = pEndpointNew;

            /* Assign the completion point to this file. */
            rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
            fNotifyWaiter = true;
            pAioMgr->cEndpoints++;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
            AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));

            pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
            /* NOTE(review): presumably RemoveEndpoint returns true while requests
             * are still in flight; the waiter is then signalled on last completion
             * (see pdmacFileAioMgrNormalCheckEndpoints) -- verify. */
            fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
            AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));

            if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
            {
                LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));

                /* Make sure all tasks finished. Process the queues a last time first. */
                rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
                AssertRC(rc);

                pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
                fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
            }
            else if (   (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
                     && (!pEndpointClose->AioMgr.cRequestsActive))
                fNotifyWaiter = true;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
            /* Only notify immediately if nothing is in flight; the main loop
             * drains the rest before exiting. */
            if (!pAioMgr->cRequestsActive)
                fNotifyWaiter = true;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
            fNotifyWaiter = true;
            break;
        }
        default:
            AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
    }

    if (fNotifyWaiter)
    {
        /* Clear the pending flag before signalling so a new event can be posted. */
        ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
        pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;

        /* Release the waiting thread. */
        LogFlow(("Signalling waiter\n"));
        rc = RTSemEventSignal(pAioMgr->EventSemBlock);
        AssertRC(rc);
    }

    LogFlowFunc((": Leave\n"));
    return rc;
}
1262
1263/**
1264 * Checks all endpoints for pending events or new requests.
1265 *
1266 * @returns VBox status code.
1267 * @param pAioMgr The I/O manager handle.
1268 */
static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
{
    /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
    int rc = VINF_SUCCESS;
    PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;

    /* Reset the bandwidth-limit wakeup; queueing below re-arms it if a limit is hit. */
    pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT;

    while (pEndpoint)
    {
        if (!pEndpoint->pFlushReq
            && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
            && !pEndpoint->AioMgr.fMoving)
        {
            /* Normal case: submit whatever the endpoint has queued. */
            rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
            if (RT_FAILURE(rc))
                return rc;
        }
        else if (   !pEndpoint->AioMgr.cRequestsActive
                 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
        {
            /* Reopen the file so that the new endpoint can re-associate with the file */
            RTFileClose(pEndpoint->File);
            rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
            AssertRC(rc);

            if (pEndpoint->AioMgr.fMoving)
            {
                /* Migration pending: hand the endpoint over to its destination manager. */
                pEndpoint->AioMgr.fMoving = false;
                pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
            }
            else
            {
                /* A remove/close blocking event was waiting for the last active
                 * request of this endpoint; complete it now. */
                Assert(pAioMgr->fBlockingEventPending);
                ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);

                /* Release the waiting thread. */
                LogFlow(("Signalling waiter\n"));
                rc = RTSemEventSignal(pAioMgr->EventSemBlock);
                AssertRC(rc);
            }
        }

        pEndpoint = pEndpoint->AioMgr.pEndpointNext;
    }

    return rc;
}
1317
1318/**
1319 * Wrapper around pdmacFileAioMgrNormalReqCompleteRc().
1320 */
1321static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1322{
1323 size_t cbTransfered = 0;
1324 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1325
1326 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered);
1327}
1328
/**
 * Worker for completed host AIO requests: updates the active-request
 * counters, retries incomplete transfers, finishes bounce-buffer
 * handling and performs failure recovery (migration of the endpoint to
 * a failsafe manager for non-fatal errors).
 *
 * @param   pAioMgr       The I/O manager the request was submitted on.
 * @param   hReq          The completed request handle.
 * @param   rcReq         Status code the request completed with.
 * @param   cbTransfered  Number of bytes actually transferred.
 */
static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
                                               int rcReq, size_t cbTransfered)
{
    int rc = VINF_SUCCESS;
    PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
    PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
    PPDMACTASKFILE pTasksWaiting;

    LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));

    pEndpoint = pTask->pEndpoint;

    pTask->hReq = NIL_RTFILEAIOREQ;

    pAioMgr->cRequestsActive--;
    pEndpoint->AioMgr.cRequestsActive--;
    pEndpoint->AioMgr.cReqsProcessed++;

    /*
     * It is possible that the request failed on Linux with kernels < 2.6.23
     * if the passed buffer was allocated with remap_pfn_range or if the file
     * is on an NFS endpoint which does not support async and direct I/O at the same time.
     * The endpoint will be migrated to a failsafe manager in case a request fails.
     */
    if (RT_FAILURE(rcReq))
    {
        /* Free bounce buffers and the IPRT request. */
        pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);

        if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
        {
            LogFlow(("Async flushes are not supported for this endpoint, disabling\n"));
            pEndpoint->fAsyncFlushSupported = false;
            AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
            /* The other method will take over now. */
        }
        else
        {
            /* Free the lock and process pending tasks if necessary */
            pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
            rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
            AssertRC(rc);

            if (pTask->cbBounceBuffer)
                RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);

            /*
             * Fatal errors are reported to the guest and non-fatal errors
             * will cause a migration to the failsafe manager in the hope
             * that the error disappears.
             */
            if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
            {
                /* Queue the request on the pending list. */
                pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
                pEndpoint->AioMgr.pReqsPendingHead = pTask;

                /* Create a new failsafe manager if necessary. */
                if (!pEndpoint->AioMgr.fMoving)
                {
                    PPDMACEPFILEMGR pAioMgrFailsafe;

                    LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
                            RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));

                    pEndpoint->AioMgr.fMoving = true;

                    rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
                                               &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
                    AssertRC(rc);

                    pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;

                    /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
                    pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
                }

                /* If this was the last request for the endpoint migrate it to the new manager. */
                if (!pEndpoint->AioMgr.cRequestsActive)
                {
                    bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
                    Assert(!fReqsPending);

                    rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
                    AssertRC(rc);
                }
            }
            else
            {
                /* Fatal error: report it straight to the task owner. */
                pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
                pdmacFileTaskFree(pEndpoint, pTask);
            }
        }
    }
    else
    {
        if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
        {
            /* Clear pending flush */
            AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
            pEndpoint->pFlushReq = NULL;
            pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);

            /* Call completion callback */
            LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
            pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
            pdmacFileTaskFree(pEndpoint, pTask);
        }
        else
        {
            /*
             * Restart an incomplete transfer.
             * This usually means that the request will return an error now
             * but to get the cause of the error (disk full, file too big, I/O error, ...)
             * the transfer needs to be continued.
             */
            if (RT_UNLIKELY(   cbTransfered < pTask->DataSeg.cbSeg
                            || (   pTask->cbBounceBuffer
                                && cbTransfered < pTask->cbBounceBuffer)))
            {
                RTFOFF offStart;
                size_t cbToTransfer;
                uint8_t *pbBuf = NULL;

                LogFlow(("Restarting incomplete transfer %#p (%zu bytes transferred)\n",
                         pTask, cbTransfered));
                Assert(cbTransfered % 512 == 0);

                if (pTask->cbBounceBuffer)
                {
                    AssertPtr(pTask->pvBounceBuffer);
                    /* Resume within the (sector-aligned) bounce buffer. */
                    offStart = (pTask->Off & ~((RTFOFF)512-1)) + cbTransfered;
                    cbToTransfer = pTask->cbBounceBuffer - cbTransfered;
                    pbBuf = (uint8_t *)pTask->pvBounceBuffer + cbTransfered;
                }
                else
                {
                    Assert(!pTask->pvBounceBuffer);
                    offStart = pTask->Off + cbTransfered;
                    cbToTransfer = pTask->DataSeg.cbSeg - cbTransfered;
                    pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + cbTransfered;
                }

                if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
                {
                    rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File, offStart,
                                                 pbBuf, cbToTransfer, pTask);
                }
                else
                {
                    AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
                              ("Invalid transfer type\n"));
                    rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File, offStart,
                                                  pbBuf, cbToTransfer, pTask);
                }
                AssertRC(rc);

                /* Re-submit the same handle with the remaining range. */
                pTask->hReq = hReq;
                rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
                AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                          ("Unexpected return code rc=%Rrc\n", rc));
            }
            else if (pTask->fPrefetch)
            {
                /* The read-before-write of a misaligned write finished: merge the
                 * caller's data into the bounce buffer and issue the real write. */
                Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
                Assert(pTask->cbBounceBuffer);

                memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
                       pTask->DataSeg.pvSeg,
                       pTask->DataSeg.cbSeg);

                /* Write it now. */
                pTask->fPrefetch = false;
                size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
                RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);

                /* Grow the file if needed. */
                if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
                {
                    ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
                    RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
                }

                rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
                                              offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
                AssertRC(rc);
                pTask->hReq = hReq;
                rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
                AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                          ("Unexpected return code rc=%Rrc\n", rc));
            }
            else
            {
                /* NOTE(review): rc is still VINF_SUCCESS on this path, so the
                 * RT_SUCCESS(rc) check below is always true; rcReq looks like the
                 * intended operand -- verify. */
                if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
                {
                    /* Copy the data from the bounce buffer back to the caller. */
                    if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
                        memcpy(pTask->DataSeg.pvSeg,
                               ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
                               pTask->DataSeg.cbSeg);

                    RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
                }

                pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);

                /* Free the lock and process pending tasks if necessary */
                pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
                if (pTasksWaiting)
                {
                    rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
                    AssertRC(rc);
                }

                /* Call completion callback */
                LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
                pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
                pdmacFileTaskFree(pEndpoint, pTask);

                /*
                 * If there is no request left on the endpoint but a flush request is set
                 * it completed now and we notify the owner.
                 * Furthermore we look for new requests and continue.
                 */
                if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
                {
                    /* Call completion callback */
                    pTask = pEndpoint->pFlushReq;
                    pEndpoint->pFlushReq = NULL;

                    AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));

                    pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
                    pdmacFileTaskFree(pEndpoint, pTask);
                }
                else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
                {
                    /* If the endpoint is about to be migrated do it now. */
                    bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
                    Assert(!fReqsPending);

                    rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
                    AssertRC(rc);
                }
            }
        } /* Not a flush request */
    } /* request completed successfully */
}
1576
/** Helper macro for checking for error codes.
 *
 * Invokes the error handler and returns its status from the calling
 * function on failure.  Wrapped in do { } while (0) so the expansion is a
 * single statement and remains safe after an unbraced if/else (avoids the
 * dangling-else hazard of a bare if in a macro). */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
1584
1585/**
1586 * The normal I/O manager using the RTFileAio* API
1587 *
1588 * @returns VBox status code.
1589 * @param ThreadSelf Handle of the thread.
1590 * @param pvUser Opaque user data.
1591 */
1592int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
1593{
1594 int rc = VINF_SUCCESS;
1595 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1596 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1597
1598 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
1599 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING)
1600 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1601 {
1602 if (!pAioMgr->cRequestsActive)
1603 {
1604 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1605 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1606 rc = RTSemEventWait(pAioMgr->EventSem, pAioMgr->msBwLimitExpired);
1607 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1608 Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT);
1609
1610 LogFlow(("Got woken up\n"));
1611 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1612 }
1613
1614 /* Check for an external blocking event first. */
1615 if (pAioMgr->fBlockingEventPending)
1616 {
1617 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1618 CHECK_RC(pAioMgr, rc);
1619 }
1620
1621 if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1622 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1623 {
1624 /* We got woken up because an endpoint issued new requests. Queue them. */
1625 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1626 CHECK_RC(pAioMgr, rc);
1627
1628 while (pAioMgr->cRequestsActive)
1629 {
1630 RTFILEAIOREQ apReqs[20];
1631 uint32_t cReqsCompleted = 0;
1632 size_t cReqsWait;
1633
1634 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1635 cReqsWait = RT_ELEMENTS(apReqs);
1636 else
1637 cReqsWait = pAioMgr->cRequestsActive;
1638
1639 LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));
1640
1641 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1642 1,
1643 RT_INDEFINITE_WAIT, apReqs,
1644 cReqsWait, &cReqsCompleted);
1645 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1646 CHECK_RC(pAioMgr, rc);
1647
1648 LogFlow(("%d tasks completed\n", cReqsCompleted));
1649
1650 for (uint32_t i = 0; i < cReqsCompleted; i++)
1651 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1652
1653 /* Check for an external blocking event before we go to sleep again. */
1654 if (pAioMgr->fBlockingEventPending)
1655 {
1656 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1657 CHECK_RC(pAioMgr, rc);
1658 }
1659
1660 /* Update load statistics. */
1661 uint64_t uMillisCurr = RTTimeMilliTS();
1662 if (uMillisCurr > uMillisEnd)
1663 {
1664 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1665
1666 /* Calculate timespan. */
1667 uMillisCurr -= uMillisEnd;
1668
1669 while (pEndpointCurr)
1670 {
1671 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1672 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1673 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1674 }
1675
1676 /* Set new update interval */
1677 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1678 }
1679
1680 /* Check endpoints for new requests. */
1681 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1682 {
1683 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1684 CHECK_RC(pAioMgr, rc);
1685 }
1686 } /* while requests are active. */
1687
1688 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1689 {
1690 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1691 AssertRC(rc);
1692 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
1693
1694 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1695 CHECK_RC(pAioMgr, rc);
1696 }
1697 } /* if still running */
1698 } /* while running */
1699
1700 LogFlowFunc(("rc=%Rrc\n", rc));
1701 return rc;
1702}
1703
1704#undef CHECK_RC
1705
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette