VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 32054

Last change on this file since 32054 was 30111, checked in by vboxsync, 15 years ago

iprt/asm.h,*: Revised the ASMAtomic*Ptr functions and macros. The new saves lots of unsafe (void * volatile *) casts as well as adding some type safety when using GCC (typeof rulez).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 65.2 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 30111 2010-06-09 12:14:59Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Oracle Corporation
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 */
18#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
19#include <iprt/types.h>
20#include <iprt/asm.h>
21#include <iprt/file.h>
22#include <iprt/mem.h>
23#include <iprt/string.h>
24#include <iprt/assert.h>
25#include <VBox/log.h>
26
27#include "PDMAsyncCompletionFileInternal.h"
28
29/** The update period for the I/O load statistics in ms. */
30#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
31/** Initial maximum number of active requests of a manager and the step by which that maximum is grown. */
32#define PDMACEPFILEMGR_REQS_STEP 512
33
34/*******************************************************************************
35* Internal functions *
36*******************************************************************************/
37static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
38 PPDMACEPFILEMGR pAioMgr,
39 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
40
41static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
42 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
43 PPDMACFILERANGELOCK pRangeLock);
44
45int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
46{
47 int rc = VINF_SUCCESS;
48
49 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
50
51 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
52 if (rc == VERR_OUT_OF_RANGE)
53 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax);
54
55 if (RT_SUCCESS(rc))
56 {
57 /* Initialize request handle array. */
58 pAioMgr->iFreeEntry = 0;
59 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
60 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
61
62 if (pAioMgr->pahReqsFree)
63 {
64 /* Create the range lock memcache. */
65 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
66 0, UINT32_MAX, NULL, NULL, NULL, 0);
67 if (RT_SUCCESS(rc))
68 return VINF_SUCCESS;
69
70 RTMemFree(pAioMgr->pahReqsFree);
71 }
72 else
73 {
74 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
75 rc = VERR_NO_MEMORY;
76 }
77 }
78
79 return rc;
80}
81
82void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
83{
84 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
85
86 while (pAioMgr->iFreeEntry > 0)
87 {
88 pAioMgr->iFreeEntry--;
89 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
90 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
91 }
92
93 RTMemFree(pAioMgr->pahReqsFree);
94 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
95}
96
97/**
98 * Sorts the endpoint list with insertion sort.
99 */
100static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
101{
102 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
103
104 pEpPrev = pAioMgr->pEndpointsHead;
105 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
106
107 while (pEpCurr)
108 {
109 /* Remember the next element to sort because the list might change. */
110 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
111
112 /* Unlink the current element from the list. */
113 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
114 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
115
116 if (pPrev)
117 pPrev->AioMgr.pEndpointNext = pNext;
118 else
119 pAioMgr->pEndpointsHead = pNext;
120
121 if (pNext)
122 pNext->AioMgr.pEndpointPrev = pPrev;
123
124 /* Go back until we reached the place to insert the current endpoint into. */
125 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
126 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
127
128 /* Link the endpoint into the list. */
129 if (pEpPrev)
130 pNext = pEpPrev->AioMgr.pEndpointNext;
131 else
132 pNext = pAioMgr->pEndpointsHead;
133
134 pEpCurr->AioMgr.pEndpointNext = pNext;
135 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
136
137 if (pNext)
138 pNext->AioMgr.pEndpointPrev = pEpCurr;
139
140 if (pEpPrev)
141 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
142 else
143 pAioMgr->pEndpointsHead = pEpCurr;
144
145 pEpCurr = pEpNextToSort;
146 }
147
148#ifdef DEBUG
149 /* Validate sorting alogrithm */
150 unsigned cEndpoints = 0;
151 pEpCurr = pAioMgr->pEndpointsHead;
152
153 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
154 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
155
156 while (pEpCurr)
157 {
158 cEndpoints++;
159
160 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
161 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
162
163 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
164 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
165
166 pEpCurr = pNext;
167 }
168
169 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
170
171#endif
172}
173
174/**
175 * Removes an endpoint from the currently assigned manager.
176 *
177 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
178 * FALSE otherwise.
179 * @param pEndpointRemove The endpoint to remove.
180 */
181static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
182{
183 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
184 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
185 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
186
187 pAioMgr->cEndpoints--;
188
189 if (pPrev)
190 pPrev->AioMgr.pEndpointNext = pNext;
191 else
192 pAioMgr->pEndpointsHead = pNext;
193
194 if (pNext)
195 pNext->AioMgr.pEndpointPrev = pPrev;
196
197 /* Make sure that there is no request pending on this manager for the endpoint. */
198 if (!pEndpointRemove->AioMgr.cRequestsActive)
199 {
200 Assert(!pEndpointRemove->pFlushReq);
201
202 /* Reopen the file so that the new endpoint can reassociate with the file */
203 RTFileClose(pEndpointRemove->File);
204 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
205 AssertRC(rc);
206 return false;
207 }
208
209 return true;
210}
211
212static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
213{
214 /* Balancing doesn't make sense with only one endpoint. */
215 if (pAioMgr->cEndpoints == 1)
216 return false;
217
218 /* Doesn't make sens to move endpoints if only one produces the whole load */
219 unsigned cEndpointsWithLoad = 0;
220
221 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
222
223 while (pCurr)
224 {
225 if (pCurr->AioMgr.cReqsPerSec)
226 cEndpointsWithLoad++;
227
228 pCurr = pCurr->AioMgr.pEndpointNext;
229 }
230
231 return (cEndpointsWithLoad > 1);
232}
233
234/**
235 * Creates a new I/O manager and spreads the I/O load of the endpoints
236 * between the given I/O manager and the new one.
237 *
238 * @returns nothing.
239 * @param pAioMgr The I/O manager with high I/O load.
240 */
241static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
242{
243 PPDMACEPFILEMGR pAioMgrNew = NULL;
244 int rc = VINF_SUCCESS;
245
246 /*
247 * Check if balancing would improve the situation.
248 */
249 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
250 {
251 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
252
253 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
254 if (RT_SUCCESS(rc))
255 {
256 /* We will sort the list by request count per second. */
257 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
258
259 /* Now move some endpoints to the new manager. */
260 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
261 unsigned cReqsOther = 0;
262 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
263
264 while (pCurr)
265 {
266 if (cReqsHere <= cReqsOther)
267 {
268 /*
269 * The other manager has more requests to handle now.
270 * We will keep the current endpoint.
271 */
272 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
273 cReqsHere += pCurr->AioMgr.cReqsPerSec;
274 pCurr = pCurr->AioMgr.pEndpointNext;
275 }
276 else
277 {
278 /* Move to other endpoint. */
279 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
280 cReqsOther += pCurr->AioMgr.cReqsPerSec;
281
282 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
283
284 pCurr = pCurr->AioMgr.pEndpointNext;
285
286 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
287
288 if (fReqsPending)
289 {
290 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
291 pMove->AioMgr.fMoving = true;
292 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
293 }
294 else
295 {
296 pMove->AioMgr.fMoving = false;
297 pMove->AioMgr.pAioMgrDst = NULL;
298 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
299 }
300 }
301 }
302 }
303 else
304 {
305 /* Don't process further but leave a log entry about reduced performance. */
306 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
307 }
308 }
309 else
310 Log(("AIOMgr: Load balancing would not improve anything\n"));
311}
312
313/**
314 * Increase the maximum number of active requests for the given I/O manager.
315 *
316 * @returns VBox status code.
317 * @param pAioMgr The I/O manager to grow.
318 */
319static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
320{
321 int rc = VINF_SUCCESS;
322 RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
323
324 LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));
325
326 AssertMsg( pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
327 && !pAioMgr->cRequestsActive,
328 ("Invalid state of the I/O manager\n"));
329
330#ifdef RT_OS_WINDOWS
331 /*
332 * Reopen the files of all assigned endpoints first so we can assign them to the new
333 * I/O context.
334 */
335 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
336
337 while (pCurr)
338 {
339 RTFileClose(pCurr->File);
340 rc = RTFileOpen(&pCurr->File, pCurr->Core.pszUri, pCurr->fFlags);
341 AssertRC(rc);
342
343 pCurr = pCurr->AioMgr.pEndpointNext;
344 }
345#endif
346
347 /* Create the new bigger context. */
348 pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;
349
350 rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS);
351 if (rc == VERR_OUT_OF_RANGE)
352 rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax);
353
354 if (RT_SUCCESS(rc))
355 {
356 /* Close the old context. */
357 rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
358 AssertRC(rc);
359
360 pAioMgr->hAioCtx = hAioCtxNew;
361
362 /* Create a new I/O task handle array */
363 uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
364 RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));
365
366 if (pahReqNew)
367 {
368 /* Copy the cached request handles. */
369 for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
370 pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];
371
372 RTMemFree(pAioMgr->pahReqsFree);
373 pAioMgr->pahReqsFree = pahReqNew;
374 pAioMgr->cReqEntries = cReqEntriesNew;
375 LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
376 pAioMgr->cRequestsActiveMax));
377 }
378 else
379 rc = VERR_NO_MEMORY;
380 }
381
382#ifdef RT_OS_WINDOWS
383 /* Assign the file to the new context. */
384 pCurr = pAioMgr->pEndpointsHead;
385
386 while (pCurr)
387 {
388 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->File);
389 AssertRC(rc);
390
391 pCurr = pCurr->AioMgr.pEndpointNext;
392 }
393#endif
394
395 if (RT_FAILURE(rc))
396 {
397 LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
398 pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
399 }
400
401 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
402 LogFlowFunc(("returns rc=%Rrc\n", rc));
403
404 return rc;
405}
406
407/**
408 * Checks if a given status code is fatal.
409 * Non fatal errors can be fixed by migrating the endpoint to a
410 * failsafe manager.
411 *
412 * @returns true If the error is fatal and migrating to a failsafe manager doesn't help
413 * false If the error can be fixed by a migration. (image on NFS disk for example)
414 * @param rcReq The status code to check.
415 */
416DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
417{
418 return rcReq == VERR_DEV_IO_ERROR
419 || rcReq == VERR_FILE_IO_ERROR
420 || rcReq == VERR_DISK_IO_ERROR
421 || rcReq == VERR_DISK_FULL
422 || rcReq == VERR_FILE_TOO_BIG;
423}
424
425/**
426 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
427 *
428 * @returns VBox status code
429 * @param pAioMgr The I/O manager the error ocurred on.
430 * @param rc The error code.
431 */
432static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
433{
434 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
435 pAioMgr, rc));
436 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
437 LogRel(("AIOMgr: Please contact the product vendor\n"));
438
439 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
440
441 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
442 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
443
444 AssertMsgFailed(("Implement\n"));
445 return VINF_SUCCESS;
446}
447
448/**
449 * Put a list of tasks in the pending request list of an endpoint.
450 */
451DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
452{
453 /* Add the rest of the tasks to the pending list */
454 if (!pEndpoint->AioMgr.pReqsPendingHead)
455 {
456 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
457 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
458 }
459 else
460 {
461 Assert(pEndpoint->AioMgr.pReqsPendingTail);
462 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
463 }
464
465 /* Update the tail. */
466 while (pTaskHead->pNext)
467 pTaskHead = pTaskHead->pNext;
468
469 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
470 pTaskHead->pNext = NULL;
471}
472
473/**
474 * Put one task in the pending request list of an endpoint.
475 */
476DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
477{
478 /* Add the rest of the tasks to the pending list */
479 if (!pEndpoint->AioMgr.pReqsPendingHead)
480 {
481 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
482 pEndpoint->AioMgr.pReqsPendingHead = pTask;
483 }
484 else
485 {
486 Assert(pEndpoint->AioMgr.pReqsPendingTail);
487 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
488 }
489
490 pEndpoint->AioMgr.pReqsPendingTail = pTask;
491 pTask->pNext = NULL;
492}
493
494/**
495 * Allocates a async I/O request.
496 *
497 * @returns Handle to the request.
498 * @param pAioMgr The I/O manager.
499 */
500static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
501{
502 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
503
504 /* Get a request handle. */
505 if (pAioMgr->iFreeEntry > 0)
506 {
507 pAioMgr->iFreeEntry--;
508 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
509 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
510 Assert(hReq != NIL_RTFILEAIOREQ);
511 }
512 else
513 {
514 int rc = RTFileAioReqCreate(&hReq);
515 AssertRC(rc);
516 }
517
518 return hReq;
519}
520
521/**
522 * Frees a async I/O request handle.
523 *
524 * @returns nothing.
525 * @param pAioMgr The I/O manager.
526 * @param hReq The I/O request handle to free.
527 */
528static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
529{
530 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
531 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
532
533 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
534 pAioMgr->iFreeEntry++;
535}
536
537/**
538 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
539 */
540static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
541 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
542 PRTFILEAIOREQ pahReqs, unsigned cReqs)
543{
544 int rc;
545
546 pAioMgr->cRequestsActive += cReqs;
547 pEndpoint->AioMgr.cRequestsActive += cReqs;
548
549 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
550 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
551
552 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
553 if (RT_FAILURE(rc))
554 {
555 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
556 unsigned cReqsResubmit = 0;
557 RTFILEAIOREQ ahReqsResubmit[20];
558
559 /*
560 * We run out of resources.
561 * Need to check which requests got queued
562 * and put the rest on the pending list again.
563 */
564 for (size_t i = 0; i < cReqs; i++)
565 {
566 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
567
568 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
569 {
570 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
571
572 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
573 {
574 /* Mark as not supported. */
575 if (rcReq != VERR_FILE_AIO_NOT_SUBMITTED)
576 {
577 LogFlow(("Async flushes are not supported for this endpoint, disabling\n"));
578 pEndpoint->fAsyncFlushSupported = false;
579 pdmacFileAioMgrNormalRequestFree(pAioMgr, pahReqs[i]);
580 rc = VINF_SUCCESS;
581 }
582 else
583 {
584 AssertMsg(rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES, ("Flush wasn't submitted but we are not out of ressources\n"));
585 /* Clear the pending flush */
586 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
587 Assert(pEndpoint->pFlushReq == pTask);
588 pEndpoint->pFlushReq = NULL;
589 }
590 }
591 else
592 {
593 AssertMsg(rcReq == VERR_FILE_AIO_NOT_SUBMITTED,
594 ("Request returned unexpected return code: rc=%Rrc\n", rcReq));
595
596 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
597 {
598 pTask->hReq = pahReqs[i];
599 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
600 }
601 else
602 {
603 ahReqsResubmit[cReqsResubmit] = pahReqs[i];
604 cReqsResubmit++;
605 }
606 }
607
608 pEndpoint->AioMgr.cRequestsActive--;
609 pAioMgr->cRequestsActive--;
610
611 if (cReqsResubmit == RT_ELEMENTS(ahReqsResubmit))
612 {
613 int rc2 = RTFileAioCtxSubmit(pAioMgr->hAioCtx, ahReqsResubmit, cReqsResubmit);
614 AssertRC(rc2);
615 pEndpoint->AioMgr.cRequestsActive += cReqsResubmit;
616 pAioMgr->cRequestsActive += cReqsResubmit;
617 cReqsResubmit = 0;
618 }
619 }
620
621 /* Resubmit tasks. */
622 if (cReqsResubmit)
623 {
624 int rc2 = RTFileAioCtxSubmit(pAioMgr->hAioCtx, ahReqsResubmit, cReqsResubmit);
625 AssertRC(rc2);
626 pEndpoint->AioMgr.cRequestsActive += cReqsResubmit;
627 pAioMgr->cRequestsActive += cReqsResubmit;
628 cReqsResubmit = 0;
629 }
630 else if ( pEndpoint->pFlushReq
631 && !pAioMgr->cRequestsActive
632 && !pEndpoint->fAsyncFlushSupported)
633 {
634 /*
635 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
636 * the async flush API.
637 * Happens only if this we just noticed that this is not supported
638 * and the only active request was a flush.
639 */
640 PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
641 pEndpoint->pFlushReq = NULL;
642 pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
643 pdmacFileTaskFree(pEndpoint, pFlush);
644 }
645 }
646
647 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
648 {
649 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
650
651 /* Print an entry in the release log */
652 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
653 {
654 pEpClass->fOutOfResourcesWarningPrinted = true;
655 LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
656 pAioMgr->cRequestsActive));
657 }
658 }
659
660 LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
661 LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
662 }
663
664 return rc;
665}
666
667static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
668 RTFOFF offStart, size_t cbRange,
669 PPDMACTASKFILE pTask)
670{
671 PPDMACFILERANGELOCK pRangeLock = NULL; /** < Range lock */
672
673 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
674 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
675 ("Invalid task type %d\n", pTask->enmTransferType));
676
677 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
678 if (!pRangeLock)
679 {
680 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
681 /* Check if we intersect with the range. */
682 if ( !pRangeLock
683 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
684 && (pRangeLock->Core.KeyLast) >= offStart))
685 {
686 pRangeLock = NULL; /* False alarm */
687 }
688 }
689
690 /* Check whether we have one of the situations explained below */
691 if ( pRangeLock
692#if 0 /** @todo: later. For now we will just block all requests if they interfere */
693 && ( (pRangeLock->fReadLock && pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
694 || (!pRangeLock->fReadLock)
695#endif
696 )
697 {
698 /* Add to the list. */
699 pTask->pNext = NULL;
700
701 if (!pRangeLock->pWaitingTasksHead)
702 {
703 Assert(!pRangeLock->pWaitingTasksTail);
704 pRangeLock->pWaitingTasksHead = pTask;
705 pRangeLock->pWaitingTasksTail = pTask;
706 }
707 else
708 {
709 AssertPtr(pRangeLock->pWaitingTasksTail);
710 pRangeLock->pWaitingTasksTail->pNext = pTask;
711 pRangeLock->pWaitingTasksTail = pTask;
712 }
713 return true;
714 }
715
716 return false;
717}
718
719static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
720 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
721 RTFOFF offStart, size_t cbRange,
722 PPDMACTASKFILE pTask)
723{
724 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask),
725 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
726 offStart, cbRange));
727
728 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
729 if (!pRangeLock)
730 return VERR_NO_MEMORY;
731
732 /* Init the lock. */
733 pRangeLock->Core.Key = offStart;
734 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
735 pRangeLock->cRefs = 1;
736 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
737 pRangeLock->pWaitingTasksHead = NULL;
738 pRangeLock->pWaitingTasksTail = NULL;
739
740 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
741 AssertMsg(fInserted, ("Range lock was not inserted!\n"));
742
743 /* Let the task point to its lock. */
744 pTask->pRangeLock = pRangeLock;
745
746 return VINF_SUCCESS;
747}
748
749static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
750 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
751 PPDMACFILERANGELOCK pRangeLock)
752{
753 PPDMACTASKFILE pTasksWaitingHead;
754
755 AssertPtr(pRangeLock);
756 Assert(pRangeLock->cRefs == 1);
757
758 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
759 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
760 pRangeLock->pWaitingTasksHead = NULL;
761 pRangeLock->pWaitingTasksTail = NULL;
762 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
763
764 return pTasksWaitingHead;
765}
766
767static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
768 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
769 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
770{
771 int rc = VINF_SUCCESS;
772 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
773 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
774 void *pvBuf = pTask->DataSeg.pvSeg;
775
776 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
777 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
778 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
779 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
780
781 pTask->fPrefetch = false;
782 pTask->cbBounceBuffer = 0;
783
784 /*
785 * Before we start to setup the request we have to check whether there is a task
786 * already active which range intersects with ours. We have to defer execution
787 * of this task in two cases:
788 * - The pending task is a write and the current is either read or write
789 * - The pending task is a read and the current task is a write task.
790 *
791 * To check whether a range is currently "locked" we use the AVL tree where every pending task
792 * is stored by its file offset range. The current task will be added to the active task
793 * and will be executed when the active one completes. (The method below
794 * which checks whether a range is already used will add the task)
795 *
796 * This is neccessary because of the requirement to align all requests to a 512 boundary
797 * which is enforced by the host OS (Linux and Windows atm). It is possible that
798 * we have to process unaligned tasks and need to align them using bounce buffers.
799 * While the data is fetched from the file another request might arrive writing to
800 * the same range. This will result in data corruption if both are executed concurrently.
801 */
802 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask);
803
804 if (!fLocked)
805 {
806 /* Get a request handle. */
807 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
808 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
809
810 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
811 {
812 /* Grow the file if needed. */
813 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
814 {
815 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
816 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
817 }
818
819 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
820 pTask->Off, pTask->DataSeg.pvSeg,
821 pTask->DataSeg.cbSeg, pTask);
822 }
823 else
824 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
825 pTask->Off, pTask->DataSeg.pvSeg,
826 pTask->DataSeg.cbSeg, pTask);
827 AssertRC(rc);
828
829 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
830 pTask->DataSeg.cbSeg,
831 pTask);
832
833 if (RT_SUCCESS(rc))
834 *phReq = hReq;
835 }
836 else
837 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
838
839 return rc;
840}
841
842static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
843 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
844 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
845{
846 int rc = VINF_SUCCESS;
847 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
848 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
849 void *pvBuf = pTask->DataSeg.pvSeg;
850
851 /*
852 * Check if the alignment requirements are met.
853 * Offset, transfer size and buffer address
854 * need to be on a 512 boundary.
855 */
856 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
857 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
858 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
859
860 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
861 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
862 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
863 offStart, cbToTransfer, pEndpoint->cbFile));
864
865 pTask->fPrefetch = false;
866
867 /*
868 * Before we start to setup the request we have to check whether there is a task
869 * already active which range intersects with ours. We have to defer execution
870 * of this task in two cases:
871 * - The pending task is a write and the current is either read or write
872 * - The pending task is a read and the current task is a write task.
873 *
874 * To check whether a range is currently "locked" we use the AVL tree where every pending task
875 * is stored by its file offset range. The current task will be added to the active task
876 * and will be executed when the active one completes. (The method below
877 * which checks whether a range is already used will add the task)
878 *
879 * This is neccessary because of the requirement to align all requests to a 512 boundary
880 * which is enforced by the host OS (Linux and Windows atm). It is possible that
881 * we have to process unaligned tasks and need to align them using bounce buffers.
882 * While the data is fetched from the file another request might arrive writing to
883 * the same range. This will result in data corruption if both are executed concurrently.
884 */
885 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask);
886
887 if (!fLocked)
888 {
889 /* Get a request handle. */
890 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
891 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
892
893 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
894 || RT_UNLIKELY(offStart != pTask->Off)
895 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
896 {
897 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
898 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
899
900 /* Create bounce buffer. */
901 pTask->cbBounceBuffer = cbToTransfer;
902
903 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
904 pTask->Off, offStart));
905 pTask->offBounceBuffer = pTask->Off - offStart;
906
907 /** @todo: I think we need something like a RTMemAllocAligned method here.
908 * Current assumption is that the maximum alignment is 4096byte
909 * (GPT disk on Windows)
910 * so we can use RTMemPageAlloc here.
911 */
912 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
913 if (RT_LIKELY(pTask->pvBounceBuffer))
914 {
915 pvBuf = pTask->pvBounceBuffer;
916
917 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
918 {
919 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
920 || RT_UNLIKELY(offStart != pTask->Off))
921 {
922 /* We have to fill the buffer first before we can update the data. */
923 LogFlow(("Prefetching data for task %#p\n", pTask));
924 pTask->fPrefetch = true;
925 enmTransferType = PDMACTASKFILETRANSFER_READ;
926 }
927 else
928 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
929 }
930 }
931 else
932 rc = VERR_NO_MEMORY;
933 }
934 else
935 pTask->cbBounceBuffer = 0;
936
937 if (RT_SUCCESS(rc))
938 {
939 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
940 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
941
942 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
943 {
944 /* Grow the file if needed. */
945 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
946 {
947 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
948 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
949 }
950
951 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
952 offStart, pvBuf, cbToTransfer, pTask);
953 }
954 else
955 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
956 offStart, pvBuf, cbToTransfer, pTask);
957 AssertRC(rc);
958
959 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask);
960
961 if (RT_SUCCESS(rc))
962 *phReq = hReq;
963 else
964 {
965 /* Cleanup */
966 if (pTask->cbBounceBuffer)
967 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
968 }
969 }
970 }
971 else
972 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
973
974 return rc;
975}
976
977static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
978 PPDMACEPFILEMGR pAioMgr,
979 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
980{
981 RTFILEAIOREQ apReqs[20];
982 unsigned cRequests = 0;
983 unsigned cMaxRequests = pAioMgr->cRequestsActiveMax - pAioMgr->cRequestsActive;
984 int rc = VINF_SUCCESS;
985
986 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
987 ("Trying to process request lists of a non active endpoint!\n"));
988
989 /* Go through the list and queue the requests until we get a flush request */
990 while ( pTaskHead
991 && !pEndpoint->pFlushReq
992 && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
993 && RT_SUCCESS(rc))
994 {
995 PPDMACTASKFILE pCurr = pTaskHead;
996
997 if (!pdmacFileBwMgrIsTransferAllowed(pEndpoint->pBwMgr, (uint32_t)pCurr->DataSeg.cbSeg))
998 {
999 pAioMgr->fBwLimitReached = true;
1000 break;
1001 }
1002
1003 pTaskHead = pTaskHead->pNext;
1004
1005 pCurr->pNext = NULL;
1006
1007 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
1008 ("Endpoints do not match\n"));
1009
1010 switch (pCurr->enmTransferType)
1011 {
1012 case PDMACTASKFILETRANSFER_FLUSH:
1013 {
1014 /* If there is no data transfer request this flush request finished immediately. */
1015 if (pEndpoint->fAsyncFlushSupported)
1016 {
1017 /* Issue a flush to the host. */
1018 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
1019 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
1020
1021 LogFlow(("Flush request %#p\n", hReq));
1022
1023 rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->File, pCurr);
1024 if (RT_FAILURE(rc))
1025 {
1026 pEndpoint->fAsyncFlushSupported = false;
1027 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1028 rc = VINF_SUCCESS; /* Fake success */
1029 }
1030 else
1031 {
1032 apReqs[cRequests] = hReq;
1033 pEndpoint->AioMgr.cReqsProcessed++;
1034 cRequests++;
1035 }
1036 }
1037
1038 if ( !pEndpoint->AioMgr.cRequestsActive
1039 && !pEndpoint->fAsyncFlushSupported)
1040 {
1041 pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
1042 pdmacFileTaskFree(pEndpoint, pCurr);
1043 }
1044 else
1045 {
1046 Assert(!pEndpoint->pFlushReq);
1047 pEndpoint->pFlushReq = pCurr;
1048 }
1049 break;
1050 }
1051 case PDMACTASKFILETRANSFER_READ:
1052 case PDMACTASKFILETRANSFER_WRITE:
1053 {
1054 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
1055
1056 if (pCurr->hReq == NIL_RTFILEAIOREQ)
1057 {
1058 if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
1059 rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1060 else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
1061 rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1062 else
1063 AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));
1064
1065 AssertRC(rc);
1066 }
1067 else
1068 {
1069 LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
1070 hReq = pCurr->hReq;
1071 }
1072
1073 LogFlow(("Read/Write request %#p\n", hReq));
1074
1075 if (hReq != NIL_RTFILEAIOREQ)
1076 {
1077 apReqs[cRequests] = hReq;
1078 cRequests++;
1079 }
1080 break;
1081 }
1082 default:
1083 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
1084 } /* switch transfer type */
1085
1086 /* Queue the requests if the array is full. */
1087 if (cRequests == RT_ELEMENTS(apReqs))
1088 {
1089 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1090 cRequests = 0;
1091 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1092 ("Unexpected return code\n"));
1093 }
1094 }
1095
1096 if (cRequests)
1097 {
1098 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1099 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1100 ("Unexpected return code rc=%Rrc\n", rc));
1101 }
1102
1103 if (pTaskHead)
1104 {
1105 /* Add the rest of the tasks to the pending list */
1106 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
1107
1108 if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
1109 && !pEndpoint->pFlushReq
1110 && !pAioMgr->fBwLimitReached))
1111 {
1112#if 0
1113 /*
1114 * The I/O manager has no room left for more requests
1115 * but there are still requests to process.
1116 * Create a new I/O manager and let it handle some endpoints.
1117 */
1118 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
1119#else
1120 /* Grow the I/O manager */
1121 pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
1122#endif
1123 }
1124 }
1125
1126 /* Insufficient resources are not fatal. */
1127 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
1128 rc = VINF_SUCCESS;
1129
1130 return rc;
1131}
1132
1133/**
1134 * Adds all pending requests for the given endpoint
1135 * until a flush request is encountered or there is no
1136 * request anymore.
1137 *
1138 * @returns VBox status code.
1139 * @param pAioMgr The async I/O manager for the endpoint
1140 * @param pEndpoint The endpoint to get the requests from.
1141 */
1142static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1143 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1144{
1145 int rc = VINF_SUCCESS;
1146 PPDMACTASKFILE pTasksHead = NULL;
1147
1148 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1149 ("Trying to process request lists of a non active endpoint!\n"));
1150
1151 Assert(!pEndpoint->pFlushReq);
1152
1153 /* Check the pending list first */
1154 if (pEndpoint->AioMgr.pReqsPendingHead)
1155 {
1156 LogFlow(("Queuing pending requests first\n"));
1157
1158 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1159 /*
1160 * Clear the list as the processing routine will insert them into the list
1161 * again if it gets a flush request.
1162 */
1163 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1164 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1165 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1166 AssertRC(rc);
1167 }
1168
1169 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1170 {
1171 /* Now the request queue. */
1172 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1173 if (pTasksHead)
1174 {
1175 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1176 AssertRC(rc);
1177 }
1178 }
1179
1180 return rc;
1181}
1182
1183static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
1184{
1185 int rc = VINF_SUCCESS;
1186 bool fNotifyWaiter = false;
1187
1188 LogFlowFunc((": Enter\n"));
1189
1190 Assert(pAioMgr->fBlockingEventPending);
1191
1192 switch (pAioMgr->enmBlockingEvent)
1193 {
1194 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
1195 {
1196 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1197 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
1198
1199 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1200
1201 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
1202 pEndpointNew->AioMgr.pEndpointPrev = NULL;
1203 if (pAioMgr->pEndpointsHead)
1204 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
1205 pAioMgr->pEndpointsHead = pEndpointNew;
1206
1207 /* Assign the completion point to this file. */
1208 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
1209 fNotifyWaiter = true;
1210 pAioMgr->cEndpoints++;
1211 break;
1212 }
1213 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
1214 {
1215 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1216 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
1217
1218 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
1219 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
1220 break;
1221 }
1222 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
1223 {
1224 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1225 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
1226
1227 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1228 {
1229 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
1230
1231 /* Make sure all tasks finished. Process the queues a last time first. */
1232 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
1233 AssertRC(rc);
1234
1235 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
1236 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
1237 }
1238 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
1239 && (!pEndpointClose->AioMgr.cRequestsActive))
1240 fNotifyWaiter = true;
1241 break;
1242 }
1243 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
1244 {
1245 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
1246 if (!pAioMgr->cRequestsActive)
1247 fNotifyWaiter = true;
1248 break;
1249 }
1250 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
1251 {
1252 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
1253 break;
1254 }
1255 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
1256 {
1257 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
1258 fNotifyWaiter = true;
1259 break;
1260 }
1261 default:
1262 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
1263 }
1264
1265 if (fNotifyWaiter)
1266 {
1267 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1268 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
1269
1270 /* Release the waiting thread. */
1271 LogFlow(("Signalling waiter\n"));
1272 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1273 AssertRC(rc);
1274 }
1275
1276 LogFlowFunc((": Leave\n"));
1277 return rc;
1278}
1279
1280/**
1281 * Checks all endpoints for pending events or new requests.
1282 *
1283 * @returns VBox status code.
1284 * @param pAioMgr The I/O manager handle.
1285 */
1286static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1287{
1288 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1289 int rc = VINF_SUCCESS;
1290 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1291
1292 pAioMgr->fBwLimitReached = false;
1293
1294 while (pEndpoint)
1295 {
1296 if (!pEndpoint->pFlushReq
1297 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1298 && !pEndpoint->AioMgr.fMoving)
1299 {
1300 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1301 if (RT_FAILURE(rc))
1302 return rc;
1303 }
1304 else if ( !pEndpoint->AioMgr.cRequestsActive
1305 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1306 {
1307 /* Reopen the file so that the new endpoint can reassociate with the file */
1308 RTFileClose(pEndpoint->File);
1309 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
1310 AssertRC(rc);
1311
1312 if (pEndpoint->AioMgr.fMoving)
1313 {
1314 pEndpoint->AioMgr.fMoving = false;
1315 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1316 }
1317 else
1318 {
1319 Assert(pAioMgr->fBlockingEventPending);
1320 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1321
1322 /* Release the waiting thread. */
1323 LogFlow(("Signalling waiter\n"));
1324 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1325 AssertRC(rc);
1326 }
1327 }
1328
1329 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1330 }
1331
1332 return rc;
1333}
1334
1335static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1336{
1337 int rc = VINF_SUCCESS;
1338 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1339 size_t cbTransfered = 0;
1340 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1341 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
1342 PPDMACTASKFILE pTasksWaiting;
1343
1344 LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));
1345
1346 pEndpoint = pTask->pEndpoint;
1347
1348 pTask->hReq = NIL_RTFILEAIOREQ;
1349
1350 pAioMgr->cRequestsActive--;
1351 pEndpoint->AioMgr.cRequestsActive--;
1352 pEndpoint->AioMgr.cReqsProcessed++;
1353
1354 /*
1355 * It is possible that the request failed on Linux with kernels < 2.6.23
1356 * if the passed buffer was allocated with remap_pfn_range or if the file
1357 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1358 * The endpoint will be migrated to a failsafe manager in case a request fails.
1359 */
1360 if (RT_FAILURE(rcReq))
1361 {
1362 /* Free bounce buffers and the IPRT request. */
1363 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1364
1365 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1366 {
1367 LogFlow(("Async flushes are not supported for this endpoint, disabling\n"));
1368 pEndpoint->fAsyncFlushSupported = false;
1369 AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
1370 /* The other method will take over now. */
1371 }
1372 else
1373 {
1374 /* Free the lock and process pending tasks if neccessary */
1375 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1376 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1377 AssertRC(rc);
1378
1379 if (pTask->cbBounceBuffer)
1380 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1381
1382 /*
1383 * Fatal errors are reported to the guest and non-fatal errors
1384 * will cause a migration to the failsafe manager in the hope
1385 * that the error disappears.
1386 */
1387 if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
1388 {
1389 /* Queue the request on the pending list. */
1390 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1391 pEndpoint->AioMgr.pReqsPendingHead = pTask;
1392
1393 /* Create a new failsafe manager if neccessary. */
1394 if (!pEndpoint->AioMgr.fMoving)
1395 {
1396 PPDMACEPFILEMGR pAioMgrFailsafe;
1397
1398 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1399 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
1400
1401 pEndpoint->AioMgr.fMoving = true;
1402
1403 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1404 &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
1405 AssertRC(rc);
1406
1407 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
1408
1409 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1410 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1411 }
1412
1413 /* If this was the last request for the endpoint migrate it to the new manager. */
1414 if (!pEndpoint->AioMgr.cRequestsActive)
1415 {
1416 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1417 Assert(!fReqsPending);
1418
1419 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1420 AssertRC(rc);
1421 }
1422 }
1423 else
1424 {
1425 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1426 pdmacFileTaskFree(pEndpoint, pTask);
1427 }
1428 }
1429 }
1430 else
1431 {
1432 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1433 {
1434 /* Clear pending flush */
1435 AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
1436 pEndpoint->pFlushReq = NULL;
1437 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1438
1439 /* Call completion callback */
1440 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
1441 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1442 pdmacFileTaskFree(pEndpoint, pTask);
1443 }
1444 else
1445 {
1446 /*
1447 * Restart an incomplete transfer.
1448 * This usually means that the request will return an error now
1449 * but to get the cause of the error (disk full, file too big, I/O error, ...)
1450 * the transfer needs to be continued.
1451 */
1452 if (RT_UNLIKELY( cbTransfered < pTask->DataSeg.cbSeg
1453 || ( pTask->cbBounceBuffer
1454 && cbTransfered < pTask->cbBounceBuffer)))
1455 {
1456 RTFOFF offStart;
1457 size_t cbToTransfer;
1458 uint8_t *pbBuf = NULL;
1459
1460 LogFlow(("Restarting incomplete transfer %#p (%zu bytes transfered)\n",
1461 pTask, cbTransfered));
1462 Assert(cbTransfered % 512 == 0);
1463
1464 if (pTask->cbBounceBuffer)
1465 {
1466 AssertPtr(pTask->pvBounceBuffer);
1467 offStart = (pTask->Off & ~((RTFOFF)512-1)) + cbTransfered;
1468 cbToTransfer = pTask->cbBounceBuffer - cbTransfered;
1469 pbBuf = (uint8_t *)pTask->pvBounceBuffer + cbTransfered;
1470 }
1471 else
1472 {
1473 Assert(!pTask->pvBounceBuffer);
1474 offStart = pTask->Off + cbTransfered;
1475 cbToTransfer = pTask->DataSeg.cbSeg - cbTransfered;
1476 pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + cbTransfered;
1477 }
1478
1479 if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1480 {
1481 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File, offStart,
1482 pbBuf, cbToTransfer, pTask);
1483 }
1484 else
1485 {
1486 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
1487 ("Invalid transfer type\n"));
1488 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File, offStart,
1489 pbBuf, cbToTransfer, pTask);
1490 }
1491
1492 AssertRC(rc);
1493 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1494 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1495 ("Unexpected return code rc=%Rrc\n", rc));
1496 }
1497 else if (pTask->fPrefetch)
1498 {
1499 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1500 Assert(pTask->cbBounceBuffer);
1501
1502 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1503 pTask->DataSeg.pvSeg,
1504 pTask->DataSeg.cbSeg);
1505
1506 /* Write it now. */
1507 pTask->fPrefetch = false;
1508 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
1509 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
1510
1511 /* Grow the file if needed. */
1512 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1513 {
1514 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
1515 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
1516 }
1517
1518 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
1519 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1520 AssertRC(rc);
1521 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1522 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1523 ("Unexpected return code rc=%Rrc\n", rc));
1524 }
1525 else
1526 {
1527 if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
1528 {
1529 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1530 memcpy(pTask->DataSeg.pvSeg,
1531 ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1532 pTask->DataSeg.cbSeg);
1533
1534 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1535 }
1536
1537 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1538
1539 /* Free the lock and process pending tasks if neccessary */
1540 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1541 if (pTasksWaiting)
1542 {
1543 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1544 AssertRC(rc);
1545 }
1546
1547 /* Call completion callback */
1548 LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
1549 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1550 pdmacFileTaskFree(pEndpoint, pTask);
1551
1552 /*
1553 * If there is no request left on the endpoint but a flush request is set
1554 * it completed now and we notify the owner.
1555 * Furthermore we look for new requests and continue.
1556 */
1557 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1558 {
1559 /* Call completion callback */
1560 pTask = pEndpoint->pFlushReq;
1561 pEndpoint->pFlushReq = NULL;
1562
1563 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
1564
1565 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1566 pdmacFileTaskFree(pEndpoint, pTask);
1567 }
1568 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1569 {
1570 /* If the endpoint is about to be migrated do it now. */
1571 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1572 Assert(!fReqsPending);
1573
1574 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1575 AssertRC(rc);
1576 }
1577 }
1578 } /* Not a flush request */
1579 } /* request completed successfully */
1580}
1581
/**
 * Helper macro for checking for error codes.
 *
 * If rc is a failure status the error handler of the manager is invoked and
 * the calling function returns the status of the handler.  Wrapped in
 * do { } while (0) so the macro acts as a single statement and can be used
 * safely as the body of an if/else.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler((pAioMgr), (rc), RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
1589
1590/**
1591 * The normal I/O manager using the RTFileAio* API
1592 *
1593 * @returns VBox status code.
1594 * @param ThreadSelf Handle of the thread.
1595 * @param pvUser Opaque user data.
1596 */
1597int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
1598{
1599 int rc = VINF_SUCCESS;
1600 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1601 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1602
1603 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
1604 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING)
1605 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1606 {
1607 if (!pAioMgr->cRequestsActive)
1608 {
1609 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1610 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1611 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
1612 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1613 AssertRC(rc);
1614
1615 LogFlow(("Got woken up\n"));
1616 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1617 }
1618
1619 /* Check for an external blocking event first. */
1620 if (pAioMgr->fBlockingEventPending)
1621 {
1622 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1623 CHECK_RC(pAioMgr, rc);
1624 }
1625
1626 if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1627 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1628 {
1629 /* We got woken up because an endpoint issued new requests. Queue them. */
1630 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1631 CHECK_RC(pAioMgr, rc);
1632
1633 while ( pAioMgr->cRequestsActive
1634 || pAioMgr->fBwLimitReached)
1635 {
1636 if (pAioMgr->cRequestsActive)
1637 {
1638 RTFILEAIOREQ apReqs[20];
1639 uint32_t cReqsCompleted = 0;
1640 size_t cReqsWait;
1641
1642 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1643 cReqsWait = RT_ELEMENTS(apReqs);
1644 else
1645 cReqsWait = pAioMgr->cRequestsActive;
1646
1647 LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));
1648
1649 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1650 1,
1651 RT_INDEFINITE_WAIT, apReqs,
1652 cReqsWait, &cReqsCompleted);
1653 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1654 CHECK_RC(pAioMgr, rc);
1655
1656 LogFlow(("%d tasks completed\n", cReqsCompleted));
1657
1658 for (uint32_t i = 0; i < cReqsCompleted; i++)
1659 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1660
1661 /* Check for an external blocking event before we go to sleep again. */
1662 if (pAioMgr->fBlockingEventPending)
1663 {
1664 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1665 CHECK_RC(pAioMgr, rc);
1666 }
1667
1668 /* Update load statistics. */
1669 uint64_t uMillisCurr = RTTimeMilliTS();
1670 if (uMillisCurr > uMillisEnd)
1671 {
1672 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1673
1674 /* Calculate timespan. */
1675 uMillisCurr -= uMillisEnd;
1676
1677 while (pEndpointCurr)
1678 {
1679 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1680 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1681 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1682 }
1683
1684 /* Set new update interval */
1685 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1686 }
1687 }
1688 else
1689 {
1690 /*
1691 * Bandwidth limit reached for all endpoints.
1692 * Yield and wait until we have enough resources again.
1693 */
1694 RTThreadYield();
1695 }
1696
1697 /* Check endpoints for new requests. */
1698 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1699 {
1700 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1701 CHECK_RC(pAioMgr, rc);
1702 }
1703 } /* while requests are active. */
1704
1705 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1706 {
1707 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1708 AssertRC(rc);
1709 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
1710
1711 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1712 CHECK_RC(pAioMgr, rc);
1713 }
1714 } /* if still running */
1715 } /* while running */
1716
1717 LogFlowFunc(("rc=%Rrc\n", rc));
1718 return rc;
1719}
1720
1721#undef CHECK_RC
1722
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette