VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp@ 80239

Last change on this file since 80239 was 80191, checked in by vboxsync, 5 years ago

VMM/r3: Refactored VMCPU enumeration in preparation that aCpus will be replaced with a pointer array. Removed two raw-mode offset members from the CPUM and CPUMCPU sub-structures. bugref:9217 bugref:9517

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 66.8 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 80191 2019-08-08 00:36:57Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Async File I/O manager.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define VBOX_BUGREF_9217_PART_I
23#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
24#include <iprt/types.h>
25#include <iprt/asm.h>
26#include <iprt/file.h>
27#include <iprt/mem.h>
28#include <iprt/string.h>
29#include <iprt/assert.h>
30#include <VBox/log.h>
31
32#include "PDMAsyncCompletionFileInternal.h"
33
34
35/*********************************************************************************************************************************
36* Defined Constants And Macros *
37*********************************************************************************************************************************/
38/** The update period for the I/O load statistics in ms. */
39#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
40/** Maximum number of requests a manager will handle. */
41#define PDMACEPFILEMGR_REQS_STEP 64
42
43
44/*********************************************************************************************************************************
45* Internal functions *
46*********************************************************************************************************************************/
47static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
48 PPDMACEPFILEMGR pAioMgr,
49 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
50
51static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
52 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
53 PPDMACFILERANGELOCK pRangeLock);
54
55static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
56 int rc, size_t cbTransfered);
57
58
59int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
60{
61 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
62
63 int rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
64 if (rc == VERR_OUT_OF_RANGE)
65 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
66
67 if (RT_SUCCESS(rc))
68 {
69 /* Initialize request handle array. */
70 pAioMgr->iFreeEntry = 0;
71 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
72 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
73
74 if (pAioMgr->pahReqsFree)
75 {
76 /* Create the range lock memcache. */
77 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
78 0, UINT32_MAX, NULL, NULL, NULL, 0);
79 if (RT_SUCCESS(rc))
80 return VINF_SUCCESS;
81
82 RTMemFree(pAioMgr->pahReqsFree);
83 }
84 else
85 {
86 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
87 rc = VERR_NO_MEMORY;
88 }
89 }
90
91 return rc;
92}
93
94void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
95{
96 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
97
98 while (pAioMgr->iFreeEntry > 0)
99 {
100 pAioMgr->iFreeEntry--;
101 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
102 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
103 }
104
105 RTMemFree(pAioMgr->pahReqsFree);
106 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
107}
108
109#if 0 /* currently unused */
/**
 * Sorts the endpoint list with insertion sort.
 *
 * Endpoints end up ordered by descending AioMgr.cReqsPerSec (see the
 * comparison in the inner while loop and the DEBUG validation below).
 * The list is intrusive and doubly linked via AioMgr.pEndpointNext/Prev.
 */
static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
{
    PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;

    /* Start with the second element; the one-element prefix is trivially sorted. */
    pEpPrev = pAioMgr->pEndpointsHead;
    pEpCurr = pEpPrev->AioMgr.pEndpointNext;

    while (pEpCurr)
    {
        /* Remember the next element to sort because the list might change. */
        pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;

        /* Unlink the current element from the list. */
        PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
        PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;

        if (pPrev)
            pPrev->AioMgr.pEndpointNext = pNext;
        else
            pAioMgr->pEndpointsHead = pNext;

        if (pNext)
            pNext->AioMgr.pEndpointPrev = pPrev;

        /* Go back until we reached the place to insert the current endpoint into.
         * Walk left while the predecessor has a lower request rate, i.e. insert
         * before any element with a smaller cReqsPerSec (descending order). */
        while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
            pEpPrev = pEpPrev->AioMgr.pEndpointPrev;

        /* Link the endpoint into the list. */
        if (pEpPrev)
            pNext = pEpPrev->AioMgr.pEndpointNext;
        else
            pNext = pAioMgr->pEndpointsHead;

        pEpCurr->AioMgr.pEndpointNext = pNext;
        pEpCurr->AioMgr.pEndpointPrev = pEpPrev;

        if (pNext)
            pNext->AioMgr.pEndpointPrev = pEpCurr;

        if (pEpPrev)
            pEpPrev->AioMgr.pEndpointNext = pEpCurr;
        else
            pAioMgr->pEndpointsHead = pEpCurr;

        pEpCurr = pEpNextToSort;
    }

#ifdef DEBUG
    /* Validate sorting algorithm: descending order and no elements lost. */
    unsigned cEndpoints = 0;
    pEpCurr = pAioMgr->pEndpointsHead;

    AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
    AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));

    while (pEpCurr)
    {
        cEndpoints++;

        PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
        PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;

        Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
        Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);

        pEpCurr = pNext;
    }

    AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));

#endif
}
186#endif /* currently unused */
187
188/**
189 * Removes an endpoint from the currently assigned manager.
190 *
191 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
192 * FALSE otherwise.
193 * @param pEndpointRemove The endpoint to remove.
194 */
195static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
196{
197 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
198 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
199 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
200
201 pAioMgr->cEndpoints--;
202
203 if (pPrev)
204 pPrev->AioMgr.pEndpointNext = pNext;
205 else
206 pAioMgr->pEndpointsHead = pNext;
207
208 if (pNext)
209 pNext->AioMgr.pEndpointPrev = pPrev;
210
211 /* Make sure that there is no request pending on this manager for the endpoint. */
212 if (!pEndpointRemove->AioMgr.cRequestsActive)
213 {
214 Assert(!pEndpointRemove->pFlushReq);
215
216 /* Reopen the file so that the new endpoint can re-associate with the file */
217 RTFileClose(pEndpointRemove->hFile);
218 int rc = RTFileOpen(&pEndpointRemove->hFile, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
219 AssertRC(rc);
220 return false;
221 }
222
223 return true;
224}
225
226#if 0 /* currently unused */
227
228static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
229{
230 /* Balancing doesn't make sense with only one endpoint. */
231 if (pAioMgr->cEndpoints == 1)
232 return false;
233
234 /* Doesn't make sens to move endpoints if only one produces the whole load */
235 unsigned cEndpointsWithLoad = 0;
236
237 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
238
239 while (pCurr)
240 {
241 if (pCurr->AioMgr.cReqsPerSec)
242 cEndpointsWithLoad++;
243
244 pCurr = pCurr->AioMgr.pEndpointNext;
245 }
246
247 return (cEndpointsWithLoad > 1);
248}
249
250/**
251 * Creates a new I/O manager and spreads the I/O load of the endpoints
252 * between the given I/O manager and the new one.
253 *
254 * @returns nothing.
255 * @param pAioMgr The I/O manager with high I/O load.
256 */
257static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
258{
259 /*
260 * Check if balancing would improve the situation.
261 */
262 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
263 {
264 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
265 PPDMACEPFILEMGR pAioMgrNew = NULL;
266
267 int rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
268 if (RT_SUCCESS(rc))
269 {
270 /* We will sort the list by request count per second. */
271 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
272
273 /* Now move some endpoints to the new manager. */
274 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
275 unsigned cReqsOther = 0;
276 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
277
278 while (pCurr)
279 {
280 if (cReqsHere <= cReqsOther)
281 {
282 /*
283 * The other manager has more requests to handle now.
284 * We will keep the current endpoint.
285 */
286 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
287 cReqsHere += pCurr->AioMgr.cReqsPerSec;
288 pCurr = pCurr->AioMgr.pEndpointNext;
289 }
290 else
291 {
292 /* Move to other endpoint. */
293 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
294 cReqsOther += pCurr->AioMgr.cReqsPerSec;
295
296 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
297
298 pCurr = pCurr->AioMgr.pEndpointNext;
299
300 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
301
302 if (fReqsPending)
303 {
304 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
305 pMove->AioMgr.fMoving = true;
306 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
307 }
308 else
309 {
310 pMove->AioMgr.fMoving = false;
311 pMove->AioMgr.pAioMgrDst = NULL;
312 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
313 }
314 }
315 }
316 }
317 else
318 {
319 /* Don't process further but leave a log entry about reduced performance. */
320 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
321 }
322 }
323 else
324 Log(("AIOMgr: Load balancing would not improve anything\n"));
325}
326
327#endif /* unused */
328
/**
 * Increase the maximum number of active requests for the given I/O manager.
 *
 * Destroys the current AIO context and creates a bigger one, growing the
 * free-request-handle cache to match.  Must only be called while the manager
 * is in the GROWING state with no requests in flight (asserted below).
 *
 * @returns VBox status code.
 * @param   pAioMgr    The I/O manager to grow.
 */
static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
{
    LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));

    AssertMsg(    pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
              && !pAioMgr->cRequestsActive,
              ("Invalid state of the I/O manager\n"));

#ifdef RT_OS_WINDOWS
    /*
     * Reopen the files of all assigned endpoints first so we can assign them to the new
     * I/O context.  (On Windows a file handle is tied to the completion port it was
     * first associated with - NOTE(review): inferred from this reopen dance; confirm.)
     */
    PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;

    while (pCurr)
    {
        RTFileClose(pCurr->hFile);
        int rc2 = RTFileOpen(&pCurr->hFile, pCurr->Core.pszUri, pCurr->fFlags); AssertRC(rc2);

        pCurr = pCurr->AioMgr.pEndpointNext;
    }
#endif

    /* Create the new bigger context. */
    pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;

    RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
    int rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
    if (rc == VERR_OUT_OF_RANGE)
        rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);

    if (RT_SUCCESS(rc))
    {
        /* Close the old context. */
        rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
        AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */

        pAioMgr->hAioCtx = hAioCtxNew;

        /* Create a new I/O task handle array.
         * NOTE(review): sized cRequestsActiveMax + 1 whereas init uses exactly
         * cRequestsActiveMax - confirm whether the +1 is intentional. */
        uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
        RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));

        if (pahReqNew)
        {
            /* Copy the cached request handles. */
            for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
                pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];

            RTMemFree(pAioMgr->pahReqsFree);
            pAioMgr->pahReqsFree = pahReqNew;
            pAioMgr->cReqEntries = cReqEntriesNew;
            LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
                         pAioMgr->cRequestsActiveMax));
        }
        else
            rc = VERR_NO_MEMORY;
    }

#ifdef RT_OS_WINDOWS
    /* Assign the file to the new context. */
    pCurr = pAioMgr->pEndpointsHead;
    while (pCurr)
    {
        rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->hFile);
        AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */

        pCurr = pCurr->AioMgr.pEndpointNext;
    }
#endif

    if (RT_FAILURE(rc))
    {
        /* Roll back the maximum so the bookkeeping matches the surviving context. */
        LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
        pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
    }

    pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
    LogFlowFunc(("returns rc=%Rrc\n", rc));

    return rc;
}
418
419/**
420 * Checks if a given status code is fatal.
421 * Non fatal errors can be fixed by migrating the endpoint to a
422 * failsafe manager.
423 *
424 * @returns true If the error is fatal and migrating to a failsafe manager doesn't help
425 * false If the error can be fixed by a migration. (image on NFS disk for example)
426 * @param rcReq The status code to check.
427 */
428DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
429{
430 return rcReq == VERR_DEV_IO_ERROR
431 || rcReq == VERR_FILE_IO_ERROR
432 || rcReq == VERR_DISK_IO_ERROR
433 || rcReq == VERR_DISK_FULL
434 || rcReq == VERR_FILE_TOO_BIG;
435}
436
/**
 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
 *
 * Puts this manager into the FAULT state and forces all future endpoint
 * assignments of the class to the simple (failsafe) manager type.  The actual
 * migration of existing endpoints is not implemented yet (see the assertion).
 *
 * @returns VBox status code
 * @param   pAioMgr    The I/O manager the error occurred on.
 * @param   rc         The error code.
 * @param   SRC_POS    The source location of the error (use RT_SRC_POS).
 */
static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
{
    LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
            pAioMgr, rc));
    LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
    LogRel(("AIOMgr: Please contact the product vendor\n"));

    PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;

    /* Mark this manager as failed and override the manager type for the whole
     * endpoint class so new endpoints land on the failsafe implementation. */
    pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
    ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);

    AssertMsgFailed(("Implement\n"));
    return VINF_SUCCESS;
}
460
461/**
462 * Put a list of tasks in the pending request list of an endpoint.
463 */
464DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
465{
466 /* Add the rest of the tasks to the pending list */
467 if (!pEndpoint->AioMgr.pReqsPendingHead)
468 {
469 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
470 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
471 }
472 else
473 {
474 Assert(pEndpoint->AioMgr.pReqsPendingTail);
475 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
476 }
477
478 /* Update the tail. */
479 while (pTaskHead->pNext)
480 pTaskHead = pTaskHead->pNext;
481
482 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
483 pTaskHead->pNext = NULL;
484}
485
486/**
487 * Put one task in the pending request list of an endpoint.
488 */
489DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
490{
491 /* Add the rest of the tasks to the pending list */
492 if (!pEndpoint->AioMgr.pReqsPendingHead)
493 {
494 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
495 pEndpoint->AioMgr.pReqsPendingHead = pTask;
496 }
497 else
498 {
499 Assert(pEndpoint->AioMgr.pReqsPendingTail);
500 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
501 }
502
503 pEndpoint->AioMgr.pReqsPendingTail = pTask;
504 pTask->pNext = NULL;
505}
506
507/**
508 * Allocates a async I/O request.
509 *
510 * @returns Handle to the request.
511 * @param pAioMgr The I/O manager.
512 */
513static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
514{
515 /* Get a request handle. */
516 RTFILEAIOREQ hReq;
517 if (pAioMgr->iFreeEntry > 0)
518 {
519 pAioMgr->iFreeEntry--;
520 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
521 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
522 Assert(hReq != NIL_RTFILEAIOREQ);
523 }
524 else
525 {
526 int rc = RTFileAioReqCreate(&hReq);
527 AssertRCReturn(rc, NIL_RTFILEAIOREQ);
528 }
529
530 return hReq;
531}
532
533/**
534 * Frees a async I/O request handle.
535 *
536 * @returns nothing.
537 * @param pAioMgr The I/O manager.
538 * @param hReq The I/O request handle to free.
539 */
540static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
541{
542 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
543 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
544
545 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
546 pAioMgr->iFreeEntry++;
547}
548
/**
 * Wrapper around RTFileAioCtxSubmit() which is also doing error handling.
 *
 * Active request counters are bumped up front and corrected again for every
 * request that could not be submitted.  On resource exhaustion the unsubmitted
 * tasks are re-queued on the endpoint; on other errors each failed request is
 * completed with its individual status.  Always returns VINF_SUCCESS because
 * all submission errors are handled (or recursively resubmitted) here.
 */
static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
                                            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
                                            PRTFILEAIOREQ pahReqs, unsigned cReqs)
{
    pAioMgr->cRequestsActive += cReqs;
    pEndpoint->AioMgr.cRequestsActive += cReqs;

    LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
    LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));

    int rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
    if (RT_FAILURE(rc))
    {
        if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
        {
            PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;

            /* Append any not submitted task to the waiting list. */
            for (size_t i = 0; i < cReqs; i++)
            {
                int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);

                /* VERR_FILE_AIO_IN_PROGRESS means this request did get submitted. */
                if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
                {
                    PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);

                    Assert(pTask->hReq == pahReqs[i]);
                    pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
                    pAioMgr->cRequestsActive--;
                    pEndpoint->AioMgr.cRequestsActive--;

                    if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
                    {
                        /* Clear the pending flush */
                        Assert(pEndpoint->pFlushReq == pTask);
                        pEndpoint->pFlushReq = NULL;
                    }
                }
            }

            /* The host limit was hit: from now on allow at most the number of
             * requests that actually went through. */
            pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;

            /* Print an entry in the release log */
            if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
            {
                pEpClass->fOutOfResourcesWarningPrinted = true;
                LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
                        pAioMgr->cRequestsActive));
            }

            LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
            LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
            rc = VINF_SUCCESS;
        }
        else /* Another kind of error happened (full disk, ...) */
        {
            /* An error happened. Find out which one caused the error and resubmit all other tasks. */
            for (size_t i = 0; i < cReqs; i++)
            {
                int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);

                if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED)
                {
                    /* We call ourself again to do any error handling which might come up now. */
                    rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1);
                    AssertRC(rc);
                }
                else if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
                    pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0);
            }


            if (   pEndpoint->pFlushReq
                && !pAioMgr->cRequestsActive
                && !pEndpoint->fAsyncFlushSupported)
            {
                /*
                 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
                 * the async flush API.
                 * Happens only if this we just noticed that this is not supported
                 * and the only active request was a flush.
                 */
                PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
                pEndpoint->pFlushReq = NULL;
                pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
                pdmacFileTaskFree(pEndpoint, pFlush);
            }
        }
    }

    return VINF_SUCCESS;
}
644
/**
 * Checks whether the given file range is currently locked by another task
 * and, if so, queues the task on the lock's waiter list.
 *
 * @returns true if the task was deferred onto a range lock's waiting list,
 *          false if the range is free and the task may proceed.
 * @param   pEndpoint     The endpoint whose locked-range AVL tree to consult.
 * @param   offStart      Start offset of the range to check.
 * @param   cbRange       Size of the range in bytes.
 * @param   pTask         The read or write task (appended to the waiter list on conflict).
 * @param   fAlignedReq   Whether the request is fully aligned (fast path when
 *                        no unaligned, i.e. locked, requests are active).
 */
static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
                                               RTFOFF offStart, size_t cbRange,
                                               PPDMACTASKFILE pTask, bool fAlignedReq)
{
    AssertMsg(   pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
              || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
              ("Invalid task type %d\n", pTask->enmTransferType));

    /*
     * If there is no unaligned request active and the current one is aligned
     * just pass it through.
     */
    if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
        return false;

    /* First try an exact-key lookup, then the best fit below offStart. */
    PPDMACFILERANGELOCK pRangeLock;
    pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
    if (!pRangeLock)
    {
        pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
        /* Check if we intersect with the range. */
        if (   !pRangeLock
            || !(   (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
                 && (pRangeLock->Core.KeyLast) >= offStart))
        {
            pRangeLock = NULL; /* False alarm */
        }
    }

    /* Check whether we have one of the situations explained below */
    if (pRangeLock)
    {
        /* Add to the list. */
        pTask->pNext = NULL;

        if (!pRangeLock->pWaitingTasksHead)
        {
            Assert(!pRangeLock->pWaitingTasksTail);
            pRangeLock->pWaitingTasksHead = pTask;
            pRangeLock->pWaitingTasksTail = pTask;
        }
        else
        {
            AssertPtr(pRangeLock->pWaitingTasksTail);
            pRangeLock->pWaitingTasksTail->pNext = pTask;
            pRangeLock->pWaitingTasksTail = pTask;
        }
        return true;
    }

    return false;
}
697
/**
 * Locks the given file range for the task, recording it in the endpoint's
 * AVL tree of locked ranges.
 *
 * The caller must have verified the range is free beforehand (asserted).
 * Aligned requests skip locking entirely when no unaligned (locked) requests
 * are active.
 *
 * @returns VBox status code (VERR_NO_MEMORY if the lock record allocation fails).
 * @param   pAioMgr       The I/O manager owning the range-lock memcache.
 * @param   pEndpoint     The endpoint whose range tree is updated.
 * @param   offStart      Start offset of the range to lock.
 * @param   cbRange       Size of the range in bytes.
 * @param   pTask         The task taking the lock (pTask->pRangeLock is set).
 * @param   fAlignedReq   Whether the request is fully aligned.
 */
static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
                                          PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
                                          RTFOFF offStart, size_t cbRange,
                                          PPDMACTASKFILE pTask, bool fAlignedReq)
{
    LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p offStart=%RTfoff cbRange=%zu pTask=%#p\n",
                 pAioMgr, pEndpoint, offStart, cbRange, pTask));

    AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask, fAlignedReq),
              ("Range is already locked offStart=%RTfoff cbRange=%u\n",
               offStart, cbRange));

    /*
     * If there is no unaligned request active and the current one is aligned
     * just don't use the lock.
     */
    if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
    {
        pTask->pRangeLock = NULL;
        return VINF_SUCCESS;
    }

    PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
    if (!pRangeLock)
        return VERR_NO_MEMORY;

    /* Init the lock. */
    pRangeLock->Core.Key          = offStart;
    pRangeLock->Core.KeyLast      = offStart + cbRange - 1;
    pRangeLock->cRefs             = 1;
    pRangeLock->fReadLock         = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
    pRangeLock->pWaitingTasksHead = NULL;
    pRangeLock->pWaitingTasksTail = NULL;

    bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
    AssertMsg(fInserted, ("Range lock was not inserted!\n")); NOREF(fInserted);

    /* Let the task point to its lock. */
    pTask->pRangeLock = pRangeLock;
    pEndpoint->AioMgr.cLockedReqsActive++;

    return VINF_SUCCESS;
}
741
742static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
743 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
744 PPDMACFILERANGELOCK pRangeLock)
745{
746 PPDMACTASKFILE pTasksWaitingHead;
747
748 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p pRangeLock=%#p\n",
749 pAioMgr, pEndpoint, pRangeLock));
750
751 /* pRangeLock can be NULL if there was no lock assigned with the task. */
752 if (!pRangeLock)
753 return NULL;
754
755 Assert(pRangeLock->cRefs == 1);
756
757 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
758 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
759 pRangeLock->pWaitingTasksHead = NULL;
760 pRangeLock->pWaitingTasksTail = NULL;
761 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
762 pEndpoint->AioMgr.cLockedReqsActive--;
763
764 return pTasksWaitingHead;
765}
766
767static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
768 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
769 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
770{
771 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
772 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
773 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
774 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
775
776 pTask->fPrefetch = false;
777 pTask->cbBounceBuffer = 0;
778
779 /*
780 * Before we start to setup the request we have to check whether there is a task
781 * already active which range intersects with ours. We have to defer execution
782 * of this task in two cases:
783 * - The pending task is a write and the current is either read or write
784 * - The pending task is a read and the current task is a write task.
785 *
786 * To check whether a range is currently "locked" we use the AVL tree where every pending task
787 * is stored by its file offset range. The current task will be added to the active task
788 * and will be executed when the active one completes. (The method below
789 * which checks whether a range is already used will add the task)
790 *
791 * This is necessary because of the requirement to align all requests to a 512 boundary
792 * which is enforced by the host OS (Linux and Windows atm). It is possible that
793 * we have to process unaligned tasks and need to align them using bounce buffers.
794 * While the data is fetched from the file another request might arrive writing to
795 * the same range. This will result in data corruption if both are executed concurrently.
796 */
797 int rc = VINF_SUCCESS;
798 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask,
799 true /* fAlignedReq */);
800 if (!fLocked)
801 {
802 /* Get a request handle. */
803 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
804 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
805
806 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
807 {
808 /* Grow the file if needed. */
809 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
810 {
811 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
812 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
813 }
814
815 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
816 pTask->Off, pTask->DataSeg.pvSeg,
817 pTask->DataSeg.cbSeg, pTask);
818 }
819 else
820 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
821 pTask->Off, pTask->DataSeg.pvSeg,
822 pTask->DataSeg.cbSeg, pTask);
823 AssertRC(rc);
824
825 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
826 pTask->DataSeg.cbSeg,
827 pTask, true /* fAlignedReq */);
828
829 if (RT_SUCCESS(rc))
830 {
831 pTask->hReq = hReq;
832 *phReq = hReq;
833 }
834 }
835 else
836 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
837
838 return rc;
839}
840
841static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
842 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
843 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
844{
845 /*
846 * Check if the alignment requirements are met.
847 * Offset, transfer size and buffer address
848 * need to be on a 512 boundary.
849 */
850 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
851 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
852 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
853 bool fAlignedReq = cbToTransfer == pTask->DataSeg.cbSeg
854 && offStart == pTask->Off;
855
856 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
857 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
858 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
859 offStart, cbToTransfer, pEndpoint->cbFile));
860
861 pTask->fPrefetch = false;
862
863 /*
864 * Before we start to setup the request we have to check whether there is a task
865 * already active which range intersects with ours. We have to defer execution
866 * of this task in two cases:
867 * - The pending task is a write and the current is either read or write
868 * - The pending task is a read and the current task is a write task.
869 *
870 * To check whether a range is currently "locked" we use the AVL tree where every pending task
871 * is stored by its file offset range. The current task will be added to the active task
872 * and will be executed when the active one completes. (The method below
873 * which checks whether a range is already used will add the task)
874 *
875 * This is necessary because of the requirement to align all requests to a 512 boundary
876 * which is enforced by the host OS (Linux and Windows atm). It is possible that
877 * we have to process unaligned tasks and need to align them using bounce buffers.
878 * While the data is fetched from the file another request might arrive writing to
879 * the same range. This will result in data corruption if both are executed concurrently.
880 */
881 int rc = VINF_SUCCESS;
882 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
883 if (!fLocked)
884 {
885 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
886 void *pvBuf = pTask->DataSeg.pvSeg;
887
888 /* Get a request handle. */
889 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
890 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
891
892 if ( !fAlignedReq
893 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
894 {
895 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
896 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
897
898 /* Create bounce buffer. */
899 pTask->cbBounceBuffer = cbToTransfer;
900
901 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
902 pTask->Off, offStart));
903 pTask->offBounceBuffer = pTask->Off - offStart;
904
905 /** @todo I think we need something like a RTMemAllocAligned method here.
906 * Current assumption is that the maximum alignment is 4096byte
907 * (GPT disk on Windows)
908 * so we can use RTMemPageAlloc here.
909 */
910 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
911 if (RT_LIKELY(pTask->pvBounceBuffer))
912 {
913 pvBuf = pTask->pvBounceBuffer;
914
915 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
916 {
917 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
918 || RT_UNLIKELY(offStart != pTask->Off))
919 {
920 /* We have to fill the buffer first before we can update the data. */
921 LogFlow(("Prefetching data for task %#p\n", pTask));
922 pTask->fPrefetch = true;
923 enmTransferType = PDMACTASKFILETRANSFER_READ;
924 }
925 else
926 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
927 }
928 }
929 else
930 rc = VERR_NO_MEMORY;
931 }
932 else
933 pTask->cbBounceBuffer = 0;
934
935 if (RT_SUCCESS(rc))
936 {
937 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
938 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
939
940 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
941 {
942 /* Grow the file if needed. */
943 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
944 {
945 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
946 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
947 }
948
949 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
950 offStart, pvBuf, cbToTransfer, pTask);
951 }
952 else
953 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
954 offStart, pvBuf, cbToTransfer, pTask);
955 AssertRC(rc);
956
957 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
958 if (RT_SUCCESS(rc))
959 {
960 pTask->hReq = hReq;
961 *phReq = hReq;
962 }
963 else
964 {
965 /* Cleanup */
966 if (pTask->cbBounceBuffer)
967 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
968 }
969 }
970 }
971 else
972 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
973
974 return rc;
975}
976
/**
 * Converts the given task list into host I/O requests and submits them in
 * batches of up to 20.
 *
 * Processing stops early when a flush request has to wait for active
 * requests, when the manager reaches its active request limit, or when the
 * endpoint's bandwidth limit kicks in.  Tasks which could not be queued are
 * put back on the endpoint's pending list.
 *
 * @returns VBox status code.
 * @param   pTaskHead    Head of the singly linked list of tasks to process.
 * @param   pAioMgr      The I/O manager the endpoint is assigned to.
 * @param   pEndpoint    The endpoint the tasks belong to.
 */
static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
                                                PPDMACEPFILEMGR pAioMgr,
                                                PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
{
    RTFILEAIOREQ apReqs[20]; /* Submission batch, flushed to the host when full. */
    unsigned cRequests = 0;
    int rc = VINF_SUCCESS;

    AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
              ("Trying to process request lists of a non active endpoint!\n"));

    /* Go through the list and queue the requests until we get a flush request */
    while (   pTaskHead
           && !pEndpoint->pFlushReq
           && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
           && RT_SUCCESS(rc))
    {
        RTMSINTERVAL msWhenNext;
        PPDMACTASKFILE pCurr = pTaskHead;

        /* Stop if the bandwidth limit forbids this transfer for now and remember
         * when the manager has to wake up to retry. */
        if (!pdmacEpIsTransferAllowed(&pEndpoint->Core, (uint32_t)pCurr->DataSeg.cbSeg, &msWhenNext))
        {
            pAioMgr->msBwLimitExpired = RT_MIN(pAioMgr->msBwLimitExpired, msWhenNext);
            break;
        }

        pTaskHead = pTaskHead->pNext;

        pCurr->pNext = NULL;

        AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
                  ("Endpoints do not match\n"));

        switch (pCurr->enmTransferType)
        {
            case PDMACTASKFILETRANSFER_FLUSH:
            {
                /* If there is no data transfer request this flush request finished immediately. */
                if (pEndpoint->fAsyncFlushSupported)
                {
                    /* Issue a flush to the host. */
                    RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
                    AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));

                    LogFlow(("Flush request %#p\n", hReq));

                    rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->hFile, pCurr);
                    if (RT_FAILURE(rc))
                    {
                        /* Disable async flushes and fall through to the emulation below. */
                        if (rc == VERR_NOT_SUPPORTED)
                            LogRel(("AIOMgr: Async flushes not supported\n"));
                        else
                            LogRel(("AIOMgr: Preparing flush failed with %Rrc, disabling async flushes\n", rc));
                        pEndpoint->fAsyncFlushSupported = false;
                        pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
                        rc = VINF_SUCCESS; /* Fake success */
                    }
                    else
                    {
                        pCurr->hReq = hReq;
                        apReqs[cRequests] = hReq;
                        pEndpoint->AioMgr.cReqsProcessed++;
                        cRequests++;
                    }
                }

                if (   !pEndpoint->AioMgr.cRequestsActive
                    && !pEndpoint->fAsyncFlushSupported)
                {
                    /* No requests in flight: the emulated flush completes right away. */
                    pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
                    pdmacFileTaskFree(pEndpoint, pCurr);
                }
                else
                {
                    /* Park the flush; it completes when the last active request finishes. */
                    Assert(!pEndpoint->pFlushReq);
                    pEndpoint->pFlushReq = pCurr;
                }
                break;
            }
            case PDMACTASKFILETRANSFER_READ:
            case PDMACTASKFILETRANSFER_WRITE:
            {
                RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;

                if (pCurr->hReq == NIL_RTFILEAIOREQ)
                {
                    if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
                        rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
                    else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
                        rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
                    else
                        AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));

                    AssertRC(rc);
                }
                else
                {
                    /* The task already has a prepared request, e.g. a restarted transfer. */
                    LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
                    hReq = pCurr->hReq;
                }

                LogFlow(("Read/Write request %#p\n", hReq));

                /* hReq stays NIL when the task was deferred (range currently locked). */
                if (hReq != NIL_RTFILEAIOREQ)
                {
                    apReqs[cRequests] = hReq;
                    cRequests++;
                }
                break;
            }
            default:
                AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
        } /* switch transfer type */

        /* Queue the requests if the array is full. */
        if (cRequests == RT_ELEMENTS(apReqs))
        {
            rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
            cRequests = 0;
            AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                      ("Unexpected return code\n"));
        }
    }

    if (cRequests)
    {
        /* Submit the remaining partial batch. */
        rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
        AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                  ("Unexpected return code rc=%Rrc\n", rc));
    }

    if (pTaskHead)
    {
        /* Add the rest of the tasks to the pending list */
        pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);

        if (RT_UNLIKELY(   pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
                        && !pEndpoint->pFlushReq))
        {
#if 0
            /*
             * The I/O manager has no room left for more requests
             * but there are still requests to process.
             * Create a new I/O manager and let it handle some endpoints.
             */
            pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
#else
            /* Grow the I/O manager */
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
#endif
        }
    }

    /* Insufficient resources are not fatal. */
    if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
        rc = VINF_SUCCESS;

    return rc;
}
1136
1137/**
1138 * Adds all pending requests for the given endpoint
1139 * until a flush request is encountered or there is no
1140 * request anymore.
1141 *
1142 * @returns VBox status code.
1143 * @param pAioMgr The async I/O manager for the endpoint
1144 * @param pEndpoint The endpoint to get the requests from.
1145 */
1146static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1147 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1148{
1149 int rc = VINF_SUCCESS;
1150 PPDMACTASKFILE pTasksHead = NULL;
1151
1152 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1153 ("Trying to process request lists of a non active endpoint!\n"));
1154
1155 Assert(!pEndpoint->pFlushReq);
1156
1157 /* Check the pending list first */
1158 if (pEndpoint->AioMgr.pReqsPendingHead)
1159 {
1160 LogFlow(("Queuing pending requests first\n"));
1161
1162 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1163 /*
1164 * Clear the list as the processing routine will insert them into the list
1165 * again if it gets a flush request.
1166 */
1167 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1168 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1169 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1170 AssertRC(rc); /** @todo r=bird: status code potentially overwritten. */
1171 }
1172
1173 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1174 {
1175 /* Now the request queue. */
1176 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1177 if (pTasksHead)
1178 {
1179 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1180 AssertRC(rc);
1181 }
1182 }
1183
1184 return rc;
1185}
1186
/**
 * Processes the pending blocking event posted to the I/O manager (add,
 * remove or close an endpoint, shutdown, suspend or resume) and releases the
 * posting thread once the event has been fully handled.
 *
 * @returns VBox status code.
 * @param   pAioMgr    The I/O manager handle.
 */
static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
{
    int rc = VINF_SUCCESS;
    bool fNotifyWaiter = false; /* Whether the posting thread can be released now. */

    LogFlowFunc((": Enter\n"));

    Assert(pAioMgr->fBlockingEventPending);

    switch (pAioMgr->enmBlockingEvent)
    {
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
            AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));

            pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;

            /* Link the new endpoint at the head of the manager's doubly linked list. */
            pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
            pEndpointNew->AioMgr.pEndpointPrev = NULL;
            if (pAioMgr->pEndpointsHead)
                pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
            pAioMgr->pEndpointsHead = pEndpointNew;

            /* Assign the completion point to this file. */
            rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->hFile);
            fNotifyWaiter = true;
            pAioMgr->cEndpoints++;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
            AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));

            pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
            /* Only notify now when no requests are still active; otherwise the
             * waiter is released when the last request completes. */
            fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
            AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));

            if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
            {
                LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));

                /* Make sure all tasks finished. Process the queues a last time first. */
                rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
                AssertRC(rc);

                pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
                fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
            }
            else if (   (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
                     && (!pEndpointClose->AioMgr.cRequestsActive))
                fNotifyWaiter = true;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
            /* The waiter is released immediately only when nothing is in flight. */
            if (!pAioMgr->cRequestsActive)
                fNotifyWaiter = true;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
            fNotifyWaiter = true;
            break;
        }
        default:
            AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
    }

    if (fNotifyWaiter)
    {
        ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
        pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;

        /* Release the waiting thread. */
        LogFlow(("Signalling waiter\n"));
        rc = RTSemEventSignal(pAioMgr->EventSemBlock);
        AssertRC(rc);
    }

    LogFlowFunc((": Leave\n"));
    return rc;
}
1283
1284/**
1285 * Checks all endpoints for pending events or new requests.
1286 *
1287 * @returns VBox status code.
1288 * @param pAioMgr The I/O manager handle.
1289 */
1290static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1291{
1292 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1293 int rc = VINF_SUCCESS;
1294 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1295
1296 pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT;
1297
1298 while (pEndpoint)
1299 {
1300 if (!pEndpoint->pFlushReq
1301 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1302 && !pEndpoint->AioMgr.fMoving)
1303 {
1304 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1305 if (RT_FAILURE(rc))
1306 return rc;
1307 }
1308 else if ( !pEndpoint->AioMgr.cRequestsActive
1309 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1310 {
1311 /* Reopen the file so that the new endpoint can re-associate with the file */
1312 RTFileClose(pEndpoint->hFile);
1313 rc = RTFileOpen(&pEndpoint->hFile, pEndpoint->Core.pszUri, pEndpoint->fFlags);
1314 AssertRC(rc);
1315
1316 if (pEndpoint->AioMgr.fMoving)
1317 {
1318 pEndpoint->AioMgr.fMoving = false;
1319 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1320 }
1321 else
1322 {
1323 Assert(pAioMgr->fBlockingEventPending);
1324 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1325
1326 /* Release the waiting thread. */
1327 LogFlow(("Signalling waiter\n"));
1328 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1329 AssertRC(rc);
1330 }
1331 }
1332
1333 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1334 }
1335
1336 return rc;
1337}
1338
1339/**
1340 * Wrapper around pdmacFileAioMgrNormalReqCompleteRc().
1341 */
1342static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1343{
1344 size_t cbTransfered = 0;
1345 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1346
1347 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered);
1348}
1349
/**
 * Second level worker for completed I/O requests.
 *
 * Failed requests either complete the task with the error (fatal errors) or
 * are requeued while the endpoint is migrated to a failsafe manager.
 * Successful but partial transfers are restarted, completed prefetch reads
 * kick off the write half of the bounce-buffer read-modify-write cycle, and
 * fully completed tasks are finished, releasing any tasks that were waiting
 * on the same file range.
 *
 * @param   pAioMgr       The I/O manager the request was submitted on.
 * @param   hReq          The completed request handle.
 * @param   rcReq         Status code of the completed request.
 * @param   cbTransfered  Number of bytes actually transferred.
 */
static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
                                               int rcReq, size_t cbTransfered)
{
    int rc = VINF_SUCCESS;
    PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
    PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
    PPDMACTASKFILE pTasksWaiting;

    LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));

    pEndpoint = pTask->pEndpoint;

    pTask->hReq = NIL_RTFILEAIOREQ;

    /* Bookkeeping: one request less in flight, one more processed. */
    pAioMgr->cRequestsActive--;
    pEndpoint->AioMgr.cRequestsActive--;
    pEndpoint->AioMgr.cReqsProcessed++;

    /*
     * It is possible that the request failed on Linux with kernels < 2.6.23
     * if the passed buffer was allocated with remap_pfn_range or if the file
     * is on an NFS endpoint which does not support async and direct I/O at the same time.
     * The endpoint will be migrated to a failsafe manager in case a request fails.
     */
    if (RT_FAILURE(rcReq))
    {
        /* Free bounce buffers and the IPRT request. */
        pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);

        if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
        {
            /* Failed flush: disable async flushes; the emulation takes over. */
            LogRel(("AIOMgr: Flush failed with %Rrc, disabling async flushes\n", rcReq));
            pEndpoint->fAsyncFlushSupported = false;
            AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
            /* The other method will take over now. */

            pEndpoint->pFlushReq = NULL;
            /* Call completion callback */
            LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, VINF_SUCCESS));
            pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
            pdmacFileTaskFree(pEndpoint, pTask);
        }
        else
        {
            /* Free the lock and process pending tasks if necessary */
            pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
            rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
            AssertRC(rc);

            if (pTask->cbBounceBuffer)
                RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);

            /*
             * Fatal errors are reported to the guest and non-fatal errors
             * will cause a migration to the failsafe manager in the hope
             * that the error disappears.
             */
            if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
            {
                /* Queue the request on the pending list. */
                pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
                pEndpoint->AioMgr.pReqsPendingHead = pTask;

                /* Create a new failsafe manager if necessary. */
                if (!pEndpoint->AioMgr.fMoving)
                {
                    PPDMACEPFILEMGR pAioMgrFailsafe;

                    LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
                            RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));

                    pEndpoint->AioMgr.fMoving = true;

                    rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
                                               &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
                    AssertRC(rc);

                    pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;

                    /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
                    pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
                }

                /* If this was the last request for the endpoint migrate it to the new manager. */
                if (!pEndpoint->AioMgr.cRequestsActive)
                {
                    bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
                    Assert(!fReqsPending); NOREF(fReqsPending);

                    rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
                    AssertRC(rc);
                }
            }
            else
            {
                /* Fatal error: report it to the task owner. */
                pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
                pdmacFileTaskFree(pEndpoint, pTask);
            }
        }
    }
    else
    {
        if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
        {
            /* Clear pending flush */
            AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
            pEndpoint->pFlushReq = NULL;
            pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);

            /* Call completion callback */
            LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
            pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
            pdmacFileTaskFree(pEndpoint, pTask);
        }
        else
        {
            /*
             * Restart an incomplete transfer.
             * This usually means that the request will return an error now
             * but to get the cause of the error (disk full, file too big, I/O error, ...)
             * the transfer needs to be continued.
             */
            pTask->cbTransfered += cbTransfered;

            if (RT_UNLIKELY(   pTask->cbTransfered < pTask->DataSeg.cbSeg
                            || (   pTask->cbBounceBuffer
                                && pTask->cbTransfered < pTask->cbBounceBuffer)))
            {
                RTFOFF offStart;
                size_t cbToTransfer;
                uint8_t *pbBuf = NULL;

                LogFlow(("Restarting incomplete transfer %#p (%zu bytes transferred)\n",
                         pTask, cbTransfered));
                Assert(cbTransfered % 512 == 0);

                /* Continue from where the last transfer stopped (512 byte aligned). */
                if (pTask->cbBounceBuffer)
                {
                    AssertPtr(pTask->pvBounceBuffer);
                    offStart = (pTask->Off & ~((RTFOFF)512-1)) + pTask->cbTransfered;
                    cbToTransfer = pTask->cbBounceBuffer - pTask->cbTransfered;
                    pbBuf = (uint8_t *)pTask->pvBounceBuffer + pTask->cbTransfered;
                }
                else
                {
                    Assert(!pTask->pvBounceBuffer);
                    offStart = pTask->Off + pTask->cbTransfered;
                    cbToTransfer = pTask->DataSeg.cbSeg - pTask->cbTransfered;
                    pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + pTask->cbTransfered;
                }

                if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
                {
                    rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile, offStart,
                                                 pbBuf, cbToTransfer, pTask);
                }
                else
                {
                    AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
                              ("Invalid transfer type\n"));
                    rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, offStart,
                                                  pbBuf, cbToTransfer, pTask);
                }
                AssertRC(rc);

                pTask->hReq = hReq;
                rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
                AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                          ("Unexpected return code rc=%Rrc\n", rc));
            }
            else if (pTask->fPrefetch)
            {
                /* Prefetch read finished: merge the caller's data into the bounce
                 * buffer and issue the actual write (read-modify-write cycle). */
                Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
                Assert(pTask->cbBounceBuffer);

                memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
                        pTask->DataSeg.pvSeg,
                        pTask->DataSeg.cbSeg);

                /* Write it now. */
                pTask->fPrefetch = false;
                RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
                size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);

                pTask->cbTransfered = 0;

                /* Grow the file if needed. */
                if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
                {
                    ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
                    RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
                }

                rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
                                              offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
                AssertRC(rc);
                pTask->hReq = hReq;
                rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
                AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                          ("Unexpected return code rc=%Rrc\n", rc));
            }
            else
            {
                /* Task fully completed: copy read data out of the bounce buffer. */
                if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
                {
                    if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
                        memcpy(pTask->DataSeg.pvSeg,
                               ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
                               pTask->DataSeg.cbSeg);

                    RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
                }

                pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);

                /* Free the lock and process pending tasks if necessary */
                pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
                if (pTasksWaiting)
                {
                    rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
                    AssertRC(rc);
                }

                /* Call completion callback */
                LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
                pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
                pdmacFileTaskFree(pEndpoint, pTask);

                /*
                 * If there is no request left on the endpoint but a flush request is set
                 * it completed now and we notify the owner.
                 * Furthermore we look for new requests and continue.
                 */
                if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
                {
                    /* Call completion callback */
                    pTask = pEndpoint->pFlushReq;
                    pEndpoint->pFlushReq = NULL;

                    AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));

                    pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
                    pdmacFileTaskFree(pEndpoint, pTask);
                }
                else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
                {
                    /* If the endpoint is about to be migrated do it now. */
                    bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
                    Assert(!fReqsPending); NOREF(fReqsPending);

                    rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
                    AssertRC(rc);
                }
            }
        } /* Not a flush request */
    } /* request completed successfully */
}
1607
1608/** Helper macro for checking for error codes. */
1609#define CHECK_RC(pAioMgr, rc) \
1610 if (RT_FAILURE(rc)) \
1611 {\
1612 int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS);\
1613 return rc2;\
1614 }
1615
1616/**
1617 * The normal I/O manager using the RTFileAio* API
1618 *
1619 * @returns VBox status code.
1620 * @param hThreadSelf Handle of the thread.
1621 * @param pvUser Opaque user data.
1622 */
1623DECLCALLBACK(int) pdmacFileAioMgrNormal(RTTHREAD hThreadSelf, void *pvUser)
1624{
1625 int rc = VINF_SUCCESS;
1626 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1627 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1628 NOREF(hThreadSelf);
1629
1630 while ( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1631 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING
1632 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1633 {
1634 if (!pAioMgr->cRequestsActive)
1635 {
1636 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1637 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1638 rc = RTSemEventWait(pAioMgr->EventSem, pAioMgr->msBwLimitExpired);
1639 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1640 Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT);
1641
1642 LogFlow(("Got woken up\n"));
1643 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1644 }
1645
1646 /* Check for an external blocking event first. */
1647 if (pAioMgr->fBlockingEventPending)
1648 {
1649 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1650 CHECK_RC(pAioMgr, rc);
1651 }
1652
1653 if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1654 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1655 {
1656 /* We got woken up because an endpoint issued new requests. Queue them. */
1657 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1658 CHECK_RC(pAioMgr, rc);
1659
1660 while (pAioMgr->cRequestsActive)
1661 {
1662 RTFILEAIOREQ apReqs[20];
1663 uint32_t cReqsCompleted = 0;
1664 size_t cReqsWait;
1665
1666 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1667 cReqsWait = RT_ELEMENTS(apReqs);
1668 else
1669 cReqsWait = pAioMgr->cRequestsActive;
1670
1671 LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));
1672
1673 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1674 1,
1675 RT_INDEFINITE_WAIT, apReqs,
1676 cReqsWait, &cReqsCompleted);
1677 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1678 CHECK_RC(pAioMgr, rc);
1679
1680 LogFlow(("%d tasks completed\n", cReqsCompleted));
1681
1682 for (uint32_t i = 0; i < cReqsCompleted; i++)
1683 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1684
1685 /* Check for an external blocking event before we go to sleep again. */
1686 if (pAioMgr->fBlockingEventPending)
1687 {
1688 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1689 CHECK_RC(pAioMgr, rc);
1690 }
1691
1692 /* Update load statistics. */
1693 uint64_t uMillisCurr = RTTimeMilliTS();
1694 if (uMillisCurr > uMillisEnd)
1695 {
1696 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1697
1698 /* Calculate timespan. */
1699 uMillisCurr -= uMillisEnd;
1700
1701 while (pEndpointCurr)
1702 {
1703 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1704 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1705 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1706 }
1707
1708 /* Set new update interval */
1709 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1710 }
1711
1712 /* Check endpoints for new requests. */
1713 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1714 {
1715 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1716 CHECK_RC(pAioMgr, rc);
1717 }
1718 } /* while requests are active. */
1719
1720 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1721 {
1722 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1723 AssertRC(rc);
1724 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
1725
1726 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1727 CHECK_RC(pAioMgr, rc);
1728 }
1729 } /* if still running */
1730 } /* while running */
1731
1732 LogFlowFunc(("rc=%Rrc\n", rc));
1733 return rc;
1734}
1735
1736#undef CHECK_RC
1737
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette