VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 28317

Last change on this file since 28317 was 28317, checked in by vboxsync, 15 years ago

RTMemPageFree + all users: Added size parameter to RTMemPageFree so we can avoid tracking structures when using mmap/munmap.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 53.4 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 28317 2010-04-14 18:06:05Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#include <iprt/types.h>
24#include <iprt/asm.h>
25#include <iprt/file.h>
26#include <iprt/mem.h>
27#include <iprt/string.h>
28#include <iprt/assert.h>
29#include <VBox/log.h>
30
31#include "PDMAsyncCompletionFileInternal.h"
32
33/** The update period for the I/O load statistics in ms. */
34#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
35/** Maximum number of requests a manager will handle. */
36#define PDMACEPFILEMGR_REQS_MAX 512 /* @todo: Find better solution wrt. the request number*/
37
38/*******************************************************************************
39* Internal functions *
40*******************************************************************************/
41static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
42 PPDMACEPFILEMGR pAioMgr,
43 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
44
45static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
46 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
47 PPDMACFILERANGELOCK pRangeLock);
48
49int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
50{
51 int rc = VINF_SUCCESS;
52
53 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_MAX;
54
55 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
56 if (rc == VERR_OUT_OF_RANGE)
57 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, PDMACEPFILEMGR_REQS_MAX);
58
59 if (RT_SUCCESS(rc))
60 {
61 /* Initialize request handle array. */
62 pAioMgr->iFreeEntryNext = 0;
63 pAioMgr->iFreeReqNext = 0;
64 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax + 1;
65 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
66
67 if (pAioMgr->pahReqsFree)
68 {
69 /* Create the range lock memcache. */
70 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
71 0, UINT32_MAX, NULL, NULL, NULL, 0);
72 if (RT_SUCCESS(rc))
73 return VINF_SUCCESS;
74
75 RTMemFree(pAioMgr->pahReqsFree);
76 }
77 else
78 {
79 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
80 rc = VERR_NO_MEMORY;
81 }
82 }
83
84 return rc;
85}
86
87void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
88{
89 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
90
91 while (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
92 {
93 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext]);
94 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
95 }
96
97 RTMemFree(pAioMgr->pahReqsFree);
98 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
99}
100
101/**
102 * Sorts the endpoint list with insertion sort.
103 */
104static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
105{
106 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
107
108 pEpPrev = pAioMgr->pEndpointsHead;
109 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
110
111 while (pEpCurr)
112 {
113 /* Remember the next element to sort because the list might change. */
114 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
115
116 /* Unlink the current element from the list. */
117 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
118 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
119
120 if (pPrev)
121 pPrev->AioMgr.pEndpointNext = pNext;
122 else
123 pAioMgr->pEndpointsHead = pNext;
124
125 if (pNext)
126 pNext->AioMgr.pEndpointPrev = pPrev;
127
128 /* Go back until we reached the place to insert the current endpoint into. */
129 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
130 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
131
132 /* Link the endpoint into the list. */
133 if (pEpPrev)
134 pNext = pEpPrev->AioMgr.pEndpointNext;
135 else
136 pNext = pAioMgr->pEndpointsHead;
137
138 pEpCurr->AioMgr.pEndpointNext = pNext;
139 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
140
141 if (pNext)
142 pNext->AioMgr.pEndpointPrev = pEpCurr;
143
144 if (pEpPrev)
145 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
146 else
147 pAioMgr->pEndpointsHead = pEpCurr;
148
149 pEpCurr = pEpNextToSort;
150 }
151
152#ifdef DEBUG
153 /* Validate sorting alogrithm */
154 unsigned cEndpoints = 0;
155 pEpCurr = pAioMgr->pEndpointsHead;
156
157 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
158 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
159
160 while (pEpCurr)
161 {
162 cEndpoints++;
163
164 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
165 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
166
167 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
168 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
169
170 pEpCurr = pNext;
171 }
172
173 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
174
175#endif
176}
177
178/**
179 * Removes an endpoint from the currently assigned manager.
180 *
181 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
182 * FALSE otherwise.
183 * @param pEndpointRemove The endpoint to remove.
184 */
185static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
186{
187 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
188 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
189 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
190
191 pAioMgr->cEndpoints--;
192
193 if (pPrev)
194 pPrev->AioMgr.pEndpointNext = pNext;
195 else
196 pAioMgr->pEndpointsHead = pNext;
197
198 if (pNext)
199 pNext->AioMgr.pEndpointPrev = pPrev;
200
201 /* Make sure that there is no request pending on this manager for the endpoint. */
202 if (!pEndpointRemove->AioMgr.cRequestsActive)
203 {
204 Assert(!pEndpointRemove->pFlushReq);
205
206 /* Reopen the file so that the new endpoint can reassociate with the file */
207 RTFileClose(pEndpointRemove->File);
208 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
209 AssertRC(rc);
210 return false;
211 }
212
213 return true;
214}
215
216static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
217{
218 /* Balancing doesn't make sense with only one endpoint. */
219 if (pAioMgr->cEndpoints == 1)
220 return false;
221
222 /* Doesn't make sens to move endpoints if only one produces the whole load */
223 unsigned cEndpointsWithLoad = 0;
224
225 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
226
227 while (pCurr)
228 {
229 if (pCurr->AioMgr.cReqsPerSec)
230 cEndpointsWithLoad++;
231
232 pCurr = pCurr->AioMgr.pEndpointNext;
233 }
234
235 return (cEndpointsWithLoad > 1);
236}
237
238/**
239 * Creates a new I/O manager and spreads the I/O load of the endpoints
240 * between the given I/O manager and the new one.
241 *
242 * @returns nothing.
243 * @param pAioMgr The I/O manager with high I/O load.
244 */
245static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
246{
247 PPDMACEPFILEMGR pAioMgrNew = NULL;
248 int rc = VINF_SUCCESS;
249
250 /*
251 * Check if balancing would improve the situation.
252 */
253 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
254 {
255 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
256
257 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
258 if (RT_SUCCESS(rc))
259 {
260 /* We will sort the list by request count per second. */
261 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
262
263 /* Now move some endpoints to the new manager. */
264 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
265 unsigned cReqsOther = 0;
266 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
267
268 while (pCurr)
269 {
270 if (cReqsHere <= cReqsOther)
271 {
272 /*
273 * The other manager has more requests to handle now.
274 * We will keep the current endpoint.
275 */
276 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
277 cReqsHere += pCurr->AioMgr.cReqsPerSec;
278 pCurr = pCurr->AioMgr.pEndpointNext;
279 }
280 else
281 {
282 /* Move to other endpoint. */
283 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
284 cReqsOther += pCurr->AioMgr.cReqsPerSec;
285
286 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
287
288 pCurr = pCurr->AioMgr.pEndpointNext;
289
290 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
291
292 if (fReqsPending)
293 {
294 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
295 pMove->AioMgr.fMoving = true;
296 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
297 }
298 else
299 {
300 pMove->AioMgr.fMoving = false;
301 pMove->AioMgr.pAioMgrDst = NULL;
302 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
303 }
304 }
305 }
306 }
307 else
308 {
309 /* Don't process further but leave a log entry about reduced performance. */
310 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
311 }
312 }
313 else
314 Log(("AIOMgr: Load balancing would not improve anything\n"));
315}
316
317/**
318 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
319 *
320 * @returns VBox status code
321 * @param pAioMgr The I/O manager the error ocurred on.
322 * @param rc The error code.
323 */
324static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
325{
326 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
327 pAioMgr, rc));
328 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
329 LogRel(("AIOMgr: Please contact the product vendor\n"));
330
331 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
332
333 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
334 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
335
336 AssertMsgFailed(("Implement\n"));
337 return VINF_SUCCESS;
338}
339
340/**
341 * Put a list of tasks in the pending request list of an endpoint.
342 */
343DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
344{
345 /* Add the rest of the tasks to the pending list */
346 if (!pEndpoint->AioMgr.pReqsPendingHead)
347 {
348 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
349 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
350 }
351 else
352 {
353 Assert(pEndpoint->AioMgr.pReqsPendingTail);
354 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
355 }
356
357 /* Update the tail. */
358 while (pTaskHead->pNext)
359 pTaskHead = pTaskHead->pNext;
360
361 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
362}
363
364/**
365 * Put one task in the pending request list of an endpoint.
366 */
367DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
368{
369 /* Add the rest of the tasks to the pending list */
370 if (!pEndpoint->AioMgr.pReqsPendingHead)
371 {
372 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
373 pEndpoint->AioMgr.pReqsPendingHead = pTask;
374 }
375 else
376 {
377 Assert(pEndpoint->AioMgr.pReqsPendingTail);
378 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
379 }
380
381 pEndpoint->AioMgr.pReqsPendingTail = pTask;
382}
383
384/**
385 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
386 */
387static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
388 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
389 PRTFILEAIOREQ pahReqs, unsigned cReqs)
390{
391 int rc;
392
393 pAioMgr->cRequestsActive += cReqs;
394 pEndpoint->AioMgr.cRequestsActive += cReqs;
395
396 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
397 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
398
399 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
400 if (RT_FAILURE(rc))
401 {
402 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
403 {
404 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
405
406 /*
407 * We run out of resources.
408 * Need to check which requests got queued
409 * and put the rest on the pending list again.
410 */
411 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
412 {
413 pEpClass->fOutOfResourcesWarningPrinted = true;
414 LogRel(("AIOMgr: The operating system doesn't have enough resources "
415 "to handle the I/O load of the VM. Expect reduced I/O performance\n"));
416 }
417
418 for (size_t i = 0; i < cReqs; i++)
419 {
420 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
421
422 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
423 {
424 AssertMsg(rcReq == VERR_FILE_AIO_NOT_SUBMITTED,
425 ("Request returned unexpected return code: rc=%Rrc\n", rcReq));
426
427 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
428 PPDMACTASKFILE pTasksWaiting;
429
430 /* Put the entry on the free array */
431 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = pahReqs[i];
432 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
433
434 if (pTask->cbBounceBuffer)
435 RTMemFree(pTask->pvBounceBuffer);
436
437 pTask->fPrefetch = false;
438
439 /* Free the lock and process pending tasks if neccessary */
440 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
441
442 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
443 if (pTasksWaiting)
444 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTasksWaiting);
445
446 pAioMgr->cRequestsActive--;
447 pEndpoint->AioMgr.cRequestsActive--;
448 }
449
450 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
451 }
452
453 LogFlow(("Removed requests. I/O manager has a total of %d active requests now\n", pAioMgr->cRequestsActive));
454 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
455 }
456 else
457 AssertMsgFailed(("Unexpected return code rc=%Rrc\n", rc));
458 }
459
460 return rc;
461}
462
463/**
464 * Allocates a async I/O request.
465 *
466 * @returns Handle to the request.
467 * @param pAioMgr The I/O manager.
468 */
469static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
470{
471 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
472
473 /* Get a request handle. */
474 if (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
475 {
476 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext];
477 pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext] = NIL_RTFILEAIOREQ;
478 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
479 }
480 else
481 {
482 int rc = RTFileAioReqCreate(&hReq);
483 AssertRC(rc);
484 }
485
486 return hReq;
487}
488
489static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
490 RTFOFF offStart, size_t cbRange,
491 PPDMACTASKFILE pTask)
492{
493 PPDMACFILERANGELOCK pRangeLock = NULL; /** < Range lock */
494
495 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
496 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
497 ("Invalid task type %d\n", pTask->enmTransferType));
498
499 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
500 if (!pRangeLock)
501 {
502 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
503 /* Check if we intersect with the range. */
504 if ( !pRangeLock
505 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
506 && (pRangeLock->Core.KeyLast) >= offStart))
507 {
508 pRangeLock = NULL; /* False alarm */
509 }
510 }
511
512 /* Check whether we have one of the situations explained below */
513 if ( pRangeLock
514#if 0 /** @todo: later. For now we will just block all requests if they interfere */
515 && ( (pRangeLock->fReadLock && pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
516 || (!pRangeLock->fReadLock)
517#endif
518 )
519 {
520 /* Add to the list. */
521 pTask->pNext = NULL;
522
523 if (!pRangeLock->pWaitingTasksHead)
524 {
525 Assert(!pRangeLock->pWaitingTasksTail);
526 pRangeLock->pWaitingTasksHead = pTask;
527 pRangeLock->pWaitingTasksTail = pTask;
528 }
529 else
530 {
531 AssertPtr(pRangeLock->pWaitingTasksTail);
532 pRangeLock->pWaitingTasksTail->pNext = pTask;
533 pRangeLock->pWaitingTasksTail = pTask;
534 }
535 return true;
536 }
537
538 return false;
539}
540
541static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
542 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
543 RTFOFF offStart, size_t cbRange,
544 PPDMACTASKFILE pTask)
545{
546 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask),
547 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
548 offStart, cbRange));
549
550 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
551 if (!pRangeLock)
552 return VERR_NO_MEMORY;
553
554 /* Init the lock. */
555 pRangeLock->Core.Key = offStart;
556 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
557 pRangeLock->cRefs = 1;
558 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
559 pRangeLock->pWaitingTasksHead = NULL;
560 pRangeLock->pWaitingTasksTail = NULL;
561
562 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
563 AssertMsg(fInserted, ("Range lock was not inserted!\n"));
564
565 /* Let the task point to its lock. */
566 pTask->pRangeLock = pRangeLock;
567
568 return VINF_SUCCESS;
569}
570
571static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
572 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
573 PPDMACFILERANGELOCK pRangeLock)
574{
575 PPDMACTASKFILE pTasksWaitingHead;
576
577 AssertPtr(pRangeLock);
578 Assert(pRangeLock->cRefs == 1);
579
580 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
581 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
582 pRangeLock->pWaitingTasksHead = NULL;
583 pRangeLock->pWaitingTasksTail = NULL;
584 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
585
586 return pTasksWaitingHead;
587}
588
589static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
590 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
591 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
592{
593 int rc = VINF_SUCCESS;
594 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
595 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
596 void *pvBuf = pTask->DataSeg.pvSeg;
597
598 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
599 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
600 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
601 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
602
603 pTask->fPrefetch = false;
604 pTask->cbBounceBuffer = 0;
605
606 /*
607 * Before we start to setup the request we have to check whether there is a task
608 * already active which range intersects with ours. We have to defer execution
609 * of this task in two cases:
610 * - The pending task is a write and the current is either read or write
611 * - The pending task is a read and the current task is a write task.
612 *
613 * To check whether a range is currently "locked" we use the AVL tree where every pending task
614 * is stored by its file offset range. The current task will be added to the active task
615 * and will be executed when the active one completes. (The method below
616 * which checks whether a range is already used will add the task)
617 *
618 * This is neccessary because of the requirement to align all requests to a 512 boundary
619 * which is enforced by the host OS (Linux and Windows atm). It is possible that
620 * we have to process unaligned tasks and need to align them using bounce buffers.
621 * While the data is fetched from the file another request might arrive writing to
622 * the same range. This will result in data corruption if both are executed concurrently.
623 */
624 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask);
625
626 if (!fLocked)
627 {
628 /* Get a request handle. */
629 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
630 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
631
632 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
633 {
634 /* Grow the file if needed. */
635 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
636 {
637 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
638 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
639 }
640
641 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
642 pTask->Off, pTask->DataSeg.pvSeg,
643 pTask->DataSeg.cbSeg, pTask);
644 }
645 else
646 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
647 pTask->Off, pTask->DataSeg.pvSeg,
648 pTask->DataSeg.cbSeg, pTask);
649 AssertRC(rc);
650
651 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
652 pTask->DataSeg.cbSeg,
653 pTask);
654
655 if (RT_SUCCESS(rc))
656 *phReq = hReq;
657 }
658 else
659 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
660
661 return rc;
662}
663
664static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
665 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
666 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
667{
668 int rc = VINF_SUCCESS;
669 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
670 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
671 void *pvBuf = pTask->DataSeg.pvSeg;
672
673 /*
674 * Check if the alignment requirements are met.
675 * Offset, transfer size and buffer address
676 * need to be on a 512 boundary.
677 */
678 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
679 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
680 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
681
682 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
683 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
684 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
685 offStart, cbToTransfer, pEndpoint->cbFile));
686
687 pTask->fPrefetch = false;
688
689 /*
690 * Before we start to setup the request we have to check whether there is a task
691 * already active which range intersects with ours. We have to defer execution
692 * of this task in two cases:
693 * - The pending task is a write and the current is either read or write
694 * - The pending task is a read and the current task is a write task.
695 *
696 * To check whether a range is currently "locked" we use the AVL tree where every pending task
697 * is stored by its file offset range. The current task will be added to the active task
698 * and will be executed when the active one completes. (The method below
699 * which checks whether a range is already used will add the task)
700 *
701 * This is neccessary because of the requirement to align all requests to a 512 boundary
702 * which is enforced by the host OS (Linux and Windows atm). It is possible that
703 * we have to process unaligned tasks and need to align them using bounce buffers.
704 * While the data is fetched from the file another request might arrive writing to
705 * the same range. This will result in data corruption if both are executed concurrently.
706 */
707 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask);
708
709 if (!fLocked)
710 {
711 /* Get a request handle. */
712 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
713 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
714
715 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
716 || RT_UNLIKELY(offStart != pTask->Off)
717 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
718 {
719 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
720 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
721
722 /* Create bounce buffer. */
723 pTask->cbBounceBuffer = cbToTransfer;
724
725 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
726 pTask->Off, offStart));
727 pTask->offBounceBuffer = pTask->Off - offStart;
728
729 /** @todo: I think we need something like a RTMemAllocAligned method here.
730 * Current assumption is that the maximum alignment is 4096byte
731 * (GPT disk on Windows)
732 * so we can use RTMemPageAlloc here.
733 */
734 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
735 if (RT_LIKELY(pTask->pvBounceBuffer))
736 {
737 pvBuf = pTask->pvBounceBuffer;
738
739 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
740 {
741 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
742 || RT_UNLIKELY(offStart != pTask->Off))
743 {
744 /* We have to fill the buffer first before we can update the data. */
745 LogFlow(("Prefetching data for task %#p\n", pTask));
746 pTask->fPrefetch = true;
747 enmTransferType = PDMACTASKFILETRANSFER_READ;
748 }
749 else
750 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
751 }
752 }
753 else
754 rc = VERR_NO_MEMORY;
755 }
756 else
757 pTask->cbBounceBuffer = 0;
758
759 if (RT_SUCCESS(rc))
760 {
761 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
762 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
763
764 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
765 {
766 /* Grow the file if needed. */
767 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
768 {
769 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
770 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
771 }
772
773 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
774 offStart, pvBuf, cbToTransfer, pTask);
775 }
776 else
777 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
778 offStart, pvBuf, cbToTransfer, pTask);
779 AssertRC(rc);
780
781 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask);
782
783 if (RT_SUCCESS(rc))
784 *phReq = hReq;
785 else
786 {
787 /* Cleanup */
788 if (pTask->cbBounceBuffer)
789 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
790 }
791 }
792 }
793 else
794 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
795
796 return rc;
797}
798
799static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
800 PPDMACEPFILEMGR pAioMgr,
801 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
802{
803 RTFILEAIOREQ apReqs[20];
804 unsigned cRequests = 0;
805 unsigned cMaxRequests = pAioMgr->cRequestsActiveMax - pAioMgr->cRequestsActive;
806 int rc = VINF_SUCCESS;
807
808 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
809 ("Trying to process request lists of a non active endpoint!\n"));
810
811 /* Go through the list and queue the requests until we get a flush request */
812 while ( pTaskHead
813 && !pEndpoint->pFlushReq
814 && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
815 && RT_SUCCESS(rc))
816 {
817 PPDMACTASKFILE pCurr = pTaskHead;
818
819 if (!pdmacFileBwMgrIsTransferAllowed(pEndpoint->pBwMgr, (uint32_t)pCurr->DataSeg.cbSeg))
820 {
821 pAioMgr->fBwLimitReached = true;
822 break;
823 }
824
825 pTaskHead = pTaskHead->pNext;
826
827 pCurr->pNext = NULL;
828
829 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
830 ("Endpoints do not match\n"));
831
832 switch (pCurr->enmTransferType)
833 {
834 case PDMACTASKFILETRANSFER_FLUSH:
835 {
836 /* If there is no data transfer request this flush request finished immediately. */
837 if (!pEndpoint->AioMgr.cRequestsActive)
838 {
839 pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
840 pdmacFileTaskFree(pEndpoint, pCurr);
841 }
842 else
843 {
844 Assert(!pEndpoint->pFlushReq);
845 pEndpoint->pFlushReq = pCurr;
846 }
847 break;
848 }
849 case PDMACTASKFILETRANSFER_READ:
850 case PDMACTASKFILETRANSFER_WRITE:
851 {
852 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
853
854 if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
855 rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
856 else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
857 rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
858 else
859 AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));
860
861 AssertRC(rc);
862
863 if (hReq != NIL_RTFILEAIOREQ)
864 {
865 apReqs[cRequests] = hReq;
866 pEndpoint->AioMgr.cReqsProcessed++;
867 cRequests++;
868 if (cRequests == RT_ELEMENTS(apReqs))
869 {
870 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
871 cRequests = 0;
872 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
873 ("Unexpected return code\n"));
874 }
875 }
876 break;
877 }
878 default:
879 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
880 }
881 }
882
883 if (cRequests)
884 {
885 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
886 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
887 ("Unexpected return code rc=%Rrc\n", rc));
888 }
889
890 if (pTaskHead)
891 {
892 /* Add the rest of the tasks to the pending list */
893 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
894
895 if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
896 && !pEndpoint->pFlushReq
897 && !pAioMgr->fBwLimitReached))
898 {
899 /*
900 * The I/O manager has no room left for more requests
901 * but there are still requests to process.
902 * Create a new I/O manager and let it handle some endpoints.
903 */
904 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
905 }
906 }
907
908 /* Insufficient resources are not fatal. */
909 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
910 rc = VINF_SUCCESS;
911
912 return rc;
913}
914
915/**
916 * Adds all pending requests for the given endpoint
917 * until a flush request is encountered or there is no
918 * request anymore.
919 *
920 * @returns VBox status code.
921 * @param pAioMgr The async I/O manager for the endpoint
922 * @param pEndpoint The endpoint to get the requests from.
923 */
924static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
925 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
926{
927 int rc = VINF_SUCCESS;
928 PPDMACTASKFILE pTasksHead = NULL;
929
930 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
931 ("Trying to process request lists of a non active endpoint!\n"));
932
933 Assert(!pEndpoint->pFlushReq);
934
935 /* Check the pending list first */
936 if (pEndpoint->AioMgr.pReqsPendingHead)
937 {
938 LogFlow(("Queuing pending requests first\n"));
939
940 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
941 /*
942 * Clear the list as the processing routine will insert them into the list
943 * again if it gets a flush request.
944 */
945 pEndpoint->AioMgr.pReqsPendingHead = NULL;
946 pEndpoint->AioMgr.pReqsPendingTail = NULL;
947 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
948 AssertRC(rc);
949 }
950
951 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
952 {
953 /* Now the request queue. */
954 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
955 if (pTasksHead)
956 {
957 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
958 AssertRC(rc);
959 }
960 }
961
962 return rc;
963}
964
965static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
966{
967 int rc = VINF_SUCCESS;
968 bool fNotifyWaiter = false;
969
970 LogFlowFunc((": Enter\n"));
971
972 Assert(pAioMgr->fBlockingEventPending);
973
974 switch (pAioMgr->enmBlockingEvent)
975 {
976 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
977 {
978 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
979 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
980
981 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
982
983 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
984 pEndpointNew->AioMgr.pEndpointPrev = NULL;
985 if (pAioMgr->pEndpointsHead)
986 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
987 pAioMgr->pEndpointsHead = pEndpointNew;
988
989 /* Assign the completion point to this file. */
990 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
991 fNotifyWaiter = true;
992 pAioMgr->cEndpoints++;
993 break;
994 }
995 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
996 {
997 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);
998 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
999
1000 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
1001 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
1002 break;
1003 }
1004 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
1005 {
1006 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
1007 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
1008
1009 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1010 {
1011 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
1012
1013 /* Make sure all tasks finished. Process the queues a last time first. */
1014 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
1015 AssertRC(rc);
1016
1017 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
1018 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
1019 }
1020 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
1021 && (!pEndpointClose->AioMgr.cRequestsActive))
1022 fNotifyWaiter = true;
1023 break;
1024 }
1025 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
1026 {
1027 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
1028 if (!pAioMgr->cRequestsActive)
1029 fNotifyWaiter = true;
1030 break;
1031 }
1032 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
1033 {
1034 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
1035 break;
1036 }
1037 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
1038 {
1039 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
1040 fNotifyWaiter = true;
1041 break;
1042 }
1043 default:
1044 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
1045 }
1046
1047 if (fNotifyWaiter)
1048 {
1049 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1050 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
1051
1052 /* Release the waiting thread. */
1053 LogFlow(("Signalling waiter\n"));
1054 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1055 AssertRC(rc);
1056 }
1057
1058 LogFlowFunc((": Leave\n"));
1059 return rc;
1060}
1061
1062/**
1063 * Checks all endpoints for pending events or new requests.
1064 *
1065 * @returns VBox status code.
1066 * @param pAioMgr The I/O manager handle.
1067 */
1068static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1069{
1070 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1071 int rc = VINF_SUCCESS;
1072 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1073
1074 pAioMgr->fBwLimitReached = false;
1075
1076 while (pEndpoint)
1077 {
1078 if (!pEndpoint->pFlushReq
1079 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1080 && !pEndpoint->AioMgr.fMoving)
1081 {
1082 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1083 if (RT_FAILURE(rc))
1084 return rc;
1085 }
1086 else if (!pEndpoint->AioMgr.cRequestsActive)
1087 {
1088 /* Reopen the file so that the new endpoint can reassociate with the file */
1089 RTFileClose(pEndpoint->File);
1090 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
1091 AssertRC(rc);
1092
1093 if (pEndpoint->AioMgr.fMoving)
1094 {
1095 pEndpoint->AioMgr.fMoving = false;
1096 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1097 }
1098 else
1099 {
1100 Assert(pAioMgr->fBlockingEventPending);
1101 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1102
1103 /* Release the waiting thread. */
1104 LogFlow(("Signalling waiter\n"));
1105 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1106 AssertRC(rc);
1107 }
1108 }
1109
1110 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1111 }
1112
1113 return rc;
1114}
1115
1116static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1117{
1118 int rc = VINF_SUCCESS;
1119 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1120 size_t cbTransfered = 0;
1121 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1122 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
1123 PPDMACTASKFILE pTasksWaiting;
1124
1125 pEndpoint = pTask->pEndpoint;
1126
1127 /*
1128 * It is possible that the request failed on Linux with kernels < 2.6.23
1129 * if the passed buffer was allocated with remap_pfn_range or if the file
1130 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1131 * The endpoint will be migrated to a failsafe manager in case a request fails.
1132 */
1133 if (RT_FAILURE(rcReq))
1134 {
1135 /* Free bounce buffers and the IPRT request. */
1136 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = hReq;
1137 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
1138
1139 /* Free the lock and process pending tasks if neccessary */
1140 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1141 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1142 AssertRC(rc);
1143
1144 pAioMgr->cRequestsActive--;
1145 pEndpoint->AioMgr.cRequestsActive--;
1146 pEndpoint->AioMgr.cReqsProcessed++;
1147
1148 if (pTask->cbBounceBuffer)
1149 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1150
1151 /* Queue the request on the pending list. */
1152 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1153 pEndpoint->AioMgr.pReqsPendingHead = pTask;
1154
1155 /* Create a new failsafe manager if neccessary. */
1156 if (!pEndpoint->AioMgr.fMoving)
1157 {
1158 PPDMACEPFILEMGR pAioMgrFailsafe;
1159
1160 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1161 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
1162
1163 pEndpoint->AioMgr.fMoving = true;
1164
1165 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1166 &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
1167 AssertRC(rc);
1168
1169 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
1170
1171 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1172 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1173 }
1174
1175 /* If this was the last request for the endpoint migrate it to the new manager. */
1176 if (!pEndpoint->AioMgr.cRequestsActive)
1177 {
1178 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1179 Assert(!fReqsPending);
1180
1181 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1182 AssertRC(rc);
1183 }
1184 }
1185 else
1186 {
1187 AssertMsg( RT_FAILURE(rcReq)
1188 || ( (cbTransfered == pTask->DataSeg.cbSeg)
1189 || (pTask->cbBounceBuffer && cbTransfered >= pTask->DataSeg.cbSeg)),
1190 ("Task didn't completed successfully (rc=%Rrc) or was incomplete (cbTransfered=%u)\n", rcReq, cbTransfered));
1191
1192 if (pTask->fPrefetch)
1193 {
1194 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1195 Assert(pTask->cbBounceBuffer);
1196
1197 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1198 pTask->DataSeg.pvSeg,
1199 pTask->DataSeg.cbSeg);
1200
1201 /* Write it now. */
1202 pTask->fPrefetch = false;
1203 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
1204 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
1205
1206 /* Grow the file if needed. */
1207 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1208 {
1209 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
1210 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
1211 }
1212
1213 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
1214 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1215 AssertRC(rc);
1216 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, &hReq, 1);
1217 AssertRC(rc);
1218 }
1219 else
1220 {
1221 if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
1222 {
1223 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1224 memcpy(pTask->DataSeg.pvSeg,
1225 ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1226 pTask->DataSeg.cbSeg);
1227
1228 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1229 }
1230
1231 /* Put the entry on the free array */
1232 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = hReq;
1233 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
1234
1235 pAioMgr->cRequestsActive--;
1236 pEndpoint->AioMgr.cRequestsActive--;
1237 pEndpoint->AioMgr.cReqsProcessed++;
1238
1239 /* Free the lock and process pending tasks if neccessary */
1240 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1241 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1242 AssertRC(rc);
1243
1244 /* Call completion callback */
1245 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1246 pdmacFileTaskFree(pEndpoint, pTask);
1247
1248 /*
1249 * If there is no request left on the endpoint but a flush request is set
1250 * it completed now and we notify the owner.
1251 * Furthermore we look for new requests and continue.
1252 */
1253 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1254 {
1255 /* Call completion callback */
1256 pTask = pEndpoint->pFlushReq;
1257 pEndpoint->pFlushReq = NULL;
1258
1259 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
1260
1261 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1262 pdmacFileTaskFree(pEndpoint, pTask);
1263 }
1264 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1265 {
1266 /* If the endpoint is about to be migrated do it now. */
1267 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1268 Assert(!fReqsPending);
1269
1270 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1271 AssertRC(rc);
1272 }
1273 }
1274 } /* request completed successfully */
1275}
1276
/**
 * Helper macro for checking for error codes.
 *
 * If @a rc is a failure status the error handler is called with the current
 * source position and the enclosing function returns the handler's status.
 * Wrapped in do { } while (0) so that it behaves like a single statement,
 * also as the body of an unbraced if / else.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rcCheckRcHandled = pdmacFileAioMgrNormalErrorHandler((pAioMgr), (rc), RT_SRC_POS); \
            return rcCheckRcHandled; \
        } \
    } while (0)
1284
1285/**
1286 * The normal I/O manager using the RTFileAio* API
1287 *
1288 * @returns VBox status code.
1289 * @param ThreadSelf Handle of the thread.
1290 * @param pvUser Opaque user data.
1291 */
1292int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
1293{
1294 int rc = VINF_SUCCESS;
1295 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1296 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1297
1298 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
1299 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING))
1300 {
1301 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1302 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1303 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
1304 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1305 AssertRC(rc);
1306
1307 LogFlow(("Got woken up\n"));
1308 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1309
1310 /* Check for an external blocking event first. */
1311 if (pAioMgr->fBlockingEventPending)
1312 {
1313 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1314 CHECK_RC(pAioMgr, rc);
1315 }
1316
1317 if (RT_LIKELY(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING))
1318 {
1319 /* We got woken up because an endpoint issued new requests. Queue them. */
1320 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1321 CHECK_RC(pAioMgr, rc);
1322
1323 while ( pAioMgr->cRequestsActive
1324 || pAioMgr->fBwLimitReached)
1325 {
1326 if (pAioMgr->cRequestsActive)
1327 {
1328 RTFILEAIOREQ apReqs[20];
1329 uint32_t cReqsCompleted = 0;
1330 size_t cReqsWait;
1331
1332 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1333 cReqsWait = RT_ELEMENTS(apReqs);
1334 else
1335 cReqsWait = pAioMgr->cRequestsActive;
1336
1337 LogFlow(("Waiting for %d of %d tasks to complete\n", pAioMgr->cRequestsActive, cReqsWait));
1338
1339 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1340 cReqsWait,
1341 RT_INDEFINITE_WAIT, apReqs,
1342 RT_ELEMENTS(apReqs), &cReqsCompleted);
1343 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1344 CHECK_RC(pAioMgr, rc);
1345
1346 LogFlow(("%d tasks completed\n", cReqsCompleted));
1347
1348 for (uint32_t i = 0; i < cReqsCompleted; i++)
1349 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1350
1351 /* Check for an external blocking event before we go to sleep again. */
1352 if (pAioMgr->fBlockingEventPending)
1353 {
1354 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1355 CHECK_RC(pAioMgr, rc);
1356 }
1357
1358 /* Update load statistics. */
1359 uint64_t uMillisCurr = RTTimeMilliTS();
1360 if (uMillisCurr > uMillisEnd)
1361 {
1362 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1363
1364 /* Calculate timespan. */
1365 uMillisCurr -= uMillisEnd;
1366
1367 while (pEndpointCurr)
1368 {
1369 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1370 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1371 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1372 }
1373
1374 /* Set new update interval */
1375 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1376 }
1377 }
1378 else
1379 {
1380 /*
1381 * Bandwidth limit reached for all endpoints.
1382 * Yield and wait until we have enough resources again.
1383 */
1384 RTThreadYield();
1385 }
1386
1387 /* Check endpoints for new requests. */
1388 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1389 CHECK_RC(pAioMgr, rc);
1390 } /* while requests are active. */
1391 } /* if still running */
1392 } /* while running */
1393
1394 return rc;
1395}
1396
1397#undef CHECK_RC
1398
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette