VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 23495

Last change on this file since 23495 was 23404, checked in by vboxsync, 15 years ago

AsyncCompletion: Handle out of resources case properly. Should fix assertion on Darwin

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 35.8 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 23404 2009-09-29 10:18:37Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#include <iprt/types.h>
24#include <iprt/asm.h>
25#include <iprt/file.h>
26#include <iprt/mem.h>
27#include <iprt/string.h>
28#include <VBox/log.h>
29
30#include "PDMAsyncCompletionFileInternal.h"
31
/** Interval, in milliseconds, between two updates of the I/O load statistics. */
#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
/** Upper bound for the number of requests one manager will handle.
 * @todo Find a better solution wrt. the request number. */
#define PDMACEPFILEMGR_REQS_MAX 512
36
37int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
38{
39 int rc = VINF_SUCCESS;
40
41 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
42 if (rc == VERR_OUT_OF_RANGE)
43 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, PDMACEPFILEMGR_REQS_MAX);
44
45 if (RT_SUCCESS(rc))
46 {
47 /* Initialize request handle array. */
48 pAioMgr->iFreeEntryNext = 0;
49 pAioMgr->iFreeReqNext = 0;
50 pAioMgr->cReqEntries = PDMACEPFILEMGR_REQS_MAX + 1;
51 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
52
53 if (pAioMgr->pahReqsFree)
54 {
55 return VINF_SUCCESS;
56 }
57 else
58 {
59 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
60 rc = VERR_NO_MEMORY;
61 }
62 }
63
64 return rc;
65}
66
67void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
68{
69 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
70
71 while (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
72 {
73 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext]);
74 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
75 }
76
77 RTMemFree(pAioMgr->pahReqsFree);
78}
79
80/**
81 * Sorts the endpoint list with insertion sort.
82 */
83static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
84{
85 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
86
87 pEpPrev = pAioMgr->pEndpointsHead;
88 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
89
90 while (pEpCurr)
91 {
92 /* Remember the next element to sort because the list might change. */
93 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
94
95 /* Unlink the current element from the list. */
96 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
97 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
98
99 if (pPrev)
100 pPrev->AioMgr.pEndpointNext = pNext;
101 else
102 pAioMgr->pEndpointsHead = pNext;
103
104 if (pNext)
105 pNext->AioMgr.pEndpointPrev = pPrev;
106
107 /* Go back until we reached the place to insert the current endpoint into. */
108 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
109 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
110
111 /* Link the endpoint into the list. */
112 if (pEpPrev)
113 pNext = pEpPrev->AioMgr.pEndpointNext;
114 else
115 pNext = pAioMgr->pEndpointsHead;
116
117 pEpCurr->AioMgr.pEndpointNext = pNext;
118 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
119 pNext->AioMgr.pEndpointPrev = pEpCurr;
120 if (pEpPrev)
121 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
122 else
123 pAioMgr->pEndpointsHead = pEpCurr;
124
125 pEpCurr = pEpNextToSort;
126 }
127
128#ifdef DEBUG
129 /* Validate sorting alogrithm */
130 unsigned cEndpoints = 0;
131 pEpCurr = pAioMgr->pEndpointsHead;
132
133 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
134 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
135
136 while (pEpCurr)
137 {
138 cEndpoints++;
139
140 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
141 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
142
143 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
144 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
145
146 pEpCurr = pNext;
147 }
148
149 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
150
151#endif
152}
153
154/**
155 * Removes an endpoint from the currently assigned manager.
156 *
157 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
158 * FALSE otherwise.
159 * @param pEndpointRemove The endpoint to remove.
160 */
161static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
162{
163 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
164 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
165 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
166
167 pAioMgr->cEndpoints--;
168
169 if (pPrev)
170 pPrev->AioMgr.pEndpointNext = pNext;
171 else
172 pAioMgr->pEndpointsHead = pNext;
173
174 if (pNext)
175 pNext->AioMgr.pEndpointPrev = pPrev;
176
177 /* Make sure that there is no request pending on this manager for the endpoint. */
178 if (!pEndpointRemove->AioMgr.cRequestsActive)
179 {
180 Assert(!pEndpointRemove->pFlushReq);
181
182 /* Reopen the file so that the new endpoint can reassociate with the file */
183 RTFileClose(pEndpointRemove->File);
184 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
185 AssertRC(rc);
186 return false;
187 }
188
189 return true;
190}
191
192/**
193 * Creates a new I/O manager and spreads the I/O load of the endpoints
194 * between the given I/O manager and the new one.
195 *
196 * @returns nothing.
197 * @param pAioMgr The I/O manager with high I/O load.
198 */
199static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
200{
201 PPDMACEPFILEMGR pAioMgrNew = NULL;
202 int rc = VINF_SUCCESS;
203
204 /* Splitting can't be done with only one open endpoint. */
205 if (pAioMgr->cEndpoints > 1)
206 {
207 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass,
208 &pAioMgrNew);
209 if (RT_SUCCESS(rc))
210 {
211 /* We will sort the list by request count per second. */
212 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
213
214 /* Now move some endpoints to the new manager. */
215 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
216 unsigned cReqsOther = 0;
217 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
218
219 while (pCurr)
220 {
221 if (cReqsHere <= cReqsOther)
222 {
223 /*
224 * The other manager has more requests to handle now.
225 * We will keep the current endpoint.
226 */
227 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
228 cReqsHere += pCurr->AioMgr.cReqsPerSec;
229 pCurr = pCurr->AioMgr.pEndpointNext;
230 }
231 else
232 {
233 /* Move to other endpoint. */
234 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
235 cReqsOther += pCurr->AioMgr.cReqsPerSec;
236
237 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
238
239 pCurr = pCurr->AioMgr.pEndpointNext;
240
241 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
242
243 if (fReqsPending)
244 {
245 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
246 pMove->AioMgr.fMoving = true;
247 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
248 }
249 else
250 {
251 pMove->AioMgr.fMoving = false;
252 pMove->AioMgr.pAioMgrDst = NULL;
253 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
254 }
255 }
256 }
257 }
258 else
259 {
260 /* Don't process further but leave a log entry about reduced performance. */
261 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
262 }
263 }
264}
265
266/**
267 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
268 *
269 * @returns VBox status code
270 * @param pAioMgr The I/O manager the error ocurred on.
271 * @param rc The error code.
272 */
273static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
274{
275 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
276 pAioMgr, rc));
277 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
278 LogRel(("AIOMgr: Please contact the product vendor\n"));
279
280 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
281
282 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
283 ASMAtomicWriteBool(&pEpClassFile->fFailsafe, true);
284
285 AssertMsgFailed(("Implement\n"));
286 return VINF_SUCCESS;
287}
288
289/**
290 * Put a list of tasks in the pending request list of an endpoint.
291 */
292DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
293{
294 /* Add the rest of the tasks to the pending list */
295 if (!pEndpoint->AioMgr.pReqsPendingHead)
296 {
297 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
298 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
299 }
300 else
301 {
302 Assert(pEndpoint->AioMgr.pReqsPendingTail);
303 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
304 }
305
306 /* Update the tail. */
307 while (pTaskHead->pNext)
308 pTaskHead = pTaskHead->pNext;
309
310 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
311}
312
313/**
314 * Put one task in the pending request list of an endpoint.
315 */
316DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
317{
318 /* Add the rest of the tasks to the pending list */
319 if (!pEndpoint->AioMgr.pReqsPendingHead)
320 {
321 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
322 pEndpoint->AioMgr.pReqsPendingHead = pTask;
323 }
324 else
325 {
326 Assert(pEndpoint->AioMgr.pReqsPendingTail);
327 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
328 }
329
330 pEndpoint->AioMgr.pReqsPendingTail = pTask;
331}
332
333/**
334 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
335 */
336static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
337 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
338 PRTFILEAIOREQ pahReqs, size_t cReqs)
339{
340 int rc;
341
342 pAioMgr->cRequestsActive += cReqs;
343 pEndpoint->AioMgr.cRequestsActive += cReqs;
344
345 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
346 if (RT_FAILURE(rc))
347 {
348 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
349 {
350 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
351
352 /*
353 * We run out of resources.
354 * Need to check which requests got queued
355 * and put the rest on the pending list again.
356 */
357 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
358 {
359 pEpClass->fOutOfResourcesWarningPrinted = true;
360 LogRel(("AIOMgr: The operating system doesn't have enough resources "
361 "to handle the I/O load of the VM. Expect reduced I/O performance\n"));
362 }
363
364 for (size_t i = 0; i < cReqs; i++)
365 {
366 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
367
368 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
369 {
370 AssertMsg(rcReq == VERR_FILE_AIO_NOT_SUBMITTED,
371 ("Request returned unexpected return code: rc=%Rrc\n", rcReq));
372
373 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
374
375 /* Put the entry on the free array */
376 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = pahReqs[i];
377 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
378
379 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
380 pAioMgr->cRequestsActive--;
381 pEndpoint->AioMgr.cRequestsActive--;
382 }
383 }
384 }
385 else
386 AssertMsgFailed(("Unexpected return code rc=%Rrc\n", rc));
387 }
388
389 return rc;
390}
391
392static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
393 PPDMACEPFILEMGR pAioMgr,
394 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
395{
396 RTFILEAIOREQ apReqs[20];
397 unsigned cRequests = 0;
398 unsigned cMaxRequests = PDMACEPFILEMGR_REQS_MAX - pAioMgr->cRequestsActive;
399 int rc = VINF_SUCCESS;
400 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
401
402 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
403 ("Trying to process request lists of a non active endpoint!\n"));
404
405 /* Go through the list and queue the requests until we get a flush request */
406 while (pTaskHead && !pEndpoint->pFlushReq && (cMaxRequests > 0))
407 {
408 PPDMACTASKFILE pCurr = pTaskHead;
409
410 pTaskHead = pTaskHead->pNext;
411
412 pCurr->pNext = NULL;
413
414 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
415 ("Endpoints do not match\n"));
416
417 switch (pCurr->enmTransferType)
418 {
419 case PDMACTASKFILETRANSFER_FLUSH:
420 {
421 /* If there is no data transfer request this flush request finished immediately. */
422 if (!pEndpoint->AioMgr.cRequestsActive)
423 {
424 pCurr->pfnCompleted(pCurr, pCurr->pvUser);
425 pdmacFileTaskFree(pEndpoint, pCurr);
426 }
427 else
428 {
429 pEndpoint->pFlushReq = pCurr;
430
431 /* Do not process the task list further until the flush finished. */
432 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
433 }
434 break;
435 }
436 case PDMACTASKFILETRANSFER_READ:
437 case PDMACTASKFILETRANSFER_WRITE:
438 {
439 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
440 void *pvBuf = pCurr->DataSeg.pvSeg;
441
442 /* Get a request handle. */
443 if (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
444 {
445 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext];
446 pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext] = NIL_RTFILEAIOREQ;
447 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
448 }
449 else
450 {
451 rc = RTFileAioReqCreate(&hReq);
452 AssertRC(rc);
453 }
454
455 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
456
457 /* Check if the alignment requirements are met.
458 * Offset, transfer size and buffer address
459 * need to be on a 512 boundary. */
460 size_t cbToTransfer = RT_ALIGN_Z(pCurr->DataSeg.cbSeg, 512);
461 RTFOFF offStart = pCurr->Off & ~(RTFOFF)(512-1);
462 PDMACTASKFILETRANSFER enmTransferType = pCurr->enmTransferType;
463
464 AssertMsg( pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE
465 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
466 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
467 offStart, cbToTransfer, pEndpoint->cbFile));
468
469 pCurr->fPrefetch = false;
470
471 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
472 || RT_UNLIKELY(offStart != pCurr->Off)
473 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
474 {
475 /* Create bounce buffer. */
476 pCurr->fBounceBuffer = true;
477
478 AssertMsg(pCurr->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
479 pCurr->Off, offStart));
480 pCurr->uBounceBufOffset = pCurr->Off - offStart;
481
482 /** @todo: I think we need something like a RTMemAllocAligned method here.
483 * Current assumption is that the maximum alignment is 4096byte
484 * (GPT disk on Windows)
485 * so we can use RTMemPageAlloc here.
486 */
487 pCurr->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
488 AssertPtr(pCurr->pvBounceBuffer);
489 pvBuf = pCurr->pvBounceBuffer;
490
491 if (pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
492 {
493 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
494 || RT_UNLIKELY(offStart != pCurr->Off))
495 {
496 /* We have to fill the buffer first before we can update the data. */
497 pCurr->fPrefetch = true;
498 enmTransferType = PDMACTASKFILETRANSFER_READ;
499 }
500 else
501 memcpy(pvBuf, pCurr->DataSeg.pvSeg, pCurr->DataSeg.cbSeg);
502 }
503 }
504 else
505 pCurr->fBounceBuffer = false;
506
507 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
508 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
509
510 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
511 {
512 /* Grow the file if needed. */
513 if (RT_UNLIKELY((uint64_t)(pCurr->Off + pCurr->DataSeg.cbSeg) > pEndpoint->cbFile))
514 {
515 ASMAtomicWriteU64(&pEndpoint->cbFile, pCurr->Off + pCurr->DataSeg.cbSeg);
516 RTFileSetSize(pEndpoint->File, pCurr->Off + pCurr->DataSeg.cbSeg);
517 }
518
519 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
520 offStart, pvBuf, cbToTransfer, pCurr);
521 }
522 else
523 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
524 offStart, pvBuf, cbToTransfer, pCurr);
525 AssertRC(rc);
526
527 apReqs[cRequests] = hReq;
528 pEndpoint->AioMgr.cReqsProcessed++;
529 cMaxRequests--;
530 cRequests++;
531 if (cRequests == RT_ELEMENTS(apReqs))
532 {
533 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
534 cRequests = 0;
535 if (RT_FAILURE(rc))
536 {
537 AssertMsg(rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES, ("Unexpected return code\n"));
538 break;
539 }
540 }
541 break;
542 }
543 default:
544 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
545 }
546 }
547
548 if (cRequests)
549 {
550 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
551 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
552 ("Unexpected return code rc=%Rrc\n", rc));
553 }
554
555 if (pTaskHead)
556 {
557 /* Add the rest of the tasks to the pending list */
558 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
559
560
561 if (RT_UNLIKELY(!cMaxRequests && !pEndpoint->pFlushReq))
562 {
563 /*
564 * The I/O manager has no room left for more requests
565 * but there are still requests to process.
566 * Create a new I/O manager and let it handle some endpoints.
567 */
568 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
569 }
570 }
571
572 /* Insufficient resources are not fatal. */
573 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
574 rc = VINF_SUCCESS;
575
576 return rc;
577}
578
579/**
580 * Adds all pending requests for the given endpoint
581 * until a flush request is encountered or there is no
582 * request anymore.
583 *
584 * @returns VBox status code.
585 * @param pAioMgr The async I/O manager for the endpoint
586 * @param pEndpoint The endpoint to get the requests from.
587 */
588static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
589 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
590{
591 int rc = VINF_SUCCESS;
592 PPDMACTASKFILE pTasksHead = NULL;
593
594 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
595 ("Trying to process request lists of a non active endpoint!\n"));
596
597 Assert(!pEndpoint->pFlushReq);
598
599 /* Check the pending list first */
600 if (pEndpoint->AioMgr.pReqsPendingHead)
601 {
602 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
603 /*
604 * Clear the list as the processing routine will insert them into the list
605 * again if it gets a flush request.
606 */
607 pEndpoint->AioMgr.pReqsPendingHead = NULL;
608 pEndpoint->AioMgr.pReqsPendingTail = NULL;
609 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
610 AssertRC(rc);
611 }
612
613 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
614 {
615 /* Now the request queue. */
616 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
617 if (pTasksHead)
618 {
619 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
620 AssertRC(rc);
621 }
622 }
623
624 return rc;
625}
626
627static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
628{
629 int rc = VINF_SUCCESS;
630 bool fNotifyWaiter = false;
631
632 Assert(pAioMgr->fBlockingEventPending);
633
634 switch (pAioMgr->enmBlockingEvent)
635 {
636 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
637 {
638 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
639 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
640
641 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
642
643 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
644 pEndpointNew->AioMgr.pEndpointPrev = NULL;
645 if (pAioMgr->pEndpointsHead)
646 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
647 pAioMgr->pEndpointsHead = pEndpointNew;
648
649 /* Assign the completion point to this file. */
650 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
651 fNotifyWaiter = true;
652 pAioMgr->cEndpoints++;
653 break;
654 }
655 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
656 {
657 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);
658 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
659
660 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
661 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
662 break;
663 }
664 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
665 {
666 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
667 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
668
669 /* Make sure all tasks finished. Process the queues a last time first. */
670 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
671 AssertRC(rc);
672
673 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
674 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
675 break;
676 }
677 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
678 {
679 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
680 if (!pAioMgr->cRequestsActive)
681 fNotifyWaiter = true;
682 break;
683 }
684 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
685 {
686 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
687 break;
688 }
689 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
690 {
691 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
692 fNotifyWaiter = true;
693 break;
694 }
695 default:
696 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
697 }
698
699 if (fNotifyWaiter)
700 {
701 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
702 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
703
704 /* Release the waiting thread. */
705 LogFlow(("Signalling waiter\n"));
706 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
707 AssertRC(rc);
708 }
709
710 return rc;
711}
712
/**
 * Helper macro for checking for error codes.
 *
 * If rc is a failure status the error handler of the manager is called and
 * the enclosing function returns the status of the handler.  The body is
 * wrapped in do { } while (0) so the macro behaves like a single statement
 * (safe in unbraced if/else); it has to be followed by a semicolon.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler((pAioMgr), (rc), RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
720
721/**
722 * The normal I/O manager using the RTFileAio* API
723 *
724 * @returns VBox status code.
725 * @param ThreadSelf Handle of the thread.
726 * @param pvUser Opaque user data.
727 */
728int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
729{
730 int rc = VINF_SUCCESS;
731 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
732 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
733
734 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
735 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING))
736 {
737 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
738 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
739 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
740 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
741 AssertRC(rc);
742
743 LogFlow(("Got woken up\n"));
744 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
745
746 /* Check for an external blocking event first. */
747 if (pAioMgr->fBlockingEventPending)
748 {
749 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
750 CHECK_RC(pAioMgr, rc);
751 }
752
753 if (RT_LIKELY(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING))
754 {
755 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
756 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
757
758 while (pEndpoint)
759 {
760 if (!pEndpoint->pFlushReq && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE))
761 {
762 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
763 CHECK_RC(pAioMgr, rc);
764 }
765
766 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
767 }
768
769 while (pAioMgr->cRequestsActive)
770 {
771 RTFILEAIOREQ apReqs[20];
772 uint32_t cReqsCompleted = 0;
773 size_t cReqsWait;
774
775 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
776 cReqsWait = RT_ELEMENTS(apReqs);
777 else
778 cReqsWait = pAioMgr->cRequestsActive;
779
780 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
781 cReqsWait,
782 RT_INDEFINITE_WAIT, apReqs,
783 RT_ELEMENTS(apReqs), &cReqsCompleted);
784 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
785 CHECK_RC(pAioMgr, rc);
786
787 for (uint32_t i = 0; i < cReqsCompleted; i++)
788 {
789 size_t cbTransfered = 0;
790 int rcReq = RTFileAioReqGetRC(apReqs[i], &cbTransfered);
791 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(apReqs[i]);
792
793 pEndpoint = pTask->pEndpoint;
794
795 AssertMsg( RT_SUCCESS(rcReq)
796 && ( (cbTransfered == pTask->DataSeg.cbSeg)
797 || (pTask->fBounceBuffer)),
798 ("Task didn't completed successfully (rc=%Rrc) or was incomplete (cbTransfered=%u)\n", rc, cbTransfered));
799
800 if (pTask->fPrefetch)
801 {
802 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
803 Assert(pTask->fBounceBuffer);
804
805 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
806 pTask->DataSeg.pvSeg,
807 pTask->DataSeg.cbSeg);
808
809 /* Write it now. */
810 pTask->fPrefetch = false;
811 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
812 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
813
814 /* Grow the file if needed. */
815 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
816 {
817 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
818 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
819 }
820
821 rc = RTFileAioReqPrepareWrite(apReqs[i], pEndpoint->File,
822 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
823 AssertRC(rc);
824 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, &apReqs[i], 1);
825 AssertRC(rc);
826 }
827 else
828 {
829 if (pTask->fBounceBuffer)
830 {
831 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
832 memcpy(pTask->DataSeg.pvSeg,
833 ((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
834 pTask->DataSeg.cbSeg);
835
836 RTMemPageFree(pTask->pvBounceBuffer);
837 }
838
839 /* Put the entry on the free array */
840 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
841 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
842
843 pAioMgr->cRequestsActive--;
844 pEndpoint->AioMgr.cRequestsActive--;
845 pEndpoint->AioMgr.cReqsProcessed++;
846
847 /* Call completion callback */
848 pTask->pfnCompleted(pTask, pTask->pvUser);
849 pdmacFileTaskFree(pEndpoint, pTask);
850
851 /*
852 * If there is no request left on the endpoint but a flush request is set
853 * it completed now and we notify the owner.
854 * Furthermore we look for new requests and continue.
855 */
856 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
857 {
858 /* Call completion callback */
859 pTask = pEndpoint->pFlushReq;
860 pEndpoint->pFlushReq = NULL;
861
862 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
863
864 pTask->pfnCompleted(pTask, pTask->pvUser);
865 pdmacFileTaskFree(pEndpoint, pTask);
866 }
867 }
868
869 if (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
870 {
871 if (!pEndpoint->pFlushReq)
872 {
873 /* Check if there are events on the endpoint. */
874 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
875 CHECK_RC(pAioMgr, rc);
876 }
877 }
878 else if (!pEndpoint->AioMgr.cRequestsActive)
879 {
880 /* Reopen the file so that the new endpoint can reassociate with the file */
881 RTFileClose(pEndpoint->File);
882 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
883 AssertRC(rc);
884
885 if (pEndpoint->AioMgr.fMoving)
886 {
887 pEndpoint->AioMgr.fMoving = false;
888 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
889 }
890 else
891 {
892 Assert(pAioMgr->fBlockingEventPending);
893 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
894
895 /* Release the waiting thread. */
896 LogFlow(("Signalling waiter\n"));
897 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
898 AssertRC(rc);
899 }
900 }
901 }
902
903 /* Check for an external blocking event before we go to sleep again. */
904 if (pAioMgr->fBlockingEventPending)
905 {
906 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
907 CHECK_RC(pAioMgr, rc);
908 }
909
910 /* Update load statistics. */
911 uint64_t uMillisCurr = RTTimeMilliTS();
912 if (uMillisCurr > uMillisEnd)
913 {
914 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
915
916 /* Calculate timespan. */
917 uMillisCurr -= uMillisEnd;
918
919 while (pEndpointCurr)
920 {
921 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
922 pEndpointCurr->AioMgr.cReqsProcessed = 0;
923 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
924 }
925
926 /* Set new update interval */
927 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
928 }
929 }
930 }
931 }
932
933 return rc;
934}
935
936#undef CHECK_RC
937
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette