VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 24436

Last change on this file since 24436 was 24358, checked in by vboxsync, 15 years ago

AsyncCompletion: Don't enqueue requests if the endpoint is moving

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 40.6 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 24358 2009-11-04 21:54:42Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#define RT_STRICT
24#include <iprt/types.h>
25#include <iprt/asm.h>
26#include <iprt/file.h>
27#include <iprt/mem.h>
28#include <iprt/string.h>
29#include <iprt/assert.h>
30#include <VBox/log.h>
31
32#include "PDMAsyncCompletionFileInternal.h"
33
34/** The update period for the I/O load statistics in ms. */
35#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
36/** Maximum number of requests a manager will handle. */
37#define PDMACEPFILEMGR_REQS_MAX 512 /* @todo: Find better solution wrt. the request number*/
38
39int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
40{
41 int rc = VINF_SUCCESS;
42
43 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
44 if (rc == VERR_OUT_OF_RANGE)
45 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, PDMACEPFILEMGR_REQS_MAX);
46
47 if (RT_SUCCESS(rc))
48 {
49 /* Initialize request handle array. */
50 pAioMgr->iFreeEntryNext = 0;
51 pAioMgr->iFreeReqNext = 0;
52 pAioMgr->cReqEntries = PDMACEPFILEMGR_REQS_MAX + 1;
53 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
54
55 if (pAioMgr->pahReqsFree)
56 {
57 return VINF_SUCCESS;
58 }
59 else
60 {
61 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
62 rc = VERR_NO_MEMORY;
63 }
64 }
65
66 return rc;
67}
68
69void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
70{
71 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
72
73 while (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
74 {
75 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext]);
76 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
77 }
78
79 RTMemFree(pAioMgr->pahReqsFree);
80}
81
82/**
83 * Sorts the endpoint list with insertion sort.
84 */
85static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
86{
87 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
88
89 pEpPrev = pAioMgr->pEndpointsHead;
90 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
91
92 while (pEpCurr)
93 {
94 /* Remember the next element to sort because the list might change. */
95 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
96
97 /* Unlink the current element from the list. */
98 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
99 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
100
101 if (pPrev)
102 pPrev->AioMgr.pEndpointNext = pNext;
103 else
104 pAioMgr->pEndpointsHead = pNext;
105
106 if (pNext)
107 pNext->AioMgr.pEndpointPrev = pPrev;
108
109 /* Go back until we reached the place to insert the current endpoint into. */
110 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
111 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
112
113 /* Link the endpoint into the list. */
114 if (pEpPrev)
115 pNext = pEpPrev->AioMgr.pEndpointNext;
116 else
117 pNext = pAioMgr->pEndpointsHead;
118
119 pEpCurr->AioMgr.pEndpointNext = pNext;
120 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
121
122 if (pNext)
123 pNext->AioMgr.pEndpointPrev = pEpCurr;
124
125 if (pEpPrev)
126 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
127 else
128 pAioMgr->pEndpointsHead = pEpCurr;
129
130 pEpCurr = pEpNextToSort;
131 }
132
133#ifdef DEBUG
134 /* Validate sorting alogrithm */
135 unsigned cEndpoints = 0;
136 pEpCurr = pAioMgr->pEndpointsHead;
137
138 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
139 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
140
141 while (pEpCurr)
142 {
143 cEndpoints++;
144
145 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
146 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
147
148 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
149 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
150
151 pEpCurr = pNext;
152 }
153
154 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
155
156#endif
157}
158
159/**
160 * Removes an endpoint from the currently assigned manager.
161 *
162 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
163 * FALSE otherwise.
164 * @param pEndpointRemove The endpoint to remove.
165 */
166static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
167{
168 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
169 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
170 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
171
172 pAioMgr->cEndpoints--;
173
174 if (pPrev)
175 pPrev->AioMgr.pEndpointNext = pNext;
176 else
177 pAioMgr->pEndpointsHead = pNext;
178
179 if (pNext)
180 pNext->AioMgr.pEndpointPrev = pPrev;
181
182 /* Make sure that there is no request pending on this manager for the endpoint. */
183 if (!pEndpointRemove->AioMgr.cRequestsActive)
184 {
185 Assert(!pEndpointRemove->pFlushReq);
186
187 /* Reopen the file so that the new endpoint can reassociate with the file */
188 RTFileClose(pEndpointRemove->File);
189 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
190 AssertRC(rc);
191 return false;
192 }
193
194 return true;
195}
196
197/**
198 * Creates a new I/O manager and spreads the I/O load of the endpoints
199 * between the given I/O manager and the new one.
200 *
201 * @returns nothing.
202 * @param pAioMgr The I/O manager with high I/O load.
203 */
204static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
205{
206 PPDMACEPFILEMGR pAioMgrNew = NULL;
207 int rc = VINF_SUCCESS;
208
209 /* Splitting can't be done with only one open endpoint. */
210 if (pAioMgr->cEndpoints > 1)
211 {
212 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass,
213 &pAioMgrNew, false);
214 if (RT_SUCCESS(rc))
215 {
216 /* We will sort the list by request count per second. */
217 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
218
219 /* Now move some endpoints to the new manager. */
220 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
221 unsigned cReqsOther = 0;
222 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
223
224 while (pCurr)
225 {
226 if (cReqsHere <= cReqsOther)
227 {
228 /*
229 * The other manager has more requests to handle now.
230 * We will keep the current endpoint.
231 */
232 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
233 cReqsHere += pCurr->AioMgr.cReqsPerSec;
234 pCurr = pCurr->AioMgr.pEndpointNext;
235 }
236 else
237 {
238 /* Move to other endpoint. */
239 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
240 cReqsOther += pCurr->AioMgr.cReqsPerSec;
241
242 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
243
244 pCurr = pCurr->AioMgr.pEndpointNext;
245
246 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
247
248 if (fReqsPending)
249 {
250 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
251 pMove->AioMgr.fMoving = true;
252 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
253 }
254 else
255 {
256 pMove->AioMgr.fMoving = false;
257 pMove->AioMgr.pAioMgrDst = NULL;
258 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
259 }
260 }
261 }
262 }
263 else
264 {
265 /* Don't process further but leave a log entry about reduced performance. */
266 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
267 }
268 }
269}
270
271/**
272 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
273 *
274 * @returns VBox status code
275 * @param pAioMgr The I/O manager the error ocurred on.
276 * @param rc The error code.
277 */
278static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
279{
280 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
281 pAioMgr, rc));
282 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
283 LogRel(("AIOMgr: Please contact the product vendor\n"));
284
285 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
286
287 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
288 ASMAtomicWriteBool(&pEpClassFile->fFailsafe, true);
289
290 AssertMsgFailed(("Implement\n"));
291 return VINF_SUCCESS;
292}
293
294/**
295 * Put a list of tasks in the pending request list of an endpoint.
296 */
297DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
298{
299 /* Add the rest of the tasks to the pending list */
300 if (!pEndpoint->AioMgr.pReqsPendingHead)
301 {
302 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
303 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
304 }
305 else
306 {
307 Assert(pEndpoint->AioMgr.pReqsPendingTail);
308 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
309 }
310
311 /* Update the tail. */
312 while (pTaskHead->pNext)
313 pTaskHead = pTaskHead->pNext;
314
315 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
316}
317
318/**
319 * Put one task in the pending request list of an endpoint.
320 */
321DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
322{
323 /* Add the rest of the tasks to the pending list */
324 if (!pEndpoint->AioMgr.pReqsPendingHead)
325 {
326 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
327 pEndpoint->AioMgr.pReqsPendingHead = pTask;
328 }
329 else
330 {
331 Assert(pEndpoint->AioMgr.pReqsPendingTail);
332 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
333 }
334
335 pEndpoint->AioMgr.pReqsPendingTail = pTask;
336}
337
338/**
339 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
340 */
341static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
342 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
343 PRTFILEAIOREQ pahReqs, size_t cReqs)
344{
345 int rc;
346
347 pAioMgr->cRequestsActive += cReqs;
348 pEndpoint->AioMgr.cRequestsActive += cReqs;
349
350 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
351 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
352
353 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
354 if (RT_FAILURE(rc))
355 {
356 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
357 {
358 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
359
360 /*
361 * We run out of resources.
362 * Need to check which requests got queued
363 * and put the rest on the pending list again.
364 */
365 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
366 {
367 pEpClass->fOutOfResourcesWarningPrinted = true;
368 LogRel(("AIOMgr: The operating system doesn't have enough resources "
369 "to handle the I/O load of the VM. Expect reduced I/O performance\n"));
370 }
371
372 for (size_t i = 0; i < cReqs; i++)
373 {
374 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
375
376 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
377 {
378 AssertMsg(rcReq == VERR_FILE_AIO_NOT_SUBMITTED,
379 ("Request returned unexpected return code: rc=%Rrc\n", rcReq));
380
381 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
382
383 /* Put the entry on the free array */
384 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = pahReqs[i];
385 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
386
387 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
388 pAioMgr->cRequestsActive--;
389 pEndpoint->AioMgr.cRequestsActive--;
390 }
391 }
392 LogFlow(("Removed requests. I/O manager has a total of %d active requests now\n", pAioMgr->cRequestsActive));
393 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
394 }
395 else
396 AssertMsgFailed(("Unexpected return code rc=%Rrc\n", rc));
397 }
398
399 return rc;
400}
401
402static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
403 PPDMACEPFILEMGR pAioMgr,
404 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
405{
406 RTFILEAIOREQ apReqs[20];
407 unsigned cRequests = 0;
408 unsigned cMaxRequests = PDMACEPFILEMGR_REQS_MAX - pAioMgr->cRequestsActive;
409 int rc = VINF_SUCCESS;
410 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
411
412 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
413 ("Trying to process request lists of a non active endpoint!\n"));
414
415 /* Go through the list and queue the requests until we get a flush request */
416 while ( pTaskHead
417 && !pEndpoint->pFlushReq
418 && (cMaxRequests > 0)
419 && RT_SUCCESS(rc))
420 {
421 PPDMACTASKFILE pCurr = pTaskHead;
422
423 pTaskHead = pTaskHead->pNext;
424
425 pCurr->pNext = NULL;
426
427 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
428 ("Endpoints do not match\n"));
429
430 switch (pCurr->enmTransferType)
431 {
432 case PDMACTASKFILETRANSFER_FLUSH:
433 {
434 /* If there is no data transfer request this flush request finished immediately. */
435 if (!pEndpoint->AioMgr.cRequestsActive)
436 {
437 pCurr->pfnCompleted(pCurr, pCurr->pvUser);
438 pdmacFileTaskFree(pEndpoint, pCurr);
439 }
440 else
441 {
442 pEndpoint->pFlushReq = pCurr;
443 }
444 break;
445 }
446 case PDMACTASKFILETRANSFER_READ:
447 case PDMACTASKFILETRANSFER_WRITE:
448 {
449 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
450 void *pvBuf = pCurr->DataSeg.pvSeg;
451
452 /* Get a request handle. */
453 if (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
454 {
455 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext];
456 pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext] = NIL_RTFILEAIOREQ;
457 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
458 }
459 else
460 {
461 rc = RTFileAioReqCreate(&hReq);
462 AssertRC(rc);
463 }
464
465 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
466
467 /* Check if the alignment requirements are met.
468 * Offset, transfer size and buffer address
469 * need to be on a 512 boundary. */
470 RTFOFF offStart = pCurr->Off & ~(RTFOFF)(512-1);
471 size_t cbToTransfer = RT_ALIGN_Z(pCurr->DataSeg.cbSeg + (pCurr->Off - offStart), 512);
472 PDMACTASKFILETRANSFER enmTransferType = pCurr->enmTransferType;
473
474 AssertMsg( pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE
475 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
476 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
477 offStart, cbToTransfer, pEndpoint->cbFile));
478
479 pCurr->fPrefetch = false;
480
481 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
482 || RT_UNLIKELY(offStart != pCurr->Off)
483 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
484 {
485 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
486 pCurr, cbToTransfer, pCurr->DataSeg.cbSeg, offStart, pCurr->Off));
487
488 /* Create bounce buffer. */
489 pCurr->fBounceBuffer = true;
490
491 AssertMsg(pCurr->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
492 pCurr->Off, offStart));
493 pCurr->uBounceBufOffset = pCurr->Off - offStart;
494
495 /** @todo: I think we need something like a RTMemAllocAligned method here.
496 * Current assumption is that the maximum alignment is 4096byte
497 * (GPT disk on Windows)
498 * so we can use RTMemPageAlloc here.
499 */
500 pCurr->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
501 AssertPtr(pCurr->pvBounceBuffer);
502 pvBuf = pCurr->pvBounceBuffer;
503
504 if (pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
505 {
506 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
507 || RT_UNLIKELY(offStart != pCurr->Off))
508 {
509 /* We have to fill the buffer first before we can update the data. */
510 LogFlow(("Prefetching data for task %#p\n", pCurr));
511 pCurr->fPrefetch = true;
512 enmTransferType = PDMACTASKFILETRANSFER_READ;
513 }
514 else
515 memcpy(pvBuf, pCurr->DataSeg.pvSeg, pCurr->DataSeg.cbSeg);
516 }
517 }
518 else
519 pCurr->fBounceBuffer = false;
520
521 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
522 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
523
524 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
525 {
526 /* Grow the file if needed. */
527 if (RT_UNLIKELY((uint64_t)(pCurr->Off + pCurr->DataSeg.cbSeg) > pEndpoint->cbFile))
528 {
529 ASMAtomicWriteU64(&pEndpoint->cbFile, pCurr->Off + pCurr->DataSeg.cbSeg);
530 RTFileSetSize(pEndpoint->File, pCurr->Off + pCurr->DataSeg.cbSeg);
531 }
532
533 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
534 offStart, pvBuf, cbToTransfer, pCurr);
535 }
536 else
537 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
538 offStart, pvBuf, cbToTransfer, pCurr);
539 AssertRC(rc);
540
541 apReqs[cRequests] = hReq;
542 pEndpoint->AioMgr.cReqsProcessed++;
543 cMaxRequests--;
544 cRequests++;
545 if (cRequests == RT_ELEMENTS(apReqs))
546 {
547 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
548 cRequests = 0;
549 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
550 ("Unexpected return code\n"));
551 }
552 break;
553 }
554 default:
555 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
556 }
557 }
558
559 if (cRequests)
560 {
561 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
562 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
563 ("Unexpected return code rc=%Rrc\n", rc));
564 }
565
566 if (pTaskHead)
567 {
568 /* Add the rest of the tasks to the pending list */
569 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
570
571 if (RT_UNLIKELY(!cMaxRequests && !pEndpoint->pFlushReq))
572 {
573 /*
574 * The I/O manager has no room left for more requests
575 * but there are still requests to process.
576 * Create a new I/O manager and let it handle some endpoints.
577 */
578 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
579 }
580 }
581
582 /* Insufficient resources are not fatal. */
583 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
584 rc = VINF_SUCCESS;
585
586 return rc;
587}
588
589/**
590 * Adds all pending requests for the given endpoint
591 * until a flush request is encountered or there is no
592 * request anymore.
593 *
594 * @returns VBox status code.
595 * @param pAioMgr The async I/O manager for the endpoint
596 * @param pEndpoint The endpoint to get the requests from.
597 */
598static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
599 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
600{
601 int rc = VINF_SUCCESS;
602 PPDMACTASKFILE pTasksHead = NULL;
603
604 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
605 ("Trying to process request lists of a non active endpoint!\n"));
606
607 Assert(!pEndpoint->pFlushReq);
608
609 /* Check the pending list first */
610 if (pEndpoint->AioMgr.pReqsPendingHead)
611 {
612 LogFlow(("Queuing pending requests first\n"));
613
614 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
615 /*
616 * Clear the list as the processing routine will insert them into the list
617 * again if it gets a flush request.
618 */
619 pEndpoint->AioMgr.pReqsPendingHead = NULL;
620 pEndpoint->AioMgr.pReqsPendingTail = NULL;
621 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
622 AssertRC(rc);
623 }
624
625 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
626 {
627 /* Now the request queue. */
628 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
629 if (pTasksHead)
630 {
631 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
632 AssertRC(rc);
633 }
634 }
635
636 return rc;
637}
638
639static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
640{
641 int rc = VINF_SUCCESS;
642 bool fNotifyWaiter = false;
643
644 LogFlowFunc((": Enter\n"));
645
646 Assert(pAioMgr->fBlockingEventPending);
647
648 switch (pAioMgr->enmBlockingEvent)
649 {
650 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
651 {
652 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
653 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
654
655 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
656
657 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
658 pEndpointNew->AioMgr.pEndpointPrev = NULL;
659 if (pAioMgr->pEndpointsHead)
660 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
661 pAioMgr->pEndpointsHead = pEndpointNew;
662
663 /* Assign the completion point to this file. */
664 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
665 fNotifyWaiter = true;
666 pAioMgr->cEndpoints++;
667 break;
668 }
669 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
670 {
671 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);
672 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
673
674 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
675 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
676 break;
677 }
678 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
679 {
680 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
681 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
682
683 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
684
685 /* Make sure all tasks finished. Process the queues a last time first. */
686 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
687 AssertRC(rc);
688
689 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
690 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
691 break;
692 }
693 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
694 {
695 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
696 if (!pAioMgr->cRequestsActive)
697 fNotifyWaiter = true;
698 break;
699 }
700 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
701 {
702 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
703 break;
704 }
705 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
706 {
707 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
708 fNotifyWaiter = true;
709 break;
710 }
711 default:
712 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
713 }
714
715 if (fNotifyWaiter)
716 {
717 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
718 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
719
720 /* Release the waiting thread. */
721 LogFlow(("Signalling waiter\n"));
722 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
723 AssertRC(rc);
724 }
725
726 LogFlowFunc((": Leave\n"));
727 return rc;
728}
729
730/**
731 * Checks all endpoints for pending events or new requests.
732 *
733 * @returns VBox status code.
734 * @param pAioMgr The I/O manager handle.
735 */
736static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
737{
738 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
739 int rc = VINF_SUCCESS;
740 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
741
742 while (pEndpoint)
743 {
744 if (!pEndpoint->pFlushReq
745 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
746 && !pEndpoint->AioMgr.fMoving)
747 {
748 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
749 if (RT_FAILURE(rc))
750 return rc;
751 }
752 else if (!pEndpoint->AioMgr.cRequestsActive)
753 {
754 /* Reopen the file so that the new endpoint can reassociate with the file */
755 RTFileClose(pEndpoint->File);
756 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
757 AssertRC(rc);
758
759 if (pEndpoint->AioMgr.fMoving)
760 {
761 pEndpoint->AioMgr.fMoving = false;
762 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
763 }
764 else
765 {
766 Assert(pAioMgr->fBlockingEventPending);
767 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
768
769 /* Release the waiting thread. */
770 LogFlow(("Signalling waiter\n"));
771 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
772 AssertRC(rc);
773 }
774 }
775
776 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
777 }
778
779 return rc;
780}
781
782/** Helper macro for checking for error codes. */
783#define CHECK_RC(pAioMgr, rc) \
784 if (RT_FAILURE(rc)) \
785 {\
786 int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS);\
787 return rc2;\
788 }
789
790/**
791 * The normal I/O manager using the RTFileAio* API
792 *
793 * @returns VBox status code.
794 * @param ThreadSelf Handle of the thread.
795 * @param pvUser Opaque user data.
796 */
797int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
798{
799 int rc = VINF_SUCCESS;
800 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
801 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
802
803 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
804 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING))
805 {
806 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
807 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
808 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
809 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
810 AssertRC(rc);
811
812 LogFlow(("Got woken up\n"));
813 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
814
815 /* Check for an external blocking event first. */
816 if (pAioMgr->fBlockingEventPending)
817 {
818 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
819 CHECK_RC(pAioMgr, rc);
820 }
821
822 if (RT_LIKELY(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING))
823 {
824 /* We got woken up because an endpoint issued new requests. Queue them. */
825 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
826 CHECK_RC(pAioMgr, rc);
827
828 while (pAioMgr->cRequestsActive)
829 {
830 RTFILEAIOREQ apReqs[20];
831 uint32_t cReqsCompleted = 0;
832 size_t cReqsWait;
833
834 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
835 cReqsWait = RT_ELEMENTS(apReqs);
836 else
837 cReqsWait = pAioMgr->cRequestsActive;
838
839 LogFlow(("Waiting for %d of %d tasks to complete\n", pAioMgr->cRequestsActive, cReqsWait));
840
841 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
842 cReqsWait,
843 RT_INDEFINITE_WAIT, apReqs,
844 RT_ELEMENTS(apReqs), &cReqsCompleted);
845 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
846 CHECK_RC(pAioMgr, rc);
847
848 LogFlow(("%d tasks completed\n", cReqsCompleted));
849
850 for (uint32_t i = 0; i < cReqsCompleted; i++)
851 {
852 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
853 size_t cbTransfered = 0;
854 int rcReq = RTFileAioReqGetRC(apReqs[i], &cbTransfered);
855 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(apReqs[i]);
856
857 pEndpoint = pTask->pEndpoint;
858
859 /*
860 * It is possible that the request failed on Linux with kernels < 2.6.23
861 * if the passed buffer was allocated with remap_pfn_range or if the file
862 * is on an NFS endpoint which does not support async and direct I/O at the same time.
863 * The endpoint will be migrated to a failsafe manager in case a request fails.
864 */
865 if (RT_FAILURE(rcReq))
866 {
867 /* Free bounce buffers and the IPRT request. */
868 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
869 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
870
871 pAioMgr->cRequestsActive--;
872 pEndpoint->AioMgr.cRequestsActive--;
873 pEndpoint->AioMgr.cReqsProcessed++;
874
875 if (pTask->fBounceBuffer)
876 RTMemFree(pTask->pvBounceBuffer);
877
878 /* Queue the request on the pending list. */
879 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
880 pEndpoint->AioMgr.pReqsPendingHead = pTask;
881
882 /* Create a new failsafe manager if neccessary. */
883 if (!pEndpoint->AioMgr.fMoving)
884 {
885 PPDMACEPFILEMGR pAioMgrFailsafe;
886
887 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
888 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
889
890 pEndpoint->AioMgr.fMoving = true;
891
892 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
893 &pAioMgrFailsafe, true);
894 AssertRC(rc);
895
896 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
897
898 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
899 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
900 }
901
902 /* If this was the last request for the endpoint migrate it to the new manager. */
903 if (!pEndpoint->AioMgr.cRequestsActive)
904 {
905 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
906 Assert(!fReqsPending);
907
908 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
909 AssertRC(rc);
910 }
911 }
912 else
913 {
914 AssertMsg(( (cbTransfered == pTask->DataSeg.cbSeg)
915 || (pTask->fBounceBuffer && (cbTransfered >= pTask->DataSeg.cbSeg))),
916 ("Task didn't completed successfully (rc=%Rrc) or was incomplete (cbTransfered=%u)\n", rcReq, cbTransfered));
917
918 if (pTask->fPrefetch)
919 {
920 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
921 Assert(pTask->fBounceBuffer);
922
923 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
924 pTask->DataSeg.pvSeg,
925 pTask->DataSeg.cbSeg);
926
927 /* Write it now. */
928 pTask->fPrefetch = false;
929 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
930 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
931
932 /* Grow the file if needed. */
933 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
934 {
935 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
936 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
937 }
938
939 rc = RTFileAioReqPrepareWrite(apReqs[i], pEndpoint->File,
940 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
941 AssertRC(rc);
942 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, &apReqs[i], 1);
943 AssertRC(rc);
944 }
945 else
946 {
947 if (pTask->fBounceBuffer)
948 {
949 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
950 memcpy(pTask->DataSeg.pvSeg,
951 ((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
952 pTask->DataSeg.cbSeg);
953
954 RTMemPageFree(pTask->pvBounceBuffer);
955 }
956
957 /* Put the entry on the free array */
958 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
959 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
960
961 pAioMgr->cRequestsActive--;
962 pEndpoint->AioMgr.cRequestsActive--;
963 pEndpoint->AioMgr.cReqsProcessed++;
964
965 /* Call completion callback */
966 pTask->pfnCompleted(pTask, pTask->pvUser);
967 pdmacFileTaskFree(pEndpoint, pTask);
968
969 /*
970 * If there is no request left on the endpoint but a flush request is set
971 * it completed now and we notify the owner.
972 * Furthermore we look for new requests and continue.
973 */
974 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
975 {
976 /* Call completion callback */
977 pTask = pEndpoint->pFlushReq;
978 pEndpoint->pFlushReq = NULL;
979
980 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
981
982 pTask->pfnCompleted(pTask, pTask->pvUser);
983 pdmacFileTaskFree(pEndpoint, pTask);
984 }
985 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
986 {
987 /* If the endpoint is about to be migrated do it now. */
988 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
989 Assert(!fReqsPending);
990
991 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
992 AssertRC(rc);
993 }
994 }
995 } /* request completed successfully */
996 } /* for every completed request */
997
998 /* Check for an external blocking event before we go to sleep again. */
999 if (pAioMgr->fBlockingEventPending)
1000 {
1001 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1002 CHECK_RC(pAioMgr, rc);
1003 }
1004
1005 /* Update load statistics. */
1006 uint64_t uMillisCurr = RTTimeMilliTS();
1007 if (uMillisCurr > uMillisEnd)
1008 {
1009 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1010
1011 /* Calculate timespan. */
1012 uMillisCurr -= uMillisEnd;
1013
1014 while (pEndpointCurr)
1015 {
1016 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1017 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1018 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1019 }
1020
1021 /* Set new update interval */
1022 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1023 }
1024
1025 /* Check endpoints for new requests. */
1026 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1027 CHECK_RC(pAioMgr, rc);
1028 } /* while requests are active. */
1029 } /* if still running */
1030 } /* while running */
1031
1032 return rc;
1033}
1034
1035#undef CHECK_RC
1036
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette