VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFile.cpp@ 26522

Last change on this file since 26522 was 26338, checked in by vboxsync, 15 years ago

AsyncCompletion: Introduce range locks to prevent concurrent access to the same file range. Fixes inconsistent data for unaligned tasks where we have to use bounce buffers (e.g. block table updates when a VDI file grows)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 31.7 KB
Line 
1/* $Id: PDMAsyncCompletionFile.cpp 26338 2010-02-09 00:54:20Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 */
5
6/*
7 * Copyright (C) 2006-2009 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
27#define RT_STRICT
28//#define DEBUG
29#include "PDMInternal.h"
30#include <VBox/pdm.h>
31#include <VBox/mm.h>
32#include <VBox/vm.h>
33#include <VBox/err.h>
34#include <VBox/log.h>
35
36#include <iprt/asm.h>
37#include <iprt/assert.h>
38#include <iprt/critsect.h>
39#include <iprt/env.h>
40#include <iprt/file.h>
41#include <iprt/mem.h>
42#include <iprt/semaphore.h>
43#include <iprt/string.h>
44#include <iprt/thread.h>
45#include <iprt/path.h>
46
47#include "PDMAsyncCompletionFileInternal.h"
48
49/**
50 * Frees a task.
51 *
52 * @returns nothing.
53 * @param pEndpoint Pointer to the endpoint the segment was for.
54 * @param pTask The task to free.
55 */
56void pdmacFileTaskFree(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
57 PPDMACTASKFILE pTask)
58{
59 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
60
61 LogFlowFunc((": pEndpoint=%p pTask=%p\n", pEndpoint, pTask));
62
63 /* Try the per endpoint cache first. */
64 if (pEndpoint->cTasksCached < pEpClass->cTasksCacheMax)
65 {
66 /* Add it to the list. */
67 pEndpoint->pTasksFreeTail->pNext = pTask;
68 pEndpoint->pTasksFreeTail = pTask;
69 ASMAtomicIncU32(&pEndpoint->cTasksCached);
70 }
71 else if (false)
72 {
73 /* Bigger class cache */
74 }
75 else
76 {
77 Log(("Freeing task %p because all caches are full\n", pTask));
78 MMR3HeapFree(pTask);
79 }
80}
81
82/**
83 * Allocates a task segment
84 *
85 * @returns Pointer to the new task segment or NULL
86 * @param pEndpoint Pointer to the endpoint
87 */
88PPDMACTASKFILE pdmacFileTaskAlloc(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
89{
90 PPDMACTASKFILE pTask = NULL;
91
92 /* Try the small per endpoint cache first. */
93 if (pEndpoint->pTasksFreeHead == pEndpoint->pTasksFreeTail)
94 {
95 /* Try the bigger endpoint class cache. */
96 PPDMASYNCCOMPLETIONEPCLASSFILE pEndpointClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
97
98#if 0
99 /* We start with the assigned slot id to distribute the load when allocating new tasks. */
100 unsigned iSlot = pEndpoint->iSlotStart;
101 do
102 {
103 pTask = (PPDMASYNCCOMPLETIONTASK)ASMAtomicXchgPtr((void * volatile *)&pEndpointClass->apTaskCache[iSlot], NULL);
104 if (pTask)
105 break;
106
107 iSlot = (iSlot + 1) % RT_ELEMENTS(pEndpointClass->apTaskCache);
108 } while (iSlot != pEndpoint->iSlotStart);
109#endif
110 if (!pTask)
111 {
112 /*
113 * Allocate completely new.
114 * If this fails we return NULL.
115 */
116 int rc = MMR3HeapAllocZEx(pEndpointClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
117 sizeof(PDMACTASKFILE),
118 (void **)&pTask);
119 if (RT_FAILURE(rc))
120 pTask = NULL;
121
122 LogFlow(("Allocated task %p\n", pTask));
123 }
124#if 0
125 else
126 {
127 /* Remove the first element and put the rest into the slot again. */
128 PPDMASYNCCOMPLETIONTASK pTaskHeadNew = pTask->pNext;
129
130 pTaskHeadNew->pPrev = NULL;
131
132 /* Put back into the list adding any new tasks. */
133 while (true)
134 {
135 bool fChanged = ASMAtomicCmpXchgPtr((void * volatile *)&pEndpointClass->apTaskCache[iSlot], pTaskHeadNew, NULL);
136
137 if (fChanged)
138 break;
139
140 PPDMASYNCCOMPLETIONTASK pTaskHead = (PPDMASYNCCOMPLETIONTASK)ASMAtomicXchgPtr((void * volatile *)&pEndpointClass->apTaskCache[iSlot], NULL);
141
142 /* The new task could be taken inbetween */
143 if (pTaskHead)
144 {
145 /* Go to the end of the probably much shorter new list. */
146 PPDMASYNCCOMPLETIONTASK pTaskTail = pTaskHead;
147 while (pTaskTail->pNext)
148 pTaskTail = pTaskTail->pNext;
149
150 /* Concatenate */
151 pTaskTail->pNext = pTaskHeadNew;
152
153 pTaskHeadNew = pTaskHead;
154 }
155 /* Another round trying to change the list. */
156 }
157 /* We got a task from the global cache so decrement the counter */
158 ASMAtomicDecU32(&pEndpointClass->cTasksCached);
159 }
160#endif
161 }
162 else
163 {
164 /* Grab a free task from the head. */
165 AssertMsg(pEndpoint->cTasksCached > 0, ("No tasks cached but list contains more than one element\n"));
166
167 pTask = pEndpoint->pTasksFreeHead;
168 pEndpoint->pTasksFreeHead = pTask->pNext;
169 ASMAtomicDecU32(&pEndpoint->cTasksCached);
170 }
171
172 pTask->pNext = NULL;
173
174 return pTask;
175}
176
177PPDMACTASKFILE pdmacFileEpGetNewTasks(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
178{
179 PPDMACTASKFILE pTasks = NULL;
180
181 /*
182 * Get pending tasks.
183 */
184 pTasks = (PPDMACTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpoint->pTasksNewHead, NULL);
185
186 /* Reverse the list to process in FIFO order. */
187 if (pTasks)
188 {
189 PPDMACTASKFILE pTask = pTasks;
190
191 pTasks = NULL;
192
193 while (pTask)
194 {
195 PPDMACTASKFILE pCur = pTask;
196 pTask = pTask->pNext;
197 pCur->pNext = pTasks;
198 pTasks = pCur;
199 }
200 }
201
202 return pTasks;
203}
204
205static void pdmacFileAioMgrWakeup(PPDMACEPFILEMGR pAioMgr)
206{
207 bool fWokenUp = ASMAtomicXchgBool(&pAioMgr->fWokenUp, true);
208
209 if (!fWokenUp)
210 {
211 int rc = VINF_SUCCESS;
212 bool fWaitingEventSem = ASMAtomicReadBool(&pAioMgr->fWaitingEventSem);
213
214 if (fWaitingEventSem)
215 rc = RTSemEventSignal(pAioMgr->EventSem);
216
217 AssertRC(rc);
218 }
219}
220
221static int pdmacFileAioMgrWaitForBlockingEvent(PPDMACEPFILEMGR pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT enmEvent)
222{
223 int rc = VINF_SUCCESS;
224
225 ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, enmEvent);
226 Assert(!pAioMgr->fBlockingEventPending);
227 ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, true);
228
229 /* Wakeup the async I/O manager */
230 pdmacFileAioMgrWakeup(pAioMgr);
231
232 /* Wait for completion. */
233 rc = RTSemEventWait(pAioMgr->EventSemBlock, RT_INDEFINITE_WAIT);
234 AssertRC(rc);
235
236 ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, false);
237 ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID);
238
239 return rc;
240}
241
242int pdmacFileAioMgrAddEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
243{
244 int rc;
245
246 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
247 AssertRCReturn(rc, rc);
248
249 ASMAtomicWritePtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, pEndpoint);
250 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT);
251
252 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
253
254 if (RT_SUCCESS(rc))
255 ASMAtomicWritePtr((void * volatile *)&pEndpoint->pAioMgr, pAioMgr);
256
257 return rc;
258}
259
260static int pdmacFileAioMgrRemoveEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
261{
262 int rc;
263
264 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
265 AssertRCReturn(rc, rc);
266
267 ASMAtomicWritePtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, pEndpoint);
268 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT);
269
270 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
271
272 return rc;
273}
274
275static int pdmacFileAioMgrCloseEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
276{
277 int rc;
278
279 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
280 AssertRCReturn(rc, rc);
281
282 ASMAtomicWritePtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, pEndpoint);
283 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT);
284
285 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
286
287 return rc;
288}
289
290static int pdmacFileAioMgrShutdown(PPDMACEPFILEMGR pAioMgr)
291{
292 int rc;
293
294 rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
295 AssertRCReturn(rc, rc);
296
297 rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN);
298
299 RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
300
301 return rc;
302}
303
304int pdmacFileEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
305{
306 PPDMACTASKFILE pNext;
307 do
308 {
309 pNext = pEndpoint->pTasksNewHead;
310 pTask->pNext = pNext;
311 } while (!ASMAtomicCmpXchgPtr((void * volatile *)&pEndpoint->pTasksNewHead, (void *)pTask, (void *)pNext));
312
313 pdmacFileAioMgrWakeup((PPDMACEPFILEMGR)ASMAtomicReadPtr((void * volatile *)&pEndpoint->pAioMgr));
314
315 return VINF_SUCCESS;
316}
317
318void pdmacFileEpTaskCompleted(PPDMACTASKFILE pTask, void *pvUser)
319{
320 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pvUser;
321
322 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
323 {
324 pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, true);
325 }
326 else
327 {
328 uint32_t uOld = ASMAtomicSubU32(&pTaskFile->cbTransferLeft, pTask->DataSeg.cbSeg);
329
330 if (!(uOld - pTask->DataSeg.cbSeg)
331 && !ASMAtomicXchgBool(&pTaskFile->fCompleted, true))
332 pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, true);
333 }
334}
335
336int pdmacFileEpTaskInitiate(PPDMASYNCCOMPLETIONTASK pTask,
337 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
338 PCPDMDATASEG paSegments, size_t cSegments,
339 size_t cbTransfer, PDMACTASKFILETRANSFER enmTransfer)
340{
341 int rc = VINF_SUCCESS;
342 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
343 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
344 PPDMACEPFILEMGR pAioMgr = pEpFile->pAioMgr;
345
346 Assert( (enmTransfer == PDMACTASKFILETRANSFER_READ)
347 || (enmTransfer == PDMACTASKFILETRANSFER_WRITE));
348
349 ASMAtomicWriteS32(&pTaskFile->cbTransferLeft, cbTransfer);
350 ASMAtomicWriteBool(&pTaskFile->fCompleted, false);
351
352 for (unsigned i = 0; i < cSegments; i++)
353 {
354 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile);
355 AssertPtr(pIoTask);
356
357 pIoTask->pEndpoint = pEpFile;
358 pIoTask->enmTransferType = enmTransfer;
359 pIoTask->Off = off;
360 pIoTask->DataSeg.cbSeg = paSegments[i].cbSeg;
361 pIoTask->DataSeg.pvSeg = paSegments[i].pvSeg;
362 pIoTask->pvUser = pTaskFile;
363 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
364
365 /* Send it off to the I/O manager. */
366 pdmacFileEpAddTask(pEpFile, pIoTask);
367 off += paSegments[i].cbSeg;
368 cbTransfer -= paSegments[i].cbSeg;
369 }
370
371 AssertMsg(!cbTransfer, ("Incomplete transfer %u bytes left\n", cbTransfer));
372
373 if (ASMAtomicReadS32(&pTaskFile->cbTransferLeft) == 0
374 && !ASMAtomicXchgBool(&pTaskFile->fCompleted, true))
375 pdmR3AsyncCompletionCompleteTask(pTask, false);
376 else
377 rc = VINF_AIO_TASK_PENDING;
378
379 return rc;
380}
381
382/**
383 * Creates a new async I/O manager.
384 *
385 * @returns VBox status code.
386 * @param pEpClass Pointer to the endpoint class data.
387 * @param ppAioMgr Where to store the pointer to the new async I/O manager on success.
388 * @param fFailsafe Flag to force a failsafe manager even if the global flag is not set.
389 */
390int pdmacFileAioMgrCreate(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass, PPPDMACEPFILEMGR ppAioMgr, bool fFailsafe)
391{
392 int rc = VINF_SUCCESS;
393 PPDMACEPFILEMGR pAioMgrNew;
394
395 LogFlowFunc((": Entered\n"));
396
397 rc = MMR3HeapAllocZEx(pEpClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION, sizeof(PDMACEPFILEMGR), (void **)&pAioMgrNew);
398 if (RT_SUCCESS(rc))
399 {
400 pAioMgrNew->fFailsafe = fFailsafe || pEpClass->fFailsafe;
401
402 rc = RTSemEventCreate(&pAioMgrNew->EventSem);
403 if (RT_SUCCESS(rc))
404 {
405 rc = RTSemEventCreate(&pAioMgrNew->EventSemBlock);
406 if (RT_SUCCESS(rc))
407 {
408 rc = RTCritSectInit(&pAioMgrNew->CritSectBlockingEvent);
409 if (RT_SUCCESS(rc))
410 {
411 /* Init the rest of the manager. */
412 if (!pAioMgrNew->fFailsafe)
413 rc = pdmacFileAioMgrNormalInit(pAioMgrNew);
414
415 if (RT_SUCCESS(rc))
416 {
417 pAioMgrNew->enmState = PDMACEPFILEMGRSTATE_RUNNING;
418
419 rc = RTThreadCreateF(&pAioMgrNew->Thread,
420 pAioMgrNew->fFailsafe
421 ? pdmacFileAioMgrFailsafe
422 : pdmacFileAioMgrNormal,
423 pAioMgrNew,
424 0,
425 RTTHREADTYPE_IO,
426 0,
427 "AioMgr%d-%s", pEpClass->cAioMgrs,
428 pAioMgrNew->fFailsafe
429 ? "F"
430 : "N");
431 if (RT_SUCCESS(rc))
432 {
433 /* Link it into the list. */
434 RTCritSectEnter(&pEpClass->CritSect);
435 pAioMgrNew->pNext = pEpClass->pAioMgrHead;
436 if (pEpClass->pAioMgrHead)
437 pEpClass->pAioMgrHead->pPrev = pAioMgrNew;
438 pEpClass->pAioMgrHead = pAioMgrNew;
439 pEpClass->cAioMgrs++;
440 RTCritSectLeave(&pEpClass->CritSect);
441
442 *ppAioMgr = pAioMgrNew;
443
444 Log(("PDMAC: Successfully created new file AIO Mgr {%s}\n", RTThreadGetName(pAioMgrNew->Thread)));
445 return VINF_SUCCESS;
446 }
447 pdmacFileAioMgrNormalDestroy(pAioMgrNew);
448 }
449 RTCritSectDelete(&pAioMgrNew->CritSectBlockingEvent);
450 }
451 RTSemEventDestroy(pAioMgrNew->EventSem);
452 }
453 RTSemEventDestroy(pAioMgrNew->EventSemBlock);
454 }
455 MMR3HeapFree(pAioMgrNew);
456 }
457
458 LogFlowFunc((": Leave rc=%Rrc\n", rc));
459
460 return rc;
461}
462
463/**
464 * Destroys a async I/O manager.
465 *
466 * @returns nothing.
467 * @param pAioMgr The async I/O manager to destroy.
468 */
469static void pdmacFileAioMgrDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile, PPDMACEPFILEMGR pAioMgr)
470{
471 int rc = pdmacFileAioMgrShutdown(pAioMgr);
472 AssertRC(rc);
473
474 /* Unlink from the list. */
475 rc = RTCritSectEnter(&pEpClassFile->CritSect);
476 AssertRC(rc);
477
478 PPDMACEPFILEMGR pPrev = pAioMgr->pPrev;
479 PPDMACEPFILEMGR pNext = pAioMgr->pNext;
480
481 if (pPrev)
482 pPrev->pNext = pNext;
483 else
484 pEpClassFile->pAioMgrHead = pNext;
485
486 if (pNext)
487 pNext->pPrev = pPrev;
488
489 pEpClassFile->cAioMgrs--;
490
491 rc = RTCritSectLeave(&pEpClassFile->CritSect);
492 AssertRC(rc);
493
494 /* Free the ressources. */
495 RTCritSectDelete(&pAioMgr->CritSectBlockingEvent);
496 RTSemEventDestroy(pAioMgr->EventSem);
497 if (!pAioMgr->fFailsafe)
498 pdmacFileAioMgrNormalDestroy(pAioMgr);
499
500 MMR3HeapFree(pAioMgr);
501}
502
503static int pdmacFileInitialize(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals, PCFGMNODE pCfgNode)
504{
505 int rc = VINF_SUCCESS;
506 RTFILEAIOLIMITS AioLimits; /** < Async I/O limitations. */
507
508 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals;
509
510 rc = RTFileAioGetLimits(&AioLimits);
511#ifdef DEBUG
512 if (RT_SUCCESS(rc) && RTEnvExist("VBOX_ASYNC_IO_FAILBACK"))
513 rc = VERR_ENV_VAR_NOT_FOUND;
514#endif
515 if (RT_FAILURE(rc))
516 {
517 LogRel(("AIO: Async I/O manager not supported (rc=%Rrc). Falling back to failsafe manager\n",
518 rc));
519 pEpClassFile->fFailsafe = true;
520 }
521 else
522 {
523 pEpClassFile->uBitmaskAlignment = AioLimits.cbBufferAlignment ? ~((RTR3UINTPTR)AioLimits.cbBufferAlignment - 1) : RTR3UINTPTR_MAX;
524 pEpClassFile->cReqsOutstandingMax = AioLimits.cReqsOutstandingMax;
525
526 /* The user can force the failsafe manager. */
527 rc = CFGMR3QueryBoolDef(pCfgNode, "UseFailsafeIo", &pEpClassFile->fFailsafe, false);
528 AssertLogRelRCReturn(rc, rc);
529
530 if (pEpClassFile->fFailsafe)
531 LogRel(("AIOMgr: Failsafe I/O was requested by user\n"));
532 }
533
534 /* Init critical section. */
535 rc = RTCritSectInit(&pEpClassFile->CritSect);
536 if (RT_SUCCESS(rc))
537 {
538 /* Check if the host cache should be used too. */
539#ifndef RT_OS_LINUX
540 rc = CFGMR3QueryBoolDef(pCfgNode, "HostCacheEnabled", &pEpClassFile->fHostCacheEnabled, false);
541 AssertLogRelRCReturn(rc, rc);
542#else
543 /*
544 * Host cache + async I/O is not supported on Linux. Check if the user enabled the cache,
545 * leave a warning and disable it always.
546 */
547 bool fDummy;
548 rc = CFGMR3QueryBool(pCfgNode, "HostCacheEnabled", &fDummy);
549 if (RT_SUCCESS(rc))
550 LogRel(("AIOMgr: The host cache is not supported with async I/O on Linux\n"));
551
552 pEpClassFile->fHostCacheEnabled = false;
553#endif
554
555 /* Check if the cache was disabled by the user. */
556 rc = CFGMR3QueryBoolDef(pCfgNode, "CacheEnabled", &pEpClassFile->fCacheEnabled, true);
557 AssertLogRelRCReturn(rc, rc);
558
559 if (pEpClassFile->fCacheEnabled)
560 {
561 /* Init cache structure */
562 rc = pdmacFileCacheInit(pEpClassFile, pCfgNode);
563 if (RT_FAILURE(rc))
564 {
565 RTCritSectDelete(&pEpClassFile->CritSect);
566 pEpClassFile->fCacheEnabled = false;
567 LogRel(("AIOMgr: Failed to initialise the cache (rc=%Rrc), disabled caching\n"));
568 }
569 }
570 else
571 LogRel(("AIOMgr: Cache was globally disabled\n"));
572 }
573
574 return rc;
575}
576
577static void pdmacFileTerminate(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals)
578{
579 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals;
580
581 /* All endpoints should be closed at this point. */
582 AssertMsg(!pEpClassFile->Core.pEndpointsHead, ("There are still endpoints left\n"));
583
584 /* Destroy all left async I/O managers. */
585 while (pEpClassFile->pAioMgrHead)
586 pdmacFileAioMgrDestroy(pEpClassFile, pEpClassFile->pAioMgrHead);
587
588 /* Destroy the cache. */
589 if (pEpClassFile->fCacheEnabled)
590 pdmacFileCacheDestroy(pEpClassFile);
591
592 RTCritSectDelete(&pEpClassFile->CritSect);
593}
594
595static int pdmacFileEpInitialize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint,
596 const char *pszUri, uint32_t fFlags)
597{
598 int rc = VINF_SUCCESS;
599 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
600 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass;
601 bool fUseFailsafeManager = pEpClassFile->fFailsafe;
602
603 AssertMsgReturn((fFlags & ~(PDMACEP_FILE_FLAGS_READ_ONLY | PDMACEP_FILE_FLAGS_CACHING)) == 0,
604 ("PDMAsyncCompletion: Invalid flag specified\n"), VERR_INVALID_PARAMETER);
605
606 unsigned fFileFlags = fFlags & PDMACEP_FILE_FLAGS_READ_ONLY
607 ? RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_NONE
608 : RTFILE_O_READWRITE | RTFILE_O_OPEN | RTFILE_O_DENY_WRITE;
609
610 if (!pEpClassFile->fFailsafe)
611 {
612 fFileFlags |= (RTFILE_O_ASYNC_IO | RTFILE_O_WRITE_THROUGH);
613
614 /*
615 * We only disable the cache if the size of the file is a multiple of 512.
616 * Certain hosts like Windows, Linux and Solaris require that transfer sizes
617 * are aligned to the volume sector size.
618 * If not we just make sure that the data is written to disk with RTFILE_O_WRITE_THROUGH
619 * which will trash the host cache but ensures that the host cache will not
620 * contain dirty buffers.
621 */
622 RTFILE File = NIL_RTFILE;
623
624 rc = RTFileOpen(&File, pszUri, RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_NONE);
625 if (RT_SUCCESS(rc))
626 {
627 uint64_t cbSize;
628
629 rc = RTFileGetSize(File, &cbSize);
630 if (RT_SUCCESS(rc) && ((cbSize % 512) == 0))
631 {
632 fFileFlags &= ~RTFILE_O_WRITE_THROUGH;
633
634#if defined(RT_OS_LINUX)
635 AssertMsg(!pEpClassFile->fHostCacheEnabled, ("Host cache + async I/O is not supported on Linux\n"));
636 fFileFlags |= RTFILE_O_NO_CACHE;
637#else
638 if (!pEpClassFile->fHostCacheEnabled)
639 fFileFlags |= RTFILE_O_NO_CACHE;
640#endif
641 }
642
643 pEpFile->cbFile = cbSize;
644
645 RTFileClose(File);
646 }
647 }
648
649 /* Open with final flags. */
650 rc = RTFileOpen(&pEpFile->File, pszUri, fFileFlags);
651 if ((rc == VERR_INVALID_FUNCTION) || (rc == VERR_INVALID_PARAMETER))
652 {
653 LogRel(("pdmacFileEpInitialize: RTFileOpen %s / %08x failed with %Rrc\n",
654 pszUri, fFileFlags, rc));
655 /*
656 * Solaris doesn't support directio on ZFS so far. :-\
657 * Trying to enable it returns VERR_INVALID_FUNCTION
658 * (ENOTTY). Remove it and hope for the best.
659 * ZFS supports write throttling in case applications
660 * write more data than can be synced to the disk
661 * without blocking the whole application.
662 *
663 * On Linux we have the same problem with cifs.
664 * Have to disable async I/O here too because it requires O_DIRECT.
665 */
666 fFileFlags &= ~RTFILE_O_NO_CACHE;
667
668#ifdef RT_OS_LINUX
669 fFileFlags &= ~RTFILE_O_ASYNC_IO;
670 fUseFailsafeManager = true;
671#endif
672
673 /* Open again. */
674 rc = RTFileOpen(&pEpFile->File, pszUri, fFileFlags);
675
676 if (RT_FAILURE(rc))
677 {
678 LogRel(("pdmacFileEpInitialize: RTFileOpen %s / %08x failed AGAIN(!) with %Rrc\n",
679 pszUri, fFileFlags, rc));
680 }
681 }
682
683 if (RT_SUCCESS(rc))
684 {
685 pEpFile->fFlags = fFileFlags;
686
687 rc = RTFileGetSize(pEpFile->File, (uint64_t *)&pEpFile->cbFile);
688 if (RT_SUCCESS(rc) && (pEpFile->cbFile == 0))
689 {
690 /* Could be a block device */
691 rc = RTFileSeek(pEpFile->File, 0, RTFILE_SEEK_END, (uint64_t *)&pEpFile->cbFile);
692 }
693
694 if (RT_SUCCESS(rc))
695 {
696 /* Initialize the segment cache */
697 rc = MMR3HeapAllocZEx(pEpClassFile->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
698 sizeof(PDMACTASKFILE),
699 (void **)&pEpFile->pTasksFreeHead);
700 if (RT_SUCCESS(rc))
701 {
702 PPDMACEPFILEMGR pAioMgr = NULL;
703
704 pEpFile->pTasksFreeTail = pEpFile->pTasksFreeHead;
705 pEpFile->cTasksCached = 0;
706
707 if (fUseFailsafeManager)
708 {
709 /* Safe mode. Every file has its own async I/O manager. */
710 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, true);
711 AssertRC(rc);
712 }
713 else
714 {
715 if ( (fFlags & PDMACEP_FILE_FLAGS_CACHING)
716 && (pEpClassFile->fCacheEnabled))
717 {
718 pEpFile->fCaching = true;
719 rc = pdmacFileEpCacheInit(pEpFile, pEpClassFile);
720 if (RT_FAILURE(rc))
721 {
722 LogRel(("AIOMgr: Endpoint for \"%s\" was opened with caching but initializing cache failed. Disabled caching\n", pszUri));
723 pEpFile->fCaching = false;
724 }
725 }
726
727 pAioMgr = pEpClassFile->pAioMgrHead;
728
729 /* Check for an idling not failsafe one or create new if not found */
730 while (pAioMgr && pAioMgr->fFailsafe)
731 pAioMgr = pAioMgr->pNext;
732
733 if (!pAioMgr)
734 {
735 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, false);
736 AssertRC(rc);
737 }
738 }
739
740 pEpFile->AioMgr.pTreeRangesLocked = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
741 if (!pEpFile->AioMgr.pTreeRangesLocked)
742 rc = VERR_NO_MEMORY;
743 else
744 {
745 pEpFile->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
746
747 /* Assign the endpoint to the thread. */
748 rc = pdmacFileAioMgrAddEndpoint(pAioMgr, pEpFile);
749 if (RT_FAILURE(rc))
750 {
751 RTMemFree(pEpFile->AioMgr.pTreeRangesLocked);
752 MMR3HeapFree(pEpFile->pTasksFreeHead);
753 }
754 }
755 }
756 }
757
758 if (RT_FAILURE(rc))
759 RTFileClose(pEpFile->File);
760 }
761
762#ifdef VBOX_WITH_STATISTICS
763 if (RT_SUCCESS(rc))
764 {
765 STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatRead,
766 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
767 STAMUNIT_TICKS_PER_CALL, "Time taken to read from the endpoint",
768 "/PDM/AsyncCompletion/File/%s/Read", RTPathFilename(pEpFile->Core.pszUri));
769
770 STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatWrite,
771 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
772 STAMUNIT_TICKS_PER_CALL, "Time taken to write to the endpoint",
773 "/PDM/AsyncCompletion/File/%s/Write", RTPathFilename(pEpFile->Core.pszUri));
774 }
775#endif
776
777 return rc;
778}
779
780static int pdmacFileEpRangesLockedDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
781{
782 AssertMsgFailed(("The locked ranges tree should be empty at that point\n"));
783 return VINF_SUCCESS;
784}
785
786static int pdmacFileEpClose(PPDMASYNCCOMPLETIONENDPOINT pEndpoint)
787{
788 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
789 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass;
790
791 /* Make sure that all tasks finished for this endpoint. */
792 int rc = pdmacFileAioMgrCloseEndpoint(pEpFile->pAioMgr, pEpFile);
793 AssertRC(rc);
794
795 /*
796 * If the async I/O manager is in failsafe mode this is the only endpoint
797 * he processes and thus can be destroyed now.
798 */
799 if (pEpFile->pAioMgr->fFailsafe)
800 pdmacFileAioMgrDestroy(pEpClassFile, pEpFile->pAioMgr);
801
802 /* Free cached tasks. */
803 PPDMACTASKFILE pTask = pEpFile->pTasksFreeHead;
804
805 while (pTask)
806 {
807 PPDMACTASKFILE pTaskFree = pTask;
808 pTask = pTask->pNext;
809 MMR3HeapFree(pTaskFree);
810 }
811
812 /* Free the cached data. */
813 if (pEpFile->fCaching)
814 pdmacFileEpCacheDestroy(pEpFile);
815
816 /* Destroy the locked ranges tree now. */
817 RTAvlrFileOffsetDestroy(pEpFile->AioMgr.pTreeRangesLocked, pdmacFileEpRangesLockedDestroy, NULL);
818
819 RTFileClose(pEpFile->File);
820
821#ifdef VBOX_WITH_STATISTICS
822 STAMR3Deregister(pEpClassFile->Core.pVM, &pEpFile->StatRead);
823 STAMR3Deregister(pEpClassFile->Core.pVM, &pEpFile->StatWrite);
824#endif
825
826 return VINF_SUCCESS;
827}
828
829static int pdmacFileEpRead(PPDMASYNCCOMPLETIONTASK pTask,
830 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
831 PCPDMDATASEG paSegments, size_t cSegments,
832 size_t cbRead)
833{
834 int rc = VINF_SUCCESS;
835 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
836
837 STAM_PROFILE_ADV_START(&pEpFile->StatRead, Read);
838
839 if (pEpFile->fCaching)
840 rc = pdmacFileEpCacheRead(pEpFile, (PPDMASYNCCOMPLETIONTASKFILE)pTask,
841 off, paSegments, cSegments, cbRead);
842 else
843 rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbRead,
844 PDMACTASKFILETRANSFER_READ);
845
846 STAM_PROFILE_ADV_STOP(&pEpFile->StatRead, Read);
847
848 return rc;
849}
850
851static int pdmacFileEpWrite(PPDMASYNCCOMPLETIONTASK pTask,
852 PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
853 PCPDMDATASEG paSegments, size_t cSegments,
854 size_t cbWrite)
855{
856 int rc = VINF_SUCCESS;
857 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
858
859 if (RT_UNLIKELY(pEpFile->fReadonly))
860 return VERR_NOT_SUPPORTED;
861
862 STAM_PROFILE_ADV_START(&pEpFile->StatWrite, Write);
863
864 if (pEpFile->fCaching)
865 rc = pdmacFileEpCacheWrite(pEpFile, (PPDMASYNCCOMPLETIONTASKFILE)pTask,
866 off, paSegments, cSegments, cbWrite);
867 else
868 rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbWrite,
869 PDMACTASKFILETRANSFER_WRITE);
870
871 STAM_PROFILE_ADV_STOP(&pEpFile->StatWrite, Write);
872
873 return rc;
874}
875
876static int pdmacFileEpFlush(PPDMASYNCCOMPLETIONTASK pTask,
877 PPDMASYNCCOMPLETIONENDPOINT pEndpoint)
878{
879 int rc = VINF_SUCCESS;
880 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
881 PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
882
883 if (RT_UNLIKELY(pEpFile->fReadonly))
884 return VERR_NOT_SUPPORTED;
885
886 pTaskFile->cbTransferLeft = 0;
887
888 if (pEpFile->fCaching)
889 rc = pdmacFileEpCacheFlush(pEpFile, pTaskFile);
890 else
891 {
892 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile);
893 AssertPtr(pIoTask);
894
895 pIoTask->pEndpoint = pEpFile;
896 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_FLUSH;
897 pIoTask->pvUser = pTaskFile;
898 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
899 pdmacFileEpAddTask(pEpFile, pIoTask);
900 rc = VINF_AIO_TASK_PENDING;
901 }
902
903 return rc;
904}
905
906static int pdmacFileEpGetSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint64_t *pcbSize)
907{
908 PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
909
910 *pcbSize = ASMAtomicReadU64(&pEpFile->cbFile);
911
912 return VINF_SUCCESS;
913}
914
915const PDMASYNCCOMPLETIONEPCLASSOPS g_PDMAsyncCompletionEndpointClassFile =
916{
917 /* u32Version */
918 PDMAC_EPCLASS_OPS_VERSION,
919 /* pcszName */
920 "File",
921 /* enmClassType */
922 PDMASYNCCOMPLETIONEPCLASSTYPE_FILE,
923 /* cbEndpointClassGlobal */
924 sizeof(PDMASYNCCOMPLETIONEPCLASSFILE),
925 /* cbEndpoint */
926 sizeof(PDMASYNCCOMPLETIONENDPOINTFILE),
927 /* cbTask */
928 sizeof(PDMASYNCCOMPLETIONTASKFILE),
929 /* pfnInitialize */
930 pdmacFileInitialize,
931 /* pfnTerminate */
932 pdmacFileTerminate,
933 /* pfnEpInitialize. */
934 pdmacFileEpInitialize,
935 /* pfnEpClose */
936 pdmacFileEpClose,
937 /* pfnEpRead */
938 pdmacFileEpRead,
939 /* pfnEpWrite */
940 pdmacFileEpWrite,
941 /* pfnEpFlush */
942 pdmacFileEpFlush,
943 /* pfnEpGetSize */
944 pdmacFileEpGetSize,
945 /* u32VersionEnd */
946 PDMAC_EPCLASS_OPS_VERSION
947};
948
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette