VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileCache.cpp@ 27557

Last change on this file since 27557 was 27557, checked in by vboxsync, 15 years ago

I/O cache: Increase the endpoint size for every appending write, even if it isn't immediately written to the file (delay in the cache). Fixes data corruption when appending data in VMDK images

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 86.8 KB
Line 
1/* $Id: PDMAsyncCompletionFileCache.cpp 27557 2010-03-20 21:14:43Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * File data cache.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22
23/** @page pg_pdm_async_completion_cache PDM Async Completion Cache - The file I/O cache
24 * This component implements an I/O cache for file endpoints based on the 2Q cache algorithm.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
31#include <iprt/types.h>
32#include <iprt/mem.h>
33#include <iprt/path.h>
34#include <VBox/log.h>
35#include <VBox/stam.h>
36
37#include "PDMAsyncCompletionFileInternal.h"
38
/**
 * An I/O memory context.
 *
 * Tracks the current position within a scatter/gather list so data can be
 * copied piecewise across segment boundaries (see pdmIoMemCtxGetBuffer).
 */
typedef struct PDMIOMEMCTX
{
    /** Pointer to the scatter/gather list. */
    PCPDMDATASEG paDataSeg;
    /** Number of segments. */
    size_t cSegments;
    /** Current segment we are in. */
    unsigned iSegIdx;
    /** Pointer to the current position inside the current segment's buffer. */
    uint8_t *pbBuf;
    /** Number of bytes left in the current buffer; NULL pbBuf / 0 once exhausted. */
    size_t cbBufLeft;
} PDMIOMEMCTX, *PPDMIOMEMCTX;
55
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the global cache critical section. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&Cache->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while(0)

/** Asserts that the calling thread holds the per-endpoint RW semaphore exclusively. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner(pEpCache->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while(0)

/** Asserts that the calling thread holds the per-endpoint RW semaphore shared. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner(pEpCache->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while(0)

#else
/* Non-strict builds: the ownership checks compile to nothing. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while(0)
#endif
83
84/*******************************************************************************
85* Internal Functions *
86*******************************************************************************/
87static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser);
88
89/**
90 * Decrement the reference counter of the given cache entry.
91 *
92 * @returns nothing.
93 * @param pEntry The entry to release.
94 */
95DECLINLINE(void) pdmacFileEpCacheEntryRelease(PPDMACFILECACHEENTRY pEntry)
96{
97 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
98 ASMAtomicDecU32(&pEntry->cRefs);
99}
100
101/**
102 * Increment the reference counter of the given cache entry.
103 *
104 * @returns nothing.
105 * @param pEntry The entry to reference.
106 */
107DECLINLINE(void) pdmacFileEpCacheEntryRef(PPDMACFILECACHEENTRY pEntry)
108{
109 ASMAtomicIncU32(&pEntry->cRefs);
110}
111
112/**
113 * Initialize a I/O memory context.
114 *
115 * @returns nothing
116 * @param pIoMemCtx Pointer to a unitialized I/O memory context.
117 * @param paDataSeg Pointer to the S/G list.
118 * @param cSegments Number of segments in the S/G list.
119 */
120DECLINLINE(void) pdmIoMemCtxInit(PPDMIOMEMCTX pIoMemCtx, PCPDMDATASEG paDataSeg, size_t cSegments)
121{
122 AssertMsg((cSegments > 0) && paDataSeg, ("Trying to initialize a I/O memory context without a S/G list\n"));
123
124 pIoMemCtx->paDataSeg = paDataSeg;
125 pIoMemCtx->cSegments = cSegments;
126 pIoMemCtx->iSegIdx = 0;
127 pIoMemCtx->pbBuf = (uint8_t *)paDataSeg[0].pvSeg;
128 pIoMemCtx->cbBufLeft = paDataSeg[0].cbSeg;
129}
130
/**
 * Return a buffer from the I/O memory context.
 *
 * Hands out up to *pcbData bytes from the current segment and advances the
 * context.  Note the asymmetric contract: *pcbData is written back only when
 * the request drains the current segment (i.e. fewer bytes than requested may
 * have been handed out); if the segment still has bytes left afterwards, the
 * full request was satisfied and *pcbData already holds the correct value.
 *
 * @returns Pointer to the buffer, or the final NULL once the S/G list is
 *          exhausted on a previous call.
 * @param pIoMemCtx Pointer to the I/O memory context.
 * @param pcbData Pointer to the amount of byte requested.
 *                If the current buffer doesn't have enough bytes left
 *                the amount is returned in the variable.
 */
DECLINLINE(uint8_t *) pdmIoMemCtxGetBuffer(PPDMIOMEMCTX pIoMemCtx, size_t *pcbData)
{
    size_t cbData = RT_MIN(*pcbData, pIoMemCtx->cbBufLeft);
    uint8_t *pbBuf = pIoMemCtx->pbBuf;

    pIoMemCtx->cbBufLeft -= cbData;

    /* Advance to the next segment if required. */
    if (!pIoMemCtx->cbBufLeft)
    {
        pIoMemCtx->iSegIdx++;

        if (RT_UNLIKELY(pIoMemCtx->iSegIdx == pIoMemCtx->cSegments))
        {
            /* S/G list exhausted; later calls will return NULL. */
            pIoMemCtx->cbBufLeft = 0;
            pIoMemCtx->pbBuf = NULL;
        }
        else
        {
            pIoMemCtx->pbBuf = (uint8_t *)pIoMemCtx->paDataSeg[pIoMemCtx->iSegIdx].pvSeg;
            pIoMemCtx->cbBufLeft = pIoMemCtx->paDataSeg[pIoMemCtx->iSegIdx].cbSeg;
        }

        /* Report the (possibly truncated) number of bytes actually handed out. */
        *pcbData = cbData;
    }
    else
        pIoMemCtx->pbBuf += cbData;

    return pbBuf;
}
170
#ifdef DEBUG
/**
 * Checks the bookkeeping invariants of the global cache (debug builds only).
 *
 * @returns nothing.
 * @param pCache The global cache instance to validate.
 */
static void pdmacFileCacheValidate(PPDMACFILECACHEGLOBAL pCache)
{
    /* Amount of cached data should never exceed the maximum amount. */
    AssertMsg(pCache->cbCached <= pCache->cbMax,
              ("Current amount of cached data exceeds maximum\n"));

    /* The bytes in the in-memory lists (A1in + Am) must add up to cbCached;
     * the ghost list holds no data and is accounted separately. */
    AssertMsg(pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached == pCache->cbCached,
              ("Amount of cached data doesn't match\n"));

    AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
              ("Paged out list exceeds maximum\n"));
}
#endif
186
187DECLINLINE(void) pdmacFileCacheLockEnter(PPDMACFILECACHEGLOBAL pCache)
188{
189 RTCritSectEnter(&pCache->CritSect);
190#ifdef DEBUG
191 pdmacFileCacheValidate(pCache);
192#endif
193}
194
195DECLINLINE(void) pdmacFileCacheLockLeave(PPDMACFILECACHEGLOBAL pCache)
196{
197#ifdef DEBUG
198 pdmacFileCacheValidate(pCache);
199#endif
200 RTCritSectLeave(&pCache->CritSect);
201}
202
203DECLINLINE(void) pdmacFileCacheSub(PPDMACFILECACHEGLOBAL pCache, uint32_t cbAmount)
204{
205 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
206 pCache->cbCached -= cbAmount;
207}
208
209DECLINLINE(void) pdmacFileCacheAdd(PPDMACFILECACHEGLOBAL pCache, uint32_t cbAmount)
210{
211 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
212 pCache->cbCached += cbAmount;
213}
214
215DECLINLINE(void) pdmacFileCacheListAdd(PPDMACFILELRULIST pList, uint32_t cbAmount)
216{
217 pList->cbCached += cbAmount;
218}
219
220DECLINLINE(void) pdmacFileCacheListSub(PPDMACFILELRULIST pList, uint32_t cbAmount)
221{
222 pList->cbCached -= cbAmount;
223}
224
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Checks consistency of a LRU list.
 *
 * Verifies there are no duplicate entries or cycles, that the forbidden
 * entry is absent, and that the tail pointer matches the last element.
 * O(n^2); compiled in only with PDMACFILECACHE_WITH_LRULIST_CHECKS.
 *
 * @returns nothing.
 * @param pList The LRU list to check.
 * @param pNotInList Element which is not allowed to occur in the list.
 */
static void pdmacFileCacheCheckList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pNotInList)
{
    for (PPDMACFILECACHEENTRY pIter = pList->pHead; pIter; pIter = pIter->pNext)
    {
        /* Any later occurrence of pIter means a duplicate entry or a cycle. */
        for (PPDMACFILECACHEENTRY pProbe = pIter->pNext; pProbe; pProbe = pProbe->pNext)
            AssertMsg(pIter != pProbe,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pIter, pList));

        AssertMsg(pIter != pNotInList, ("Not allowed entry %#p is in list\n", pIter));

        if (!pIter->pNext)
            AssertMsg(pIter == pList->pTail, ("End of list reached but last element is not list tail\n"));
    }
}
#endif
259
260/**
261 * Unlinks a cache entry from the LRU list it is assigned to.
262 *
263 * @returns nothing.
264 * @param pEntry The entry to unlink.
265 */
266static void pdmacFileCacheEntryRemoveFromList(PPDMACFILECACHEENTRY pEntry)
267{
268 PPDMACFILELRULIST pList = pEntry->pList;
269 PPDMACFILECACHEENTRY pPrev, pNext;
270
271 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
272
273 AssertPtr(pList);
274
275#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
276 pdmacFileCacheCheckList(pList, NULL);
277#endif
278
279 pPrev = pEntry->pPrev;
280 pNext = pEntry->pNext;
281
282 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
283 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
284
285 if (pPrev)
286 pPrev->pNext = pNext;
287 else
288 {
289 pList->pHead = pNext;
290
291 if (pNext)
292 pNext->pPrev = NULL;
293 }
294
295 if (pNext)
296 pNext->pPrev = pPrev;
297 else
298 {
299 pList->pTail = pPrev;
300
301 if (pPrev)
302 pPrev->pNext = NULL;
303 }
304
305 pEntry->pList = NULL;
306 pEntry->pPrev = NULL;
307 pEntry->pNext = NULL;
308 pdmacFileCacheListSub(pList, pEntry->cbData);
309#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
310 pdmacFileCacheCheckList(pList, pEntry);
311#endif
312}
313
314/**
315 * Adds a cache entry to the given LRU list unlinking it from the currently
316 * assigned list if needed.
317 *
318 * @returns nothing.
319 * @param pList List to the add entry to.
320 * @param pEntry Entry to add.
321 */
322static void pdmacFileCacheEntryAddToList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pEntry)
323{
324 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
325#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
326 pdmacFileCacheCheckList(pList, NULL);
327#endif
328
329 /* Remove from old list if needed */
330 if (pEntry->pList)
331 pdmacFileCacheEntryRemoveFromList(pEntry);
332
333 pEntry->pNext = pList->pHead;
334 if (pList->pHead)
335 pList->pHead->pPrev = pEntry;
336 else
337 {
338 Assert(!pList->pTail);
339 pList->pTail = pEntry;
340 }
341
342 pEntry->pPrev = NULL;
343 pList->pHead = pEntry;
344 pdmacFileCacheListAdd(pList, pEntry->cbData);
345 pEntry->pList = pList;
346#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
347 pdmacFileCacheCheckList(pList, NULL);
348#endif
349}
350
351/**
352 * Destroys a LRU list freeing all entries.
353 *
354 * @returns nothing
355 * @param pList Pointer to the LRU list to destroy.
356 *
357 * @note The caller must own the critical section of the cache.
358 */
359static void pdmacFileCacheDestroyList(PPDMACFILELRULIST pList)
360{
361 while (pList->pHead)
362 {
363 PPDMACFILECACHEENTRY pEntry = pList->pHead;
364
365 pList->pHead = pEntry->pNext;
366
367 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
368 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
369
370 RTMemPageFree(pEntry->pbData);
371 RTMemFree(pEntry);
372 }
373}
374
375/**
376 * Tries to remove the given amount of bytes from a given list in the cache
377 * moving the entries to one of the given ghosts lists
378 *
379 * @returns Amount of data which could be freed.
380 * @param pCache Pointer to the global cache data.
381 * @param cbData The amount of the data to free.
382 * @param pListSrc The source list to evict data from.
383 * @param pGhostListSrc The ghost list removed entries should be moved to
384 * NULL if the entry should be freed.
385 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size
386 * @param ppbBuf Where to store the address of the buffer if an entry with the
387 * same size was found and fReuseBuffer is true.
388 *
389 * @note This function may return fewer bytes than requested because entries
390 * may be marked as non evictable if they are used for I/O at the
391 * moment.
392 */
393static size_t pdmacFileCacheEvictPagesFrom(PPDMACFILECACHEGLOBAL pCache, size_t cbData,
394 PPDMACFILELRULIST pListSrc, PPDMACFILELRULIST pGhostListDst,
395 bool fReuseBuffer, uint8_t **ppbBuffer)
396{
397 size_t cbEvicted = 0;
398
399 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
400
401 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
402 AssertMsg( !pGhostListDst
403 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
404 ("Destination list must be NULL or the recently used but paged out list\n"));
405
406 if (fReuseBuffer)
407 {
408 AssertPtr(ppbBuffer);
409 *ppbBuffer = NULL;
410 }
411
412 /* Start deleting from the tail. */
413 PPDMACFILECACHEENTRY pEntry = pListSrc->pTail;
414
415 while ((cbEvicted < cbData) && pEntry)
416 {
417 PPDMACFILECACHEENTRY pCurr = pEntry;
418
419 pEntry = pEntry->pPrev;
420
421 /* We can't evict pages which are currently in progress or dirty but not in progress */
422 if ( !(pCurr->fFlags & PDMACFILECACHE_NOT_EVICTABLE)
423 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
424 {
425 /* Ok eviction candidate. Grab the endpoint semaphore and check again
426 * because somebody else might have raced us. */
427 PPDMACFILEENDPOINTCACHE pEndpointCache = &pCurr->pEndpoint->DataCache;
428 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
429
430 if (!(pCurr->fFlags & PDMACFILECACHE_NOT_EVICTABLE)
431 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
432 {
433 AssertMsg(!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED),
434 ("This entry is deprecated so it should have the I/O in progress flag set\n"));
435 Assert(!pCurr->pbDataReplace);
436
437 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
438
439 if (fReuseBuffer && (pCurr->cbData == cbData))
440 {
441 STAM_COUNTER_INC(&pCache->StatBuffersReused);
442 *ppbBuffer = pCurr->pbData;
443 }
444 else if (pCurr->pbData)
445 RTMemPageFree(pCurr->pbData);
446
447 pCurr->pbData = NULL;
448 cbEvicted += pCurr->cbData;
449
450 pdmacFileCacheEntryRemoveFromList(pCurr);
451 pdmacFileCacheSub(pCache, pCurr->cbData);
452
453 if (pGhostListDst)
454 {
455 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
456
457 PPDMACFILECACHEENTRY pGhostEntFree = pGhostListDst->pTail;
458
459 /* We have to remove the last entries from the paged out list. */
460 while ( ((pGhostListDst->cbCached + pCurr->cbData) > pCache->cbRecentlyUsedOutMax)
461 && pGhostEntFree)
462 {
463 PPDMACFILECACHEENTRY pFree = pGhostEntFree;
464 PPDMACFILEENDPOINTCACHE pEndpointCacheFree = &pFree->pEndpoint->DataCache;
465
466 pGhostEntFree = pGhostEntFree->pPrev;
467
468 RTSemRWRequestWrite(pEndpointCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
469
470 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
471 {
472 pdmacFileCacheEntryRemoveFromList(pFree);
473
474 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
475 RTAvlrFileOffsetRemove(pEndpointCacheFree->pTree, pFree->Core.Key);
476 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
477
478 RTMemFree(pFree);
479 }
480
481 RTSemRWReleaseWrite(pEndpointCacheFree->SemRWEntries);
482 }
483
484 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
485 {
486 /* Couldn't remove enough entries. Delete */
487 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
488 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
489 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
490
491 RTMemFree(pCurr);
492 }
493 else
494 pdmacFileCacheEntryAddToList(pGhostListDst, pCurr);
495 }
496 else
497 {
498 /* Delete the entry from the AVL tree it is assigned to. */
499 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
500 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
501 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
502
503 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
504 RTMemFree(pCurr);
505 }
506 }
507
508 }
509 else
510 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
511 }
512
513 return cbEvicted;
514}
515
/**
 * Makes room for cbData bytes in the cache, evicting entries if necessary.
 *
 * Implements the balancing step of the 2Q algorithm: if the recently-used
 * (A1in) list holds more than its share, evict from it into the ghost
 * (A1out) list first, falling back to the frequently-used (Am) list for the
 * remainder; otherwise evict from the frequently-used list directly.
 *
 * @returns true if at least cbData bytes are available afterwards, false otherwise.
 * @param pCache The global cache instance.
 * @param cbData Number of bytes which must fit into the cache.
 * @param fReuseBuffer Whether an evicted buffer of exactly cbData bytes may be reused.
 * @param ppbBuffer Where to return the reusable buffer, if any.
 *
 * @note Caller must own the cache critical section (asserted by the eviction helper).
 */
static bool pdmacFileCacheReclaim(PPDMACFILECACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
{
    size_t cbRemoved = 0;

    if ((pCache->cbCached + cbData) < pCache->cbMax)
        return true;
    else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
    {
        /* Try to evict as many bytes as possible from A1in */
        cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
                                                 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);

        /*
         * If it was not possible to remove enough entries
         * try the frequently accessed cache.
         */
        if (cbRemoved < cbData)
        {
            Assert(!fReuseBuffer || !*ppbBuffer); /* Cannot have received a buffer of the right size yet not have freed enough data. */

            /*
             * If we removed something we can't pass the reuse buffer flag anymore because
             * we don't need to evict that much data
             */
            if (!cbRemoved)
                cbRemoved += pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
                                                          NULL, fReuseBuffer, ppbBuffer);
            else
                cbRemoved += pdmacFileCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
                                                          NULL, false, NULL);
        }
    }
    else
    {
        /* We have to remove entries from frequently access list. */
        cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
                                                 NULL, fReuseBuffer, ppbBuffer);
    }

    LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
    return (cbRemoved >= cbData);
}
558
559/**
560 * Initiates a read I/O task for the given entry.
561 *
562 * @returns nothing.
563 * @param pEntry The entry to fetch the data to.
564 */
565static void pdmacFileCacheReadFromEndpoint(PPDMACFILECACHEENTRY pEntry)
566{
567 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
568
569 /* Make sure no one evicts the entry while it is accessed. */
570 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
571
572 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
573 AssertPtr(pIoTask);
574
575 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
576
577 pIoTask->pEndpoint = pEntry->pEndpoint;
578 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
579 pIoTask->Off = pEntry->Core.Key;
580 pIoTask->DataSeg.cbSeg = pEntry->cbData;
581 pIoTask->DataSeg.pvSeg = pEntry->pbData;
582 pIoTask->pvUser = pEntry;
583 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
584
585 /* Send it off to the I/O manager. */
586 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
587}
588
589/**
590 * Initiates a write I/O task for the given entry.
591 *
592 * @returns nothing.
593 * @param pEntry The entry to read the data from.
594 */
595static void pdmacFileCacheWriteToEndpoint(PPDMACFILECACHEENTRY pEntry)
596{
597 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
598
599 /* Make sure no one evicts the entry while it is accessed. */
600 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
601
602 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
603 AssertPtr(pIoTask);
604
605 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
606
607 pIoTask->pEndpoint = pEntry->pEndpoint;
608 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
609 pIoTask->Off = pEntry->Core.Key;
610 pIoTask->DataSeg.cbSeg = pEntry->cbData;
611 pIoTask->DataSeg.pvSeg = pEntry->pbData;
612 pIoTask->pvUser = pEntry;
613 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
614 ASMAtomicIncU32(&pEntry->pEndpoint->DataCache.cWritesOutstanding);
615
616 /* Send it off to the I/O manager. */
617 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
618}
619
620/**
621 * Commit a single dirty entry to the endpoint
622 *
623 * @returns nothing
624 * @param pEntry The entry to commit.
625 */
626static void pdmacFileCacheEntryCommit(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
627{
628 NOREF(pEndpointCache);
629 AssertMsg( (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY)
630 && !(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DEPRECATED)),
631 ("Invalid flags set for entry %#p\n", pEntry));
632
633 pdmacFileCacheWriteToEndpoint(pEntry);
634}
635
636/**
637 * Commit all dirty entries for a single endpoint.
638 *
639 * @returns nothing.
640 * @param pEndpointCache The endpoint cache to commit.
641 */
642static void pdmacFileCacheEndpointCommit(PPDMACFILEENDPOINTCACHE pEndpointCache)
643{
644 uint32_t cbCommitted = 0;
645 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
646
647 /* The list is moved to a new header to reduce locking overhead. */
648 RTLISTNODE ListDirtyNotCommitted;
649 RTSPINLOCKTMP Tmp;
650
651 RTListInit(&ListDirtyNotCommitted);
652 RTSpinlockAcquire(pEndpointCache->LockList, &Tmp);
653 RTListMove(&ListDirtyNotCommitted, &pEndpointCache->ListDirtyNotCommitted);
654 RTSpinlockRelease(pEndpointCache->LockList, &Tmp);
655
656 if (!RTListIsEmpty(&ListDirtyNotCommitted))
657 {
658 PPDMACFILECACHEENTRY pEntry = RTListNodeGetFirst(&ListDirtyNotCommitted,
659 PDMACFILECACHEENTRY,
660 NodeNotCommitted);
661
662 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
663 {
664 PPDMACFILECACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMACFILECACHEENTRY,
665 NodeNotCommitted);
666 pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
667 cbCommitted += pEntry->cbData;
668 RTListNodeRemove(&pEntry->NodeNotCommitted);
669 pEntry = pNext;
670 }
671
672 /* Commit the last endpoint */
673 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
674 pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
675 RTListNodeRemove(&pEntry->NodeNotCommitted);
676 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
677 ("Committed all entries but list is not empty\n"));
678 }
679
680 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
681 AssertMsg(pEndpointCache->pCache->cbDirty >= cbCommitted,
682 ("Number of committed bytes exceeds number of dirty bytes\n"));
683 ASMAtomicSubU32(&pEndpointCache->pCache->cbDirty, cbCommitted);
684}
685
/**
 * Commit all dirty entries in the cache.
 *
 * Walks every endpoint registered with the cache and commits its dirty
 * list.  Reentrancy-guarded via fCommitInProgress: concurrent callers
 * simply return without committing.
 *
 * @returns nothing.
 * @param pCache The global cache instance.
 */
static void pdmacFileCacheCommitDirtyEntries(PPDMACFILECACHEGLOBAL pCache)
{
    /* Claim the commit; whoever loses the exchange backs off. */
    bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);

    if (!fCommitInProgress)
    {
        pdmacFileCacheLockEnter(pCache);
        Assert(!RTListIsEmpty(&pCache->ListEndpoints));

        PPDMACFILEENDPOINTCACHE pEndpointCache = RTListNodeGetFirst(&pCache->ListEndpoints,
                                                                    PDMACFILEENDPOINTCACHE,
                                                                    NodeCacheEndpoint);
        AssertPtr(pEndpointCache);

        /* Commit every endpoint except the last, advancing before committing
         * is not required here since the list is not modified. */
        while (!RTListNodeIsLast(&pCache->ListEndpoints, &pEndpointCache->NodeCacheEndpoint))
        {
            pdmacFileCacheEndpointCommit(pEndpointCache);

            pEndpointCache = RTListNodeGetNext(&pEndpointCache->NodeCacheEndpoint, PDMACFILEENDPOINTCACHE,
                                               NodeCacheEndpoint);
        }

        /* Commit the last endpoint */
        Assert(RTListNodeIsLast(&pCache->ListEndpoints, &pEndpointCache->NodeCacheEndpoint));
        pdmacFileCacheEndpointCommit(pEndpointCache);

        pdmacFileCacheLockLeave(pCache);
        ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
    }
}
722
/**
 * Adds the given entry as a dirty to the cache.
 *
 * With the commit timer disabled the entry is written back immediately;
 * otherwise it is appended to the endpoint's dirty-not-committed list
 * (unless already dirty) and the global dirty-byte counter is advanced.
 *
 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
 * @param pEndpointCache The endpoint cache the entry belongs to.
 * @param pEntry The entry to add.
 */
static bool pdmacFileCacheAddDirtyEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
{
    bool fDirtyBytesExceeded = false;
    PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;

    /* If the commit timer is disabled we commit right away. */
    if (pCache->u32CommitTimeoutMs == 0)
    {
        pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
        pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
    }
    else if (!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY))
    {
        pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;

        /* List manipulation is guarded by the endpoint's spinlock. */
        RTSPINLOCKTMP Tmp;
        RTSpinlockAcquire(pEndpointCache->LockList, &Tmp);
        RTListAppend(&pEndpointCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
        RTSpinlockRelease(pEndpointCache->LockList, &Tmp);

        /* NOTE(review): cbDirty receives whatever ASMAtomicAddU32 returns --
         * verify against iprt/asm.h whether that is the pre- or post-add value;
         * the threshold comparison below is approximate either way. */
        uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);

        fDirtyBytesExceeded = (cbDirty >= pCache->cbCommitDirtyThreshold);
    }

    return fDirtyBytesExceeded;
}
757
758
759/**
760 * Completes a task segment freeing all ressources and completes the task handle
761 * if everything was transfered.
762 *
763 * @returns Next task segment handle.
764 * @param pEndpointCache The endpoint cache.
765 * @param pTaskSeg Task segment to complete.
766 */
767static PPDMACFILETASKSEG pdmacFileCacheTaskComplete(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILETASKSEG pTaskSeg)
768{
769 PPDMACFILETASKSEG pNext = pTaskSeg->pNext;
770
771 uint32_t uOld = ASMAtomicSubS32(&pTaskSeg->pTask->cbTransferLeft, pTaskSeg->cbTransfer);
772 AssertMsg(uOld >= pTaskSeg->cbTransfer, ("New value would overflow\n"));
773 if (!(uOld - pTaskSeg->cbTransfer)
774 && !ASMAtomicXchgBool(&pTaskSeg->pTask->fCompleted, true))
775 pdmR3AsyncCompletionCompleteTask(&pTaskSeg->pTask->Core, true);
776
777 RTMemFree(pTaskSeg);
778
779 return pNext;
780}
781
/**
 * Completion callback for I/O tasks.
 *
 * Invoked when a read or write issued for a cache entry finishes.  Applies
 * any waiting segments to the entry buffer (or copies data out for waiting
 * reads), completes the associated user tasks, re-dirties the entry if a
 * waiting write modified it, and finishes a pending flush once no writes
 * remain outstanding.
 *
 * @returns nothing.
 * @param pTask The completed task.
 * @param pvUser Opaque user data (the cache entry the task belongs to).
 */
static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser)
{
    PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pvUser;
    PPDMACFILECACHEGLOBAL pCache = pEntry->pCache;
    PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pEntry->pEndpoint;
    PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;

    /* Reference the entry now as we are clearing the I/O in progress flag
     * which protects the entry till now. */
    pdmacFileEpCacheEntryRef(pEntry);

    RTSemRWRequestWrite(pEndpoint->DataCache.SemRWEntries, RT_INDEFINITE_WAIT);
    pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;

    /* Process waiting segment list. The data in entry might have changed in between. */
    bool fDirty = false;
    PPDMACFILETASKSEG pCurr = pEntry->pWaitingHead;

    /* Detach the waiting list; head and tail must be consistent. */
    AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
              ("The list tail was not updated correctly\n"));
    pEntry->pWaitingTail = NULL;
    pEntry->pWaitingHead = NULL;

    if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
    {
        AssertMsg(pEndpointCache->cWritesOutstanding > 0, ("Completed write request but outstanding task count is 0\n"));
        ASMAtomicDecU32(&pEndpointCache->cWritesOutstanding);

        /* The on-disk data is up to date now. */
        pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DIRTY;

        if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED)
        {
            AssertMsg(!pCurr, ("The entry is deprecated but has waiting write segments attached\n"));

            /* Swap in the replacement buffer that superseded the one just written. */
            RTMemPageFree(pEntry->pbData);
            pEntry->pbData = pEntry->pbDataReplace;
            pEntry->pbDataReplace = NULL;
            pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DEPRECATED;
        }
        else
        {
            /* Apply the writes that queued up while this write was in flight;
             * this makes the entry dirty again. */
            while (pCurr)
            {
                AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));

                memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
                fDirty = true;

                pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
            }
        }
    }
    else
    {
        AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_READ, ("Invalid transfer type\n"));
        AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IS_DIRTY | PDMACFILECACHE_ENTRY_IS_DEPRECATED)),
                  ("Invalid flags set\n"));

        /* Serve the segments that waited for this read; waiting writes modify
         * the freshly read buffer and dirty the entry. */
        while (pCurr)
        {
            if (pCurr->fWrite)
            {
                memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
                fDirty = true;
            }
            else
                memcpy(pCurr->pvBuf, pEntry->pbData + pCurr->uBufOffset, pCurr->cbTransfer);

            pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
        }
    }

    bool fCommit = false;
    if (fDirty)
        fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntry);

    /* Complete a pending flush if all writes have completed */
    if (!ASMAtomicReadU32(&pEndpointCache->cWritesOutstanding))
    {
        PPDMASYNCCOMPLETIONTASKFILE pTaskFlush = (PPDMASYNCCOMPLETIONTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpointCache->pTaskFlush, NULL);
        if (pTaskFlush)
            pdmR3AsyncCompletionCompleteTask(&pTaskFlush->Core, true);
    }

    RTSemRWReleaseWrite(pEndpoint->DataCache.SemRWEntries);

    /* Dereference so that it isn't protected anymore except we issued another write for it. */
    pdmacFileEpCacheEntryRelease(pEntry);

    /* Kick off a full commit outside the semaphore if the dirty threshold was crossed. */
    if (fCommit)
        pdmacFileCacheCommitDirtyEntries(pCache);
}
881
882/**
883 * Commit timer callback.
884 */
885static void pdmacFileCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
886{
887 PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pvUser;
888 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
889
890 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
891
892 if (ASMAtomicReadU32(&pCache->cbDirty) > 0)
893 pdmacFileCacheCommitDirtyEntries(pCache);
894
895 TMTimerSetMillies(pTimer, pCache->u32CommitTimeoutMs);
896 LogFlowFunc(("Entries committed, going to sleep\n"));
897}
898
/**
 * Initializes the I/O cache.
 *
 * @returns VBox status code.
 * @param pClassFile The global class data for file endpoints.
 * @param pCfgNode CFGM node to query configuration data from.
 */
int pdmacFileCacheInit(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile, PCFGMNODE pCfgNode)
{
    int rc = VINF_SUCCESS;
    PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;

    /* Overall cache size - defaults to 5MB if not configured. */
    rc = CFGMR3QueryU32Def(pCfgNode, "CacheSize", &pCache->cbMax, 5 * _1M);
    AssertLogRelRCReturn(rc, rc);

    RTListInit(&pCache->ListEndpoints);
    pCache->cRefs = 0;
    pCache->cbCached = 0;
    pCache->fCommitInProgress = 0;
    LogFlowFunc((": Maximum number of bytes cached %u\n", pCache->cbMax));

    /* Initialize members */
    pCache->LruRecentlyUsedIn.pHead = NULL;
    pCache->LruRecentlyUsedIn.pTail = NULL;
    pCache->LruRecentlyUsedIn.cbCached = 0;

    pCache->LruRecentlyUsedOut.pHead = NULL;
    pCache->LruRecentlyUsedOut.pTail = NULL;
    pCache->LruRecentlyUsedOut.cbCached = 0;

    pCache->LruFrequentlyUsed.pHead = NULL;
    pCache->LruFrequentlyUsed.pTail = NULL;
    pCache->LruFrequentlyUsed.cbCached = 0;

    /* 2Q list size limits (see the @page doc at the top of the file). */
    pCache->cbRecentlyUsedInMax = (pCache->cbMax / 100) * 25; /* 25% of the buffer size */
    pCache->cbRecentlyUsedOutMax = (pCache->cbMax / 100) * 50; /* 50% of the buffer size */
    LogFlowFunc((": cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n", pCache->cbRecentlyUsedInMax, pCache->cbRecentlyUsedOutMax));

    /** @todo r=aeichner: Experiment to find optimal default values */
    rc = CFGMR3QueryU32Def(pCfgNode, "CacheCommitIntervalMs", &pCache->u32CommitTimeoutMs, 10000 /* 10sec */);
    AssertLogRelRCReturn(rc, rc);
    rc = CFGMR3QueryU32(pCfgNode, "CacheCommitThreshold", &pCache->cbCommitDirtyThreshold);
    if (   rc == VERR_CFGM_VALUE_NOT_FOUND
        || rc == VERR_CFGM_NO_PARENT)
    {
        /* Start committing after 50% of the cache are dirty */
        pCache->cbCommitDirtyThreshold = pCache->cbMax / 2;
    }
    else
        return rc;

    STAMR3Register(pClassFile->Core.pVM, &pCache->cbMax,
                   STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/cbMax",
                   STAMUNIT_BYTES,
                   "Maximum cache size");
    STAMR3Register(pClassFile->Core.pVM, &pCache->cbCached,
                   STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/cbCached",
                   STAMUNIT_BYTES,
                   "Currently used cache");
    STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedIn.cbCached,
                   STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/cbCachedMruIn",
                   STAMUNIT_BYTES,
                   "Number of bytes cached in MRU list");
    /* NOTE(review): the two descriptions below look copy-pasted - "FRU list" for
     * the recently-used-out (ghost) list is probably meant to read differently. */
    STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedOut.cbCached,
                   STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/cbCachedMruOut",
                   STAMUNIT_BYTES,
                   "Number of bytes cached in FRU list");
    STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyUsed.cbCached,
                   STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/cbCachedFru",
                   STAMUNIT_BYTES,
                   "Number of bytes cached in FRU ghost list");

#ifdef VBOX_WITH_STATISTICS
    STAMR3Register(pClassFile->Core.pVM, &pCache->cHits,
                   STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/CacheHits",
                   STAMUNIT_COUNT, "Number of hits in the cache");
    STAMR3Register(pClassFile->Core.pVM, &pCache->cPartialHits,
                   STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/CachePartialHits",
                   STAMUNIT_COUNT, "Number of partial hits in the cache");
    STAMR3Register(pClassFile->Core.pVM, &pCache->cMisses,
                   STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/CacheMisses",
                   STAMUNIT_COUNT, "Number of misses when accessing the cache");
    STAMR3Register(pClassFile->Core.pVM, &pCache->StatRead,
                   STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/CacheRead",
                   STAMUNIT_BYTES, "Number of bytes read from the cache");
    STAMR3Register(pClassFile->Core.pVM, &pCache->StatWritten,
                   STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/CacheWritten",
                   STAMUNIT_BYTES, "Number of bytes written to the cache");
    STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeGet,
                   STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/CacheTreeGet",
                   STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
    STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeInsert,
                   STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/CacheTreeInsert",
                   STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
    STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeRemove,
                   STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/CacheTreeRemove",
                   STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
    STAMR3Register(pClassFile->Core.pVM, &pCache->StatBuffersReused,
                   STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
                   "/PDM/AsyncCompletion/File/CacheBuffersReused",
                   STAMUNIT_COUNT, "Number of times a buffer could be reused");
#endif

    /* Initialize the critical section */
    rc = RTCritSectInit(&pCache->CritSect);

    if (RT_SUCCESS(rc))
    {
        /* Create the commit timer. A timeout of 0 disables periodic commits. */
        if (pCache->u32CommitTimeoutMs > 0)
            rc = TMR3TimerCreateInternal(pClassFile->Core.pVM, TMCLOCK_REAL,
                                         pdmacFileCacheCommitTimerCallback,
                                         pClassFile,
                                         "Cache-Commit",
                                         &pClassFile->Cache.pTimerCommit);

        if (RT_SUCCESS(rc))
        {
            LogRel(("AIOMgr: Cache successfully initialised. Cache size is %u bytes\n", pCache->cbMax));
            LogRel(("AIOMgr: Cache commit interval is %u ms\n", pCache->u32CommitTimeoutMs));
            LogRel(("AIOMgr: Cache commit threshold is %u bytes\n", pCache->cbCommitDirtyThreshold));
            return VINF_SUCCESS;
        }

        RTCritSectDelete(&pCache->CritSect);
    }

    return rc;
}
1041
1042/**
1043 * Destroysthe cache freeing all data.
1044 *
1045 * returns nothing.
1046 * @param pClassFile The global class data for file endpoints.
1047 */
1048void pdmacFileCacheDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
1049{
1050 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
1051
1052 /* Make sure no one else uses the cache now */
1053 pdmacFileCacheLockEnter(pCache);
1054
1055 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1056 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedIn);
1057 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedOut);
1058 pdmacFileCacheDestroyList(&pCache->LruFrequentlyUsed);
1059
1060 pdmacFileCacheLockLeave(pCache);
1061
1062 RTCritSectDelete(&pCache->CritSect);
1063}
1064
1065/**
1066 * Initializes per endpoint cache data
1067 * like the AVL tree used to access cached entries.
1068 *
1069 * @returns VBox status code.
1070 * @param pEndpoint The endpoint to init the cache for,
1071 * @param pClassFile The global class data for file endpoints.
1072 */
1073int pdmacFileEpCacheInit(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
1074{
1075 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1076
1077 pEndpointCache->pCache = &pClassFile->Cache;
1078 RTListInit(&pEndpointCache->ListDirtyNotCommitted);
1079 int rc = RTSpinlockCreate(&pEndpointCache->LockList);
1080
1081 if (RT_SUCCESS(rc))
1082 {
1083 rc = RTSemRWCreate(&pEndpointCache->SemRWEntries);
1084 if (RT_SUCCESS(rc))
1085 {
1086 pEndpointCache->pTree = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1087 if (pEndpointCache->pTree)
1088 {
1089 pClassFile->Cache.cRefs++;
1090 RTListAppend(&pClassFile->Cache.ListEndpoints, &pEndpointCache->NodeCacheEndpoint);
1091
1092 /* Arm the timer if this is the first endpoint. */
1093 if ( pClassFile->Cache.cRefs == 1
1094 && pClassFile->Cache.u32CommitTimeoutMs > 0)
1095 rc = TMTimerSetMillies(pClassFile->Cache.pTimerCommit, pClassFile->Cache.u32CommitTimeoutMs);
1096 }
1097 else
1098 rc = VERR_NO_MEMORY;
1099
1100 if (RT_FAILURE(rc))
1101 RTSemRWDestroy(pEndpointCache->SemRWEntries);
1102 }
1103
1104 if (RT_FAILURE(rc))
1105 RTSpinlockDestroy(pEndpointCache->LockList);
1106 }
1107
1108#ifdef VBOX_WITH_STATISTICS
1109 if (RT_SUCCESS(rc))
1110 {
1111 STAMR3RegisterF(pClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred,
1112 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1113 STAMUNIT_COUNT, "Number of deferred writes",
1114 "/PDM/AsyncCompletion/File/%s/Cache/DeferredWrites", RTPathFilename(pEndpoint->Core.pszUri));
1115 }
1116#endif
1117
1118 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1119 return rc;
1120}
1121
/**
 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
 *
 * @returns IPRT status code.
 * @param pNode The node to destroy.
 * @param pvUser Opaque user data.
 */
static int pdmacFileEpCacheEntryDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
{
    PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pNode;
    PPDMACFILECACHEGLOBAL pCache = (PPDMACFILECACHEGLOBAL)pvUser;
    PPDMACFILEENDPOINTCACHE pEndpointCache = &pEntry->pEndpoint->DataCache;

    /* Wait for in-flight I/O and pending dirty data to drain. The write
     * semaphore (held by the caller, pdmacFileEpCacheDestroy) must be dropped
     * while sleeping so the completion path can acquire it and clear the flags. */
    while (ASMAtomicReadU32(&pEntry->fFlags) & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY))
    {
        RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
        RTThreadSleep(250);
        RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
    }

    AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
              ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));

    /* Only the two non-ghost lists account their entries' bytes into the
     * global cache usage (ghost entries carry no data), so only subtract
     * from the global counters for those. */
    bool fUpdateCache =    pEntry->pList == &pCache->LruFrequentlyUsed
                        || pEntry->pList == &pCache->LruRecentlyUsedIn;

    pdmacFileCacheEntryRemoveFromList(pEntry);

    if (fUpdateCache)
        pdmacFileCacheSub(pCache, pEntry->cbData);

    RTMemPageFree(pEntry->pbData);
    RTMemFree(pEntry);

    return VINF_SUCCESS;
}
1158
1159/**
1160 * Destroys all cache ressources used by the given endpoint.
1161 *
1162 * @returns nothing.
1163 * @param pEndpoint The endpoint to the destroy.
1164 */
1165void pdmacFileEpCacheDestroy(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1166{
1167 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1168 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1169
1170 /* Make sure nobody is accessing the cache while we delete the tree. */
1171 pdmacFileCacheLockEnter(pCache);
1172 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1173 RTAvlrFileOffsetDestroy(pEndpointCache->pTree, pdmacFileEpCacheEntryDestroy, pCache);
1174 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1175
1176 RTSpinlockDestroy(pEndpointCache->LockList);
1177
1178 pCache->cRefs--;
1179 RTListNodeRemove(&pEndpointCache->NodeCacheEndpoint);
1180
1181 if ( !pCache->cRefs
1182 && pCache->u32CommitTimeoutMs > 0)
1183 TMTimerStop(pCache->pTimerCommit);
1184
1185 pdmacFileCacheLockLeave(pCache);
1186
1187 RTSemRWDestroy(pEndpointCache->SemRWEntries);
1188
1189#ifdef VBOX_WITH_STATISTICS
1190 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
1191
1192 STAMR3Deregister(pEpClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred);
1193#endif
1194}
1195
1196static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
1197{
1198 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1199 PPDMACFILECACHEENTRY pEntry = NULL;
1200
1201 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1202
1203 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1204 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetRangeGet(pEndpointCache->pTree, off);
1205 if (pEntry)
1206 pdmacFileEpCacheEntryRef(pEntry);
1207 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
1208
1209 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1210
1211 return pEntry;
1212}
1213
1214/**
1215 * Return the best fit cache entries for the given offset.
1216 *
1217 * @returns nothing.
1218 * @param pEndpointCache The endpoint cache.
1219 * @param off The offset.
1220 * @param pEntryAbove Where to store the pointer to the best fit entry above the
1221 * the given offset. NULL if not required.
1222 * @param pEntryBelow Where to store the pointer to the best fit entry below the
1223 * the given offset. NULL if not required.
1224 */
1225static void pdmacFileEpCacheGetCacheBestFitEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off,
1226 PPDMACFILECACHEENTRY *ppEntryAbove,
1227 PPDMACFILECACHEENTRY *ppEntryBelow)
1228{
1229 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1230
1231 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1232
1233 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1234 if (ppEntryAbove)
1235 {
1236 *ppEntryAbove = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetGetBestFit(pEndpointCache->pTree, off, true /*fAbove*/);
1237 if (*ppEntryAbove)
1238 pdmacFileEpCacheEntryRef(*ppEntryAbove);
1239 }
1240
1241 if (ppEntryBelow)
1242 {
1243 *ppEntryBelow = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetGetBestFit(pEndpointCache->pTree, off, false /*fAbove*/);
1244 if (*ppEntryBelow)
1245 pdmacFileEpCacheEntryRef(*ppEntryBelow);
1246 }
1247 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
1248
1249 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1250}
1251
1252static void pdmacFileEpCacheInsertEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
1253{
1254 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1255
1256 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1257 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1258 bool fInserted = RTAvlrFileOffsetInsert(pEndpointCache->pTree, &pEntry->Core);
1259 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1260 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1261 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1262}
1263
1264/**
1265 * Allocates and initializes a new entry for the cache.
1266 * The entry has a reference count of 1.
1267 *
1268 * @returns Pointer to the new cache entry or NULL if out of memory.
1269 * @param pCache The cache the entry belongs to.
1270 * @param pEndoint The endpoint the entry holds data for.
1271 * @param off Start offset.
1272 * @param cbData Size of the cache entry.
1273 * @param pbBuffer Pointer to the buffer to use.
1274 * NULL if a new buffer should be allocated.
1275 * The buffer needs to have the same size of the entry.
1276 */
1277static PPDMACFILECACHEENTRY pdmacFileCacheEntryAlloc(PPDMACFILECACHEGLOBAL pCache,
1278 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1279 RTFOFF off, size_t cbData, uint8_t *pbBuffer)
1280{
1281 PPDMACFILECACHEENTRY pEntryNew = (PPDMACFILECACHEENTRY)RTMemAllocZ(sizeof(PDMACFILECACHEENTRY));
1282
1283 if (RT_UNLIKELY(!pEntryNew))
1284 return NULL;
1285
1286 pEntryNew->Core.Key = off;
1287 pEntryNew->Core.KeyLast = off + cbData - 1;
1288 pEntryNew->pEndpoint = pEndpoint;
1289 pEntryNew->pCache = pCache;
1290 pEntryNew->fFlags = 0;
1291 pEntryNew->cRefs = 1; /* We are using it now. */
1292 pEntryNew->pList = NULL;
1293 pEntryNew->cbData = cbData;
1294 pEntryNew->pWaitingHead = NULL;
1295 pEntryNew->pWaitingTail = NULL;
1296 pEntryNew->pbDataReplace = NULL;
1297 if (pbBuffer)
1298 pEntryNew->pbData = pbBuffer;
1299 else
1300 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1301
1302 if (RT_UNLIKELY(!pEntryNew->pbData))
1303 {
1304 RTMemFree(pEntryNew);
1305 return NULL;
1306 }
1307
1308 return pEntryNew;
1309}
1310
1311/**
1312 * Adds a segment to the waiting list for a cache entry
1313 * which is currently in progress.
1314 *
1315 * @returns nothing.
1316 * @param pEntry The cache entry to add the segment to.
1317 * @param pSeg The segment to add.
1318 */
1319DECLINLINE(void) pdmacFileEpCacheEntryAddWaitingSegment(PPDMACFILECACHEENTRY pEntry, PPDMACFILETASKSEG pSeg)
1320{
1321 pSeg->pNext = NULL;
1322
1323 if (pEntry->pWaitingHead)
1324 {
1325 AssertPtr(pEntry->pWaitingTail);
1326
1327 pEntry->pWaitingTail->pNext = pSeg;
1328 pEntry->pWaitingTail = pSeg;
1329 }
1330 else
1331 {
1332 Assert(!pEntry->pWaitingTail);
1333
1334 pEntry->pWaitingHead = pSeg;
1335 pEntry->pWaitingTail = pSeg;
1336 }
1337}
1338
1339/**
1340 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1341 * in exclusive mode.
1342 *
1343 * @returns true if the flag in fSet is set and the one in fClear is clear.
1344 * false othwerise.
1345 * The R/W semaphore is only held if true is returned.
1346 *
1347 * @param pEndpointCache The endpoint cache instance data.
1348 * @param pEntry The entry to check the flags for.
1349 * @param fSet The flag which is tested to be set.
1350 * @param fClear The flag which is tested to be clear.
1351 */
1352DECLINLINE(bool) pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(PPDMACFILEENDPOINTCACHE pEndpointCache,
1353 PPDMACFILECACHEENTRY pEntry,
1354 uint32_t fSet, uint32_t fClear)
1355{
1356 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1357 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1358
1359 if (fPassed)
1360 {
1361 /* Acquire the lock and check again becuase the completion callback might have raced us. */
1362 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1363
1364 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1365 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1366
1367 /* Drop the lock if we didn't passed the test. */
1368 if (!fPassed)
1369 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1370 }
1371
1372 return fPassed;
1373}
1374
1375/**
1376 * Copies data to a buffer described by a I/O memory context.
1377 *
1378 * @returns nothing.
1379 * @param pIoMemCtx The I/O memory context to copy the data into.
1380 * @param pbData Pointer to the data data to copy.
1381 * @param cbData Amount of data to copy.
1382 */
1383static void pdmacFileEpCacheCopyToIoMemCtx(PPDMIOMEMCTX pIoMemCtx,
1384 uint8_t *pbData,
1385 size_t cbData)
1386{
1387 while (cbData)
1388 {
1389 size_t cbCopy = cbData;
1390 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbCopy);
1391
1392 AssertPtr(pbBuf);
1393
1394 memcpy(pbBuf, pbData, cbCopy);
1395
1396 cbData -= cbCopy;
1397 pbData += cbCopy;
1398 }
1399}
1400
1401/**
1402 * Copies data from a buffer described by a I/O memory context.
1403 *
1404 * @returns nothing.
1405 * @param pIoMemCtx The I/O memory context to copy the data from.
1406 * @param pbData Pointer to the destination buffer.
1407 * @param cbData Amount of data to copy.
1408 */
1409static void pdmacFileEpCacheCopyFromIoMemCtx(PPDMIOMEMCTX pIoMemCtx,
1410 uint8_t *pbData,
1411 size_t cbData)
1412{
1413 while (cbData)
1414 {
1415 size_t cbCopy = cbData;
1416 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbCopy);
1417
1418 AssertPtr(pbBuf);
1419
1420 memcpy(pbData, pbBuf, cbCopy);
1421
1422 cbData -= cbCopy;
1423 pbData += cbCopy;
1424 }
1425}
1426
1427/**
1428 * Add a buffer described by the I/O memory context
1429 * to the entry waiting for completion.
1430 *
1431 * @returns nothing.
1432 * @param pEntry The entry to add the buffer to.
1433 * @param pTask Task associated with the buffer.
1434 * @param pIoMemCtx The memory context to use.
1435 * @param OffDiff Offset from the start of the buffer
1436 * in the entry.
1437 * @param cbData Amount of data to wait for onthis entry.
1438 * @param fWrite Flag whether the task waits because it wants to write
1439 * to the cache entry.
1440 */
1441static void pdmacFileEpCacheEntryWaitersAdd(PPDMACFILECACHEENTRY pEntry,
1442 PPDMASYNCCOMPLETIONTASKFILE pTask,
1443 PPDMIOMEMCTX pIoMemCtx,
1444 RTFOFF OffDiff,
1445 size_t cbData,
1446 bool fWrite)
1447{
1448 while (cbData)
1449 {
1450 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1451 size_t cbSeg = cbData;
1452 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbSeg);
1453
1454 pSeg->pTask = pTask;
1455 pSeg->uBufOffset = OffDiff;
1456 pSeg->cbTransfer = cbSeg;
1457 pSeg->pvBuf = pbBuf;
1458 pSeg->fWrite = fWrite;
1459
1460 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1461
1462 cbData -= cbSeg;
1463 OffDiff += cbSeg;
1464 }
1465}
1466
1467/**
1468 * Passthrough a part of a request directly to the I/O manager
1469 * handling the endpoint.
1470 *
1471 * @returns nothing.
1472 * @param pEndpoint The endpoint.
1473 * @param pTask The task.
1474 * @param pIoMemCtx The I/O memory context to use.
1475 * @param offStart Offset to start transfer from.
1476 * @param cbData Amount of data to transfer.
1477 * @param enmTransferType The transfer type (read/write)
1478 */
1479static void pdmacFileEpCacheRequestPassthrough(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1480 PPDMASYNCCOMPLETIONTASKFILE pTask,
1481 PPDMIOMEMCTX pIoMemCtx,
1482 RTFOFF offStart, size_t cbData,
1483 PDMACTASKFILETRANSFER enmTransferType)
1484{
1485 while (cbData)
1486 {
1487 size_t cbSeg = cbData;
1488 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbSeg);
1489 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1490 AssertPtr(pIoTask);
1491
1492 pIoTask->pEndpoint = pEndpoint;
1493 pIoTask->enmTransferType = enmTransferType;
1494 pIoTask->Off = offStart;
1495 pIoTask->DataSeg.cbSeg = cbSeg;
1496 pIoTask->DataSeg.pvSeg = pbBuf;
1497 pIoTask->pvUser = pTask;
1498 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1499
1500 offStart += cbSeg;
1501 cbData -= cbSeg;
1502
1503 /* Send it off to the I/O manager. */
1504 pdmacFileEpAddTask(pEndpoint, pIoTask);
1505 }
1506}
1507
1508/**
1509 * Calculate aligned offset and size for a new cache entry
1510 * which do not intersect with an already existing entry and the
1511 * file end.
1512 *
1513 * @returns The number of bytes the entry can hold of the requested amount
1514 * of byte.
1515 * @param pEndpoint The endpoint.
1516 * @param pEndpointCache The endpoint cache.
1517 * @param off The start offset.
1518 * @param cb The number of bytes the entry needs to hold at least.
1519 * @param uAlignment Alignment of the boundary sizes.
1520 * @param poffAligned Where to store the aligned offset.
1521 * @param pcbAligned Where to store the aligned size of the entry.
1522 */
1523static size_t pdmacFileEpCacheEntryBoundariesCalc(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1524 PPDMACFILEENDPOINTCACHE pEndpointCache,
1525 RTFOFF off, size_t cb,
1526 unsigned uAlignment,
1527 RTFOFF *poffAligned, size_t *pcbAligned)
1528{
1529 size_t cbAligned;
1530 size_t cbInEntry = 0;
1531 RTFOFF offAligned;
1532 PPDMACFILECACHEENTRY pEntryAbove = NULL;
1533 PPDMACFILECACHEENTRY pEntryBelow = NULL;
1534
1535 /* Get the best fit entries around the offset */
1536 pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off,
1537 &pEntryAbove, &pEntryBelow);
1538
1539 /* Log the info */
1540 LogFlow(("%sest fit entry below off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1541 pEntryBelow ? "B" : "No b",
1542 off,
1543 pEntryBelow ? pEntryBelow->Core.Key : 0,
1544 pEntryBelow ? pEntryBelow->Core.KeyLast : 0,
1545 pEntryBelow ? pEntryBelow->cbData : 0));
1546
1547 LogFlow(("%sest fit entry above off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1548 pEntryAbove ? "B" : "No b",
1549 off,
1550 pEntryAbove ? pEntryAbove->Core.Key : 0,
1551 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1552 pEntryAbove ? pEntryAbove->cbData : 0));
1553
1554 /* Align the offset first. */
1555 offAligned = off & ~(RTFOFF)(512-1);
1556 if ( pEntryBelow
1557 && offAligned <= pEntryBelow->Core.KeyLast)
1558 offAligned = pEntryBelow->Core.KeyLast;
1559
1560 if ( pEntryAbove
1561 && off + (RTFOFF)cb > pEntryAbove->Core.Key)
1562 {
1563 cbInEntry = pEntryAbove->Core.Key - off;
1564 cbAligned = pEntryAbove->Core.Key - offAligned;
1565 }
1566 else
1567 {
1568 /*
1569 * Align the size to a 4KB boundary.
1570 * Memory size is aligned to a page boundary
1571 * and memory is wasted if the size is rather small.
1572 * (For example reads with a size of 512 bytes).
1573 */
1574 cbInEntry = cb;
1575 cbAligned = RT_ALIGN_Z(cb + (off - offAligned), uAlignment);
1576
1577 /*
1578 * Clip to file size if the original request doesn't
1579 * exceed the file (not an appending write)
1580 */
1581 uint64_t cbReq = off + (RTFOFF)cb;
1582 if (cbReq >= pEndpoint->cbFile)
1583 cbAligned = cbReq - offAligned;
1584 else
1585 cbAligned = RT_MIN(pEndpoint->cbFile - offAligned, cbAligned);
1586 if (pEntryAbove)
1587 {
1588 Assert(pEntryAbove->Core.Key >= off);
1589 cbAligned = RT_MIN(cbAligned, (uint64_t)pEntryAbove->Core.Key - offAligned);
1590 }
1591 }
1592
1593 /* A few sanity checks */
1594 AssertMsg(!pEntryBelow || pEntryBelow->Core.KeyLast < offAligned,
1595 ("Aligned start offset intersects with another cache entry\n"));
1596 AssertMsg(!pEntryAbove || (offAligned + (RTFOFF)cbAligned) <= pEntryAbove->Core.Key,
1597 ("Aligned size intersects with another cache entry\n"));
1598 Assert(cbInEntry <= cbAligned);
1599 AssertMsg( ( offAligned + (RTFOFF)cbAligned <= (RTFOFF)pEndpoint->cbFile
1600 && off + (RTFOFF)cb <= (RTFOFF)pEndpoint->cbFile)
1601 || (offAligned + (RTFOFF)cbAligned <= off + (RTFOFF)cb),
1602 ("Unwanted file size increase\n"));
1603
1604 if (pEntryBelow)
1605 pdmacFileEpCacheEntryRelease(pEntryBelow);
1606 if (pEntryAbove)
1607 pdmacFileEpCacheEntryRelease(pEntryAbove);
1608
1609 LogFlow(("offAligned=%RTfoff cbAligned=%u\n", offAligned, cbAligned));
1610
1611 *poffAligned = offAligned;
1612 *pcbAligned = cbAligned;
1613
1614 return cbInEntry;
1615}
1616
1617/**
1618 * Create a new cache entry evicting data from the cache if required.
1619 *
1620 * @returns Pointer to the new cache entry or NULL
1621 * if not enough bytes could be evicted from the cache.
1622 * @param pEndpoint The endpoint.
1623 * @param pEndpointCache The endpoint cache.
1624 * @param off The offset.
1625 * @param cb Number of bytes the cache entry should have.
1626 * @param uAlignment Alignment the size of the entry should have.
1627 * @param pcbData Where to store the number of bytes the new
1628 * entry can hold. May be lower than actually requested
1629 * due to another entry intersecting the access range.
1630 */
1631static PPDMACFILECACHEENTRY pdmacFileEpCacheEntryCreate(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1632 PPDMACFILEENDPOINTCACHE pEndpointCache,
1633 RTFOFF off, size_t cb,
1634 unsigned uAlignment,
1635 size_t *pcbData)
1636{
1637 RTFOFF offStart = 0;
1638 size_t cbEntry = 0;
1639 PPDMACFILECACHEENTRY pEntryNew = NULL;
1640 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1641 uint8_t *pbBuffer = NULL;
1642
1643 *pcbData = pdmacFileEpCacheEntryBoundariesCalc(pEndpoint,
1644 pEndpointCache,
1645 off, cb,
1646 uAlignment,
1647 &offStart, &cbEntry);
1648
1649 pdmacFileCacheLockEnter(pCache);
1650 bool fEnough = pdmacFileCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1651
1652 if (fEnough)
1653 {
1654 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1655
1656 pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint,
1657 offStart, cbEntry,
1658 pbBuffer);
1659 if (RT_LIKELY(pEntryNew))
1660 {
1661 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1662 pdmacFileCacheAdd(pCache, cbEntry);
1663 pdmacFileCacheLockLeave(pCache);
1664
1665 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
1666
1667 AssertMsg( (off >= pEntryNew->Core.Key)
1668 && (off + (RTFOFF)*pcbData <= pEntryNew->Core.KeyLast + 1),
1669 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1670 off, pEntryNew->Core.Key));
1671 }
1672 else
1673 pdmacFileCacheLockLeave(pCache);
1674 }
1675 else
1676 pdmacFileCacheLockLeave(pCache);
1677
1678 return pEntryNew;
1679}
1680
/**
 * Reads the specified data from the endpoint using the cache if possible.
 *
 * @returns VBox status code.
 *          VINF_AIO_TASK_PENDING if part of the request had to be deferred.
 * @param pEndpoint The endpoint to read from.
 * @param pTask The task structure used as identifier for this request.
 * @param off The offset to start reading from.
 * @param paSegments Pointer to the array holding the destination buffers.
 * @param cSegments Number of segments in the array.
 * @param cbRead Number of bytes to read.
 */
int pdmacFileEpCacheRead(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
                         RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
                         size_t cbRead)
{
    int rc = VINF_SUCCESS;
    PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
    PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
    PPDMACFILECACHEENTRY pEntry;

    LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbRead=%u\n",
                 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbRead));

    pTask->cbTransferLeft = cbRead;
    /* Set to completed to make sure that the task is valid while we access it. */
    ASMAtomicWriteBool(&pTask->fCompleted, true);

    /* Init the I/O memory context */
    PDMIOMEMCTX IoMemCtx;
    pdmIoMemCtxInit(&IoMemCtx, paSegments, cSegments);

    /* Walk the request range, satisfying it piecewise from cache entries
     * and/or passthrough I/O until everything is accounted for. */
    while (cbRead)
    {
        size_t cbToRead;

        pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);

        /*
         * If there is no entry we try to create a new one eviciting unused pages
         * if the cache is full. If this is not possible we will pass the request through
         * and skip the caching (all entries may be still in progress so they can't
         * be evicted)
         * If we have an entry it can be in one of the LRU lists where the entry
         * contains data (recently used or frequently used LRU) so we can just read
         * the data we need and put the entry at the head of the frequently used LRU list.
         * In case the entry is in one of the ghost lists it doesn't contain any data.
         * We have to fetch it again evicting pages from either T1 or T2 to make room.
         */
        if (pEntry)
        {
            RTFOFF OffDiff = off - pEntry->Core.Key;

            AssertMsg(off >= pEntry->Core.Key,
                      ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
                      off, pEntry->Core.Key));

            AssertPtr(pEntry->pList);

            /* Never read past the end of this entry's buffer. */
            cbToRead = RT_MIN(pEntry->cbData - OffDiff, cbRead);

            /* NOTE(review): this bound looks far too lax - presumably it was meant
             * to be pEntry->Core.KeyLast + 1 rather than Key + KeyLast + 1; verify. */
            AssertMsg(off + (RTFOFF)cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
                      ("Buffer of cache entry exceeded off=%RTfoff cbToRead=%d\n",
                       off, cbToRead));

            cbRead -= cbToRead;

            if (!cbRead)
                STAM_COUNTER_INC(&pCache->cHits);
            else
                STAM_COUNTER_INC(&pCache->cPartialHits);

            STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);

            /* Ghost lists contain no data. */
            if (   (pEntry->pList == &pCache->LruRecentlyUsedIn)
                || (pEntry->pList == &pCache->LruFrequentlyUsed))
            {
                if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
                                                                   PDMACFILECACHE_ENTRY_IS_DEPRECATED,
                                                                   0))
                {
                    /* Entry is deprecated. Read data from the new buffer.
                     * (The helper above returned with the write sem held.) */
                    pdmacFileEpCacheCopyToIoMemCtx(&IoMemCtx, pEntry->pbDataReplace + OffDiff, cbToRead);
                    ASMAtomicSubS32(&pTask->cbTransferLeft, cbToRead);
                    RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
                }
                else
                {
                    if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
                                                                       PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
                                                                       PDMACFILECACHE_ENTRY_IS_DIRTY))
                    {
                        /* Entry didn't completed yet. Append to the list
                         * of waiters; completion will deliver the data. */
                        pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
                                                        &IoMemCtx,
                                                        OffDiff, cbToRead,
                                                        false /* fWrite */);
                        RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
                    }
                    else
                    {
                        /* Read as much as we can from the entry. */
                        pdmacFileEpCacheCopyToIoMemCtx(&IoMemCtx, pEntry->pbData + OffDiff, cbToRead);
                        ASMAtomicSubS32(&pTask->cbTransferLeft, cbToRead);
                    }
                }

                /* Move this entry to the top position */
                if (pEntry->pList == &pCache->LruFrequentlyUsed)
                {
                    pdmacFileCacheLockEnter(pCache);
                    pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
                    pdmacFileCacheLockLeave(pCache);
                }
                /* Release the entry */
                pdmacFileEpCacheEntryRelease(pEntry);
            }
            else
            {
                /* Ghost hit: the entry descriptor survives but its data is gone
                 * and must be re-fetched from the file. */
                uint8_t *pbBuffer = NULL;

                LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));

                pdmacFileCacheLockEnter(pCache);
                pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
                bool fEnough = pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);

                /* Move the entry to Am and fetch it to the cache. */
                if (fEnough)
                {
                    pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
                    pdmacFileCacheAdd(pCache, pEntry->cbData);
                    pdmacFileCacheLockLeave(pCache);

                    /* Reuse an evicted buffer if the reclaim handed one back. */
                    if (pbBuffer)
                        pEntry->pbData = pbBuffer;
                    else
                        pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
                    AssertPtr(pEntry->pbData);

                    pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
                                                    &IoMemCtx,
                                                    OffDiff, cbToRead,
                                                    false /* fWrite */);
                    pdmacFileCacheReadFromEndpoint(pEntry);
                    /* Release the entry */
                    pdmacFileEpCacheEntryRelease(pEntry);
                }
                else
                {
                    /* Could not make room: drop the ghost entry entirely and
                     * pass this part of the request straight to the I/O manager. */
                    RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
                    STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
                    RTAvlrFileOffsetRemove(pEndpointCache->pTree, pEntry->Core.Key);
                    STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
                    RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);

                    pdmacFileCacheLockLeave(pCache);

                    /* Only the descriptor is freed here - a ghost entry presumably
                     * carries no pbData buffer (ghost lists contain no data). */
                    RTMemFree(pEntry);

                    pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
                                                       &IoMemCtx, off, cbToRead,
                                                       PDMACTASKFILETRANSFER_READ);
                }
            }
        }
        else
        {
#ifdef VBOX_WITH_IO_READ_CACHE
            /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
            PPDMACFILECACHEENTRY pEntryNew = pdmacFileEpCacheEntryCreate(pEndpoint,
                                                                         pEndpointCache,
                                                                         off, cbRead,
                                                                         PAGE_SIZE,
                                                                         &cbToRead);

            cbRead -= cbToRead;

            if (pEntryNew)
            {
                if (!cbRead)
                    STAM_COUNTER_INC(&pCache->cMisses);
                else
                    STAM_COUNTER_INC(&pCache->cPartialHits);

                pdmacFileEpCacheEntryWaitersAdd(pEntryNew, pTask,
                                                &IoMemCtx,
                                                off - pEntryNew->Core.Key,
                                                cbToRead,
                                                false /* fWrite */);
                pdmacFileCacheReadFromEndpoint(pEntryNew);
                pdmacFileEpCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
            }
            else
            {
                /*
                 * There is not enough free space in the cache.
                 * Pass the request directly to the I/O manager.
                 */
                LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));

                pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
                                                   &IoMemCtx, off, cbToRead,
                                                   PDMACTASKFILETRANSFER_READ);
            }
#else
            /* Clip read size if neccessary so the passthrough does not run
             * into the next cached entry. */
            PPDMACFILECACHEENTRY pEntryAbove;
            pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off,
                                                         &pEntryAbove, NULL);

            if (pEntryAbove)
            {
                if (off + (RTFOFF)cbRead > pEntryAbove->Core.Key)
                    cbToRead = pEntryAbove->Core.Key - off;
                else
                    cbToRead = cbRead;

                pdmacFileEpCacheEntryRelease(pEntryAbove);
            }
            else
                cbToRead = cbRead;

            cbRead -= cbToRead;
            pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
                                               &IoMemCtx, off, cbToRead,
                                               PDMACTASKFILETRANSFER_READ);
#endif
        }
        off += cbToRead;
    }

    /* Drop the "in use" marker set at the top; if everything completed
     * synchronously, complete the task now, otherwise report it pending. */
    ASMAtomicWriteBool(&pTask->fCompleted, false);

    if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
        && !ASMAtomicXchgBool(&pTask->fCompleted, true))
        pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
    else
        rc = VINF_AIO_TASK_PENDING;

    LogFlowFunc((": Leave rc=%Rrc\n", rc));

    return rc;
}
1925
1926/**
1927 * Writes the given data to the endpoint using the cache if possible.
1928 *
1929 * @returns VBox status code.
 *          VINF_AIO_TASK_PENDING if any part of the request is still in flight
 *          when we return (the task is completed later by the I/O worker).
1930 * @param pEndpoint The endpoint to write to.
1931 * @param pTask The task structure used as identifier for this request.
1932 * @param off The offset to start writing to
1933 * @param paSegments Pointer to the array holding the source buffers.
1934 * @param cSegments Number of segments in the array.
1935 * @param cbWrite Number of bytes to write.
1936 */
1937int pdmacFileEpCacheWrite(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1938                          RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1939                          size_t cbWrite)
1940{
1941    int rc = VINF_SUCCESS;
1942    PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1943    PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1944    PPDMACFILECACHEENTRY pEntry;
1945
1946    LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbWrite=%u\n",
1947                 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbWrite));
1948
    /* NOTE(review): cbWrite (size_t) is stored in a signed 32-bit counter -
     * presumably single requests are always < 2GiB; confirm against callers. */
1949    pTask->cbTransferLeft = cbWrite;
1950    /* Set to completed to make sure that the task is valid while we access it. */
1951    ASMAtomicWriteBool(&pTask->fCompleted, true);
1952
1953    /* Init the I/O memory context */
1954    PDMIOMEMCTX IoMemCtx;
1955    pdmIoMemCtxInit(&IoMemCtx, paSegments, cSegments);
1956
    /* Process the request chunk-wise: each iteration handles the largest piece
     * covered by one cache entry, or creates a new entry / passes through. */
1957    while (cbWrite)
1958    {
1959        size_t cbToWrite;
1960
1961        pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1962
1963        if (pEntry)
1964        {
1965            /* Write the data into the entry and mark it as dirty */
1966            AssertPtr(pEntry->pList);
1967
1968            RTFOFF OffDiff = off - pEntry->Core.Key;
1969
1970            AssertMsg(off >= pEntry->Core.Key,
1971                      ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1972                      off, pEntry->Core.Key));
1973
1974            cbToWrite = RT_MIN(pEntry->cbData - OffDiff, cbWrite);
1975            cbWrite  -= cbToWrite;
1976
1977            if (!cbWrite)
1978                STAM_COUNTER_INC(&pCache->cHits);
1979            else
1980                STAM_COUNTER_INC(&pCache->cPartialHits);
1981
1982            STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1983
1984            /* Ghost lists contain no data. */
1985            if (   (pEntry->pList == &pCache->LruRecentlyUsedIn)
1986                || (pEntry->pList == &pCache->LruFrequentlyUsed))
1987            {
                /* Check if the buffer is deprecated.
                 * NOTE: on success this helper returns with SemRWEntries held
                 * for writing; every branch below must release it. */
1988                /* Check if the buffer is deprecated. */
1989                if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1990                                                                  PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1991                                                                  0))
1992                {
1993                    AssertMsg(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1994                              ("Entry is deprecated but not in progress\n"));
1995                    AssertPtr(pEntry->pbDataReplace);
1996
1997                    LogFlow(("Writing to deprecated buffer of entry %#p\n", pEntry));
1998
1999                    /* Update the data from the write. */
2000                    pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
2001                                                     pEntry->pbDataReplace + OffDiff,
2002                                                     cbToWrite);
2003                    ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
2004                    RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
2005                }
2006                else /* Deprecated flag not set */
2007                {
2008                    /* Check if the entry is dirty. */
2009                    if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
2010                                                                      PDMACFILECACHE_ENTRY_IS_DIRTY,
2011                                                                      0))
2012                    {
2013                        /* If it is dirty but not in progress just update the data. */
2014                        if (!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS))
2015                        {
2016                            pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
2017                                                             pEntry->pbData + OffDiff,
2018                                                             cbToWrite);
2019                        }
2020                        else
2021                        {
2022                            Assert(!pEntry->pbDataReplace);
2023
                            /* The old buffer is being written out; give the
                             * entry a replacement buffer so this write does not
                             * race with the in-flight I/O. */
2024                            /* Deprecate the current buffer. */
2025                            if (!pEntry->pWaitingHead)
2026                                pEntry->pbDataReplace = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2027
2028                            /* If we are out of memory or have waiting segments
2029                             * defer the write. */
2030                            if (!pEntry->pbDataReplace || pEntry->pWaitingHead)
2031                            {
2032                                /* The data isn't written to the file yet */
2033                                pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
2034                                                                &IoMemCtx,
2035                                                                OffDiff, cbToWrite,
2036                                                                true /* fWrite */);
2037                                STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
2038                            }
2039                            else /* Deprecate buffer */
2040                            {
2041                                LogFlow(("Deprecating buffer for entry %#p\n", pEntry));
2042                                pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DEPRECATED;
2043
2044                                /* Copy the data before the update. */
2045                                if (OffDiff)
2046                                    memcpy(pEntry->pbDataReplace, pEntry->pbData, OffDiff);
2047
2048                                /* Copy data behind the update. */
2049                                if ((pEntry->cbData - OffDiff - cbToWrite) > 0)
2050                                    memcpy(pEntry->pbDataReplace + OffDiff + cbToWrite,
2051                                           pEntry->pbData + OffDiff + cbToWrite,
2052                                           (pEntry->cbData - OffDiff - cbToWrite));
2053
2054                                /* Update the data from the write. */
2055                                pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
2056                                                                 pEntry->pbDataReplace + OffDiff,
2057                                                                 cbToWrite);
2058                                /* We are done here. A new write is initiated if the current request completes. */
2059                            }
2060                        }
2061
2062                        ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
2063                        RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
2064                    }
2065                    else /* Dirty bit not set */
2066                    {
2067                        /*
2068                         * Check if a read is in progress for this entry.
2069                         * We have to defer processing in that case.
2070                         */
2071                        if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
2072                                                                          PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
2073                                                                          0))
2074                        {
2075                            pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
2076                                                            &IoMemCtx,
2077                                                            OffDiff, cbToWrite,
2078                                                            true /* fWrite */);
2079                            STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
2080                            RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
2081                        }
2082                        else /* I/O in progress flag not set */
2083                        {
2084                            /* Write as much as we can into the entry and update the file. */
2085                            pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
2086                                                             pEntry->pbData + OffDiff,
2087                                                             cbToWrite);
2088                            ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
2089
2090                            bool fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntry);
2091                            if (fCommit)
2092                                pdmacFileCacheCommitDirtyEntries(pCache);
2093                        }
2094                    } /* Dirty bit not set */
2095
2096                    /* Move this entry to the top position */
2097                    if (pEntry->pList == &pCache->LruFrequentlyUsed)
2098                    {
2099                        pdmacFileCacheLockEnter(pCache);
2100                        pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2101                        pdmacFileCacheLockLeave(pCache);
2102                    } /* Deprecated flag not set. */
2103                }
2104                pdmacFileEpCacheEntryRelease(pEntry);
2105            }
2106            else /* Entry is on the ghost list */
2107            {
2108                uint8_t *pbBuffer = NULL;
2109
2110                pdmacFileCacheLockEnter(pCache);
2111                pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2112                bool fEnough = pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2113
2114                if (fEnough)
2115                {
2116                    /* Move the entry to Am and fetch it to the cache. */
2117                    pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2118                    pdmacFileCacheAdd(pCache, pEntry->cbData);
2119                    pdmacFileCacheLockLeave(pCache);
2120
2121                    if (pbBuffer)
2122                        pEntry->pbData = pbBuffer;
2123                    else
2124                        pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2125                    AssertPtr(pEntry->pbData);
2126
                    /* The entry's data must be fetched first; queue the write
                     * as a waiter so it is applied when the read completes. */
2127                    pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
2128                                                    &IoMemCtx,
2129                                                    OffDiff, cbToWrite,
2130                                                    true /* fWrite */);
2131                    STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
2132                    pdmacFileCacheReadFromEndpoint(pEntry);
2133
2134                    /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2135                    pdmacFileEpCacheEntryRelease(pEntry);
2136                }
2137                else
2138                {
                    /* Could not reclaim enough space: drop the ghost entry and
                     * bypass the cache for this chunk. */
2139                    RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
2140                    STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2141                    RTAvlrFileOffsetRemove(pEndpointCache->pTree, pEntry->Core.Key);
2142                    STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2143                    RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
2144
2145                    pdmacFileCacheLockLeave(pCache);
2146
2147                    RTMemFree(pEntry);
2148                    pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
2149                                                       &IoMemCtx, off, cbToWrite,
2150                                                       PDMACTASKFILETRANSFER_WRITE);
2151                }
2152            }
2153        }
2154        else /* No entry found */
2155        {
2156            /*
2157             * No entry found. Try to create a new cache entry to store the data in and if that fails
2158             * write directly to the file.
2159             */
2160            PPDMACFILECACHEENTRY pEntryNew = pdmacFileEpCacheEntryCreate(pEndpoint,
2161                                                                          pEndpointCache,
2162                                                                          off, cbWrite,
2163                                                                          512,
2164                                                                          &cbToWrite);
2165
2166            cbWrite -= cbToWrite;
2167
2168            if (pEntryNew)
2169            {
2170                RTFOFF offDiff = off - pEntryNew->Core.Key;
2171
2172                STAM_COUNTER_INC(&pCache->cHits);
2173
2174                /*
2175                 * Check if it is possible to just write the data without waiting
2176                 * for it to get fetched first.
2177                 */
2178                if (!offDiff && pEntryNew->cbData == cbToWrite)
2179                {
2180                    pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
2181                                                     pEntryNew->pbData,
2182                                                     cbToWrite);
2183                    ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
2184
2185                    bool fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntryNew);
2186                    if (fCommit)
2187                        pdmacFileCacheCommitDirtyEntries(pCache);
2188                    STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2189                }
2190                else
2191                {
2192                    /* Defer the write and fetch the data from the endpoint. */
2193                    pdmacFileEpCacheEntryWaitersAdd(pEntryNew, pTask,
2194                                                    &IoMemCtx,
2195                                                    offDiff, cbToWrite,
2196                                                    true /* fWrite */);
2197                    STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
2198                    pdmacFileCacheReadFromEndpoint(pEntryNew);
2199                }
2200
2201                pdmacFileEpCacheEntryRelease(pEntryNew);
2202            }
2203            else
2204            {
2205                /*
2206                 * There is not enough free space in the cache.
2207                 * Pass the request directly to the I/O manager.
2208                 */
2209                LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2210
2211                STAM_COUNTER_INC(&pCache->cMisses);
2212
2213                pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
2214                                                   &IoMemCtx, off, cbToWrite,
2215                                                   PDMACTASKFILETRANSFER_WRITE);
2216            }
2217        }
2218
2219        off += cbToWrite;
2220    }
2221
    /* Drop the completion guard set at the top. If everything was handled
     * synchronously (nothing left to transfer) complete the task here;
     * otherwise the last finishing I/O worker completes it and we return
     * VINF_AIO_TASK_PENDING. The XchgBool resolves the race with workers. */
2222    ASMAtomicWriteBool(&pTask->fCompleted, false);
2223
2224    if (   ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
2225        && !ASMAtomicXchgBool(&pTask->fCompleted, true))
2226        pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
2227    else
2228        rc = VINF_AIO_TASK_PENDING;
2229
2230    LogFlowFunc((": Leave rc=%Rrc\n", rc));
2231
2232    return rc;
2233}
2234
2235int pdmacFileEpCacheFlush(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask)
2236{
2237 int rc = VINF_SUCCESS;
2238
2239 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p\n",
2240 pEndpoint, pEndpoint->Core.pszUri, pTask));
2241
2242 if (ASMAtomicReadPtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush))
2243 rc = VERR_RESOURCE_BUSY;
2244 else
2245 {
2246 /* Check for dirty entries in the cache. */
2247 pdmacFileCacheEndpointCommit(&pEndpoint->DataCache);
2248 if (ASMAtomicReadU32(&pEndpoint->DataCache.cWritesOutstanding) > 0)
2249 {
2250 ASMAtomicWritePtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush, pTask);
2251 rc = VINF_AIO_TASK_PENDING;
2252 }
2253 else
2254 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
2255 }
2256
2257 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2258 return rc;
2259}
2260
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette