VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileCache.cpp@ 26994

Last change on this file since 26994 was 26956, checked in by vboxsync, 15 years ago

I/O Cache: Fix a crash

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 80.5 KB
Line 
1/* $Id: PDMAsyncCompletionFileCache.cpp 26956 2010-03-02 16:03:58Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * File data cache.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22
23/** @page pg_pdm_async_completion_cache PDM Async Completion Cache - The file I/O cache
24 * This component implements an I/O cache for file endpoints based on the 2Q cache algorithm.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
31#include <iprt/types.h>
32#include <iprt/mem.h>
33#include <iprt/path.h>
34#include <VBox/log.h>
35#include <VBox/stam.h>
36
37#include "PDMAsyncCompletionFileInternal.h"
38
39/**
40 * A I/O memory context.
41 */
42typedef struct PDMIOMEMCTX
43{
44 /** Pointer to the scatter/gather list. */
45 PCPDMDATASEG paDataSeg;
46 /** Number of segments. */
47 size_t cSegments;
48 /** Current segment we are in. */
49 unsigned iSegIdx;
50 /** Pointer to the current buffer. */
51 uint8_t *pbBuf;
52 /** Number of bytes left in the current buffer. */
53 size_t cbBufLeft;
54} PDMIOMEMCTX, *PPDMIOMEMCTX;
55
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given cache. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while(0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for writing. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while(0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for reading. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while(0)

#else
/* Non-strict builds: the ownership checks compile to nothing. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while(0)
#endif
83
84/*******************************************************************************
85* Internal Functions *
86*******************************************************************************/
87static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser);
88
89/**
90 * Decrement the reference counter of the given cache entry.
91 *
92 * @returns nothing.
93 * @param pEntry The entry to release.
94 */
95DECLINLINE(void) pdmacFileEpCacheEntryRelease(PPDMACFILECACHEENTRY pEntry)
96{
97 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
98 ASMAtomicDecU32(&pEntry->cRefs);
99}
100
101/**
102 * Increment the reference counter of the given cache entry.
103 *
104 * @returns nothing.
105 * @param pEntry The entry to reference.
106 */
107DECLINLINE(void) pdmacFileEpCacheEntryRef(PPDMACFILECACHEENTRY pEntry)
108{
109 ASMAtomicIncU32(&pEntry->cRefs);
110}
111
112/**
113 * Initialize a I/O memory context.
114 *
115 * @returns nothing
116 * @param pIoMemCtx Pointer to a unitialized I/O memory context.
117 * @param paDataSeg Pointer to the S/G list.
118 * @param cSegments Number of segments in the S/G list.
119 */
120DECLINLINE(void) pdmIoMemCtxInit(PPDMIOMEMCTX pIoMemCtx, PCPDMDATASEG paDataSeg, size_t cSegments)
121{
122 AssertMsg((cSegments > 0) && paDataSeg, ("Trying to initialize a I/O memory context without a S/G list\n"));
123
124 pIoMemCtx->paDataSeg = paDataSeg;
125 pIoMemCtx->cSegments = cSegments;
126 pIoMemCtx->iSegIdx = 0;
127 pIoMemCtx->pbBuf = (uint8_t *)paDataSeg[0].pvSeg;
128 pIoMemCtx->cbBufLeft = paDataSeg[0].cbSeg;
129}
130
131/**
132 * Return a buffer from the I/O memory context.
133 *
134 * @returns Pointer to the buffer
135 * @param pIoMemCtx Pointer to the I/O memory context.
136 * @param pcbData Pointer to the amount of byte requested.
137 * If the current buffer doesn't have enough bytes left
138 * the amount is returned in the variable.
139 */
140DECLINLINE(uint8_t *) pdmIoMemCtxGetBuffer(PPDMIOMEMCTX pIoMemCtx, size_t *pcbData)
141{
142 size_t cbData = RT_MIN(*pcbData, pIoMemCtx->cbBufLeft);
143 uint8_t *pbBuf = pIoMemCtx->pbBuf;
144
145 pIoMemCtx->cbBufLeft -= cbData;
146
147 /* Advance to the next segment if required. */
148 if (!pIoMemCtx->cbBufLeft)
149 {
150 pIoMemCtx->iSegIdx++;
151
152 if (RT_UNLIKELY(pIoMemCtx->iSegIdx == pIoMemCtx->cSegments))
153 {
154 pIoMemCtx->cbBufLeft = 0;
155 pIoMemCtx->pbBuf = NULL;
156 }
157 else
158 {
159 pIoMemCtx->pbBuf = (uint8_t *)pIoMemCtx->paDataSeg[pIoMemCtx->iSegIdx].pvSeg;
160 pIoMemCtx->cbBufLeft = pIoMemCtx->paDataSeg[pIoMemCtx->iSegIdx].cbSeg;
161 }
162
163 *pcbData = cbData;
164 }
165 else
166 pIoMemCtx->pbBuf += cbData;
167
168 return pbBuf;
169}
170
#ifdef DEBUG
/**
 * Sanity checks the byte accounting of the global cache (DEBUG builds only).
 *
 * @returns nothing.
 * @param   pCache    The global cache instance to check.
 */
static void pdmacFileCacheValidate(PPDMACFILECACHEGLOBAL pCache)
{
    /* The total must stay within the configured limit. */
    AssertMsg(pCache->cbCached <= pCache->cbMax,
              ("Current amount of cached data exceeds maximum\n"));

    /* The total must equal what the recently used in list and the frequently used list hold together. */
    AssertMsg(pCache->cbCached == pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The paged out list has its own limit. */
    AssertMsg(!(pCache->LruRecentlyUsedOut.cbCached > pCache->cbRecentlyUsedOutMax),
              ("Paged out list exceeds maximum\n"));
}
#endif
186
187DECLINLINE(void) pdmacFileCacheLockEnter(PPDMACFILECACHEGLOBAL pCache)
188{
189 RTCritSectEnter(&pCache->CritSect);
190#ifdef DEBUG
191 pdmacFileCacheValidate(pCache);
192#endif
193}
194
195DECLINLINE(void) pdmacFileCacheLockLeave(PPDMACFILECACHEGLOBAL pCache)
196{
197#ifdef DEBUG
198 pdmacFileCacheValidate(pCache);
199#endif
200 RTCritSectLeave(&pCache->CritSect);
201}
202
203DECLINLINE(void) pdmacFileCacheSub(PPDMACFILECACHEGLOBAL pCache, uint32_t cbAmount)
204{
205 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
206 pCache->cbCached -= cbAmount;
207}
208
209DECLINLINE(void) pdmacFileCacheAdd(PPDMACFILECACHEGLOBAL pCache, uint32_t cbAmount)
210{
211 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
212 pCache->cbCached += cbAmount;
213}
214
215DECLINLINE(void) pdmacFileCacheListAdd(PPDMACFILELRULIST pList, uint32_t cbAmount)
216{
217 pList->cbCached += cbAmount;
218}
219
220DECLINLINE(void) pdmacFileCacheListSub(PPDMACFILELRULIST pList, uint32_t cbAmount)
221{
222 pList->cbCached -= cbAmount;
223}
224
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Checks consistency of a LRU list.
 *
 * Verifies that following the pNext pointers terminates (no element is linked
 * twice, no cycle), that a given element is not part of the list and that the
 * last element reachable from the head is the recorded tail.
 *
 * @returns nothing
 * @param   pList         The LRU list to check.
 * @param   pNotInList    Element which is not allowed to occur in the list.
 */
static void pdmacFileCacheCheckList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pNotInList)
{
    /*
     * Cycle detection with two cursors moving at different speeds. An element
     * that is linked twice necessarily forms a cycle through pNext, so this
     * covers both cases and, unlike a nested walk, also terminates when the
     * cycle does not contain the list head.
     */
    PPDMACFILECACHEENTRY pSlow = pList->pHead;
    PPDMACFILECACHEENTRY pFast = pList->pHead;

    while (pFast && pFast->pNext)
    {
        pSlow = pSlow->pNext;
        pFast = pFast->pNext->pNext;

        AssertMsg(pSlow != pFast,
                  ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                   pSlow, pList));
        if (pSlow == pFast)
            return; /* The walk below would never end. */
    }

    /* The list is finite, check the individual elements. */
    for (PPDMACFILECACHEENTRY pCurr = pList->pHead; pCurr; pCurr = pCurr->pNext)
    {
        AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));

        if (!pCurr->pNext)
            AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
    }
}
#endif
259
260/**
261 * Unlinks a cache entry from the LRU list it is assigned to.
262 *
263 * @returns nothing.
264 * @param pEntry The entry to unlink.
265 */
266static void pdmacFileCacheEntryRemoveFromList(PPDMACFILECACHEENTRY pEntry)
267{
268 PPDMACFILELRULIST pList = pEntry->pList;
269 PPDMACFILECACHEENTRY pPrev, pNext;
270
271 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
272
273 AssertPtr(pList);
274
275#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
276 pdmacFileCacheCheckList(pList, NULL);
277#endif
278
279 pPrev = pEntry->pPrev;
280 pNext = pEntry->pNext;
281
282 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
283 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
284
285 if (pPrev)
286 pPrev->pNext = pNext;
287 else
288 {
289 pList->pHead = pNext;
290
291 if (pNext)
292 pNext->pPrev = NULL;
293 }
294
295 if (pNext)
296 pNext->pPrev = pPrev;
297 else
298 {
299 pList->pTail = pPrev;
300
301 if (pPrev)
302 pPrev->pNext = NULL;
303 }
304
305 pEntry->pList = NULL;
306 pEntry->pPrev = NULL;
307 pEntry->pNext = NULL;
308 pdmacFileCacheListSub(pList, pEntry->cbData);
309#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
310 pdmacFileCacheCheckList(pList, pEntry);
311#endif
312}
313
314/**
315 * Adds a cache entry to the given LRU list unlinking it from the currently
316 * assigned list if needed.
317 *
318 * @returns nothing.
319 * @param pList List to the add entry to.
320 * @param pEntry Entry to add.
321 */
322static void pdmacFileCacheEntryAddToList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pEntry)
323{
324 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
325#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
326 pdmacFileCacheCheckList(pList, NULL);
327#endif
328
329 /* Remove from old list if needed */
330 if (pEntry->pList)
331 pdmacFileCacheEntryRemoveFromList(pEntry);
332
333 pEntry->pNext = pList->pHead;
334 if (pList->pHead)
335 pList->pHead->pPrev = pEntry;
336 else
337 {
338 Assert(!pList->pTail);
339 pList->pTail = pEntry;
340 }
341
342 pEntry->pPrev = NULL;
343 pList->pHead = pEntry;
344 pdmacFileCacheListAdd(pList, pEntry->cbData);
345 pEntry->pList = pList;
346#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
347 pdmacFileCacheCheckList(pList, NULL);
348#endif
349}
350
351/**
352 * Destroys a LRU list freeing all entries.
353 *
354 * @returns nothing
355 * @param pList Pointer to the LRU list to destroy.
356 *
357 * @note The caller must own the critical section of the cache.
358 */
359static void pdmacFileCacheDestroyList(PPDMACFILELRULIST pList)
360{
361 while (pList->pHead)
362 {
363 PPDMACFILECACHEENTRY pEntry = pList->pHead;
364
365 pList->pHead = pEntry->pNext;
366
367 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
368 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
369
370 RTMemPageFree(pEntry->pbData);
371 RTMemFree(pEntry);
372 }
373}
374
375/**
376 * Tries to remove the given amount of bytes from a given list in the cache
377 * moving the entries to one of the given ghosts lists
378 *
379 * @returns Amount of data which could be freed.
380 * @param pCache Pointer to the global cache data.
381 * @param cbData The amount of the data to free.
382 * @param pListSrc The source list to evict data from.
383 * @param pGhostListSrc The ghost list removed entries should be moved to
384 * NULL if the entry should be freed.
385 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size
386 * @param ppbBuf Where to store the address of the buffer if an entry with the
387 * same size was found and fReuseBuffer is true.
388 *
389 * @note This function may return fewer bytes than requested because entries
390 * may be marked as non evictable if they are used for I/O at the
391 * moment.
392 */
393static size_t pdmacFileCacheEvictPagesFrom(PPDMACFILECACHEGLOBAL pCache, size_t cbData,
394 PPDMACFILELRULIST pListSrc, PPDMACFILELRULIST pGhostListDst,
395 bool fReuseBuffer, uint8_t **ppbBuffer)
396{
397 size_t cbEvicted = 0;
398
399 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
400
401 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
402 AssertMsg( !pGhostListDst
403 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
404 ("Destination list must be NULL or the recently used but paged out list\n"));
405
406 if (fReuseBuffer)
407 {
408 AssertPtr(ppbBuffer);
409 *ppbBuffer = NULL;
410 }
411
412 /* Start deleting from the tail. */
413 PPDMACFILECACHEENTRY pEntry = pListSrc->pTail;
414
415 while ((cbEvicted < cbData) && pEntry)
416 {
417 PPDMACFILECACHEENTRY pCurr = pEntry;
418
419 pEntry = pEntry->pPrev;
420
421 /* We can't evict pages which are currently in progress or dirty but not in progress */
422 if ( !(pCurr->fFlags & PDMACFILECACHE_NOT_EVICTABLE)
423 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
424 {
425 /* Ok eviction candidate. Grab the endpoint semaphore and check again
426 * because somebody else might have raced us. */
427 PPDMACFILEENDPOINTCACHE pEndpointCache = &pCurr->pEndpoint->DataCache;
428 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
429
430 if (!(pCurr->fFlags & PDMACFILECACHE_NOT_EVICTABLE)
431 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
432 {
433 AssertMsg(!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED),
434 ("This entry is deprecated so it should have the I/O in progress flag set\n"));
435 Assert(!pCurr->pbDataReplace);
436
437 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
438
439 if (fReuseBuffer && (pCurr->cbData == cbData))
440 {
441 STAM_COUNTER_INC(&pCache->StatBuffersReused);
442 *ppbBuffer = pCurr->pbData;
443 }
444 else if (pCurr->pbData)
445 RTMemPageFree(pCurr->pbData);
446
447 pCurr->pbData = NULL;
448 cbEvicted += pCurr->cbData;
449
450 pdmacFileCacheEntryRemoveFromList(pCurr);
451 pdmacFileCacheSub(pCache, pCurr->cbData);
452
453 if (pGhostListDst)
454 {
455 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
456
457 PPDMACFILECACHEENTRY pGhostEntFree = pGhostListDst->pTail;
458
459 /* We have to remove the last entries from the paged out list. */
460 while ( ((pGhostListDst->cbCached + pCurr->cbData) > pCache->cbRecentlyUsedOutMax)
461 && pGhostEntFree)
462 {
463 PPDMACFILECACHEENTRY pFree = pGhostEntFree;
464 PPDMACFILEENDPOINTCACHE pEndpointCacheFree = &pFree->pEndpoint->DataCache;
465
466 pGhostEntFree = pGhostEntFree->pPrev;
467
468 RTSemRWRequestWrite(pEndpointCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
469
470 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
471 {
472 pdmacFileCacheEntryRemoveFromList(pFree);
473
474 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
475 RTAvlrFileOffsetRemove(pEndpointCacheFree->pTree, pFree->Core.Key);
476 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
477
478 RTMemFree(pFree);
479 }
480
481 RTSemRWReleaseWrite(pEndpointCacheFree->SemRWEntries);
482 }
483
484 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
485 {
486 /* Couldn't remove enough entries. Delete */
487 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
488 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
489 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
490
491 RTMemFree(pCurr);
492 }
493 else
494 pdmacFileCacheEntryAddToList(pGhostListDst, pCurr);
495 }
496 else
497 {
498 /* Delete the entry from the AVL tree it is assigned to. */
499 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
500 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
501 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
502
503 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
504 RTMemFree(pCurr);
505 }
506 }
507
508 }
509 else
510 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
511 }
512
513 return cbEvicted;
514}
515
516static bool pdmacFileCacheReclaim(PPDMACFILECACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
517{
518 size_t cbRemoved = 0;
519
520 if ((pCache->cbCached + cbData) < pCache->cbMax)
521 return true;
522 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
523 {
524 /* Try to evict as many bytes as possible from A1in */
525 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
526 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
527
528 /*
529 * If it was not possible to remove enough entries
530 * try the frequently accessed cache.
531 */
532 if (cbRemoved < cbData)
533 {
534 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
535
536 /*
537 * If we removed something we can't pass the reuse buffer flag anymore because
538 * we don't need to evict that much data
539 */
540 if (!cbRemoved)
541 cbRemoved += pdmacFileCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
542 NULL, fReuseBuffer, ppbBuffer);
543 else
544 cbRemoved += pdmacFileCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
545 NULL, false, NULL);
546 }
547 }
548 else
549 {
550 /* We have to remove entries from frequently access list. */
551 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
552 NULL, fReuseBuffer, ppbBuffer);
553 }
554
555 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
556 return (cbRemoved >= cbData);
557}
558
559/**
560 * Initiates a read I/O task for the given entry.
561 *
562 * @returns nothing.
563 * @param pEntry The entry to fetch the data to.
564 */
565static void pdmacFileCacheReadFromEndpoint(PPDMACFILECACHEENTRY pEntry)
566{
567 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
568
569 /* Make sure no one evicts the entry while it is accessed. */
570 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
571
572 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
573 AssertPtr(pIoTask);
574
575 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
576
577 pIoTask->pEndpoint = pEntry->pEndpoint;
578 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
579 pIoTask->Off = pEntry->Core.Key;
580 pIoTask->DataSeg.cbSeg = pEntry->cbData;
581 pIoTask->DataSeg.pvSeg = pEntry->pbData;
582 pIoTask->pvUser = pEntry;
583 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
584
585 /* Send it off to the I/O manager. */
586 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
587}
588
589/**
590 * Initiates a write I/O task for the given entry.
591 *
592 * @returns nothing.
593 * @param pEntry The entry to read the data from.
594 */
595static void pdmacFileCacheWriteToEndpoint(PPDMACFILECACHEENTRY pEntry)
596{
597 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
598
599 /* Make sure no one evicts the entry while it is accessed. */
600 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
601
602 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
603 AssertPtr(pIoTask);
604
605 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
606
607 pIoTask->pEndpoint = pEntry->pEndpoint;
608 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
609 pIoTask->Off = pEntry->Core.Key;
610 pIoTask->DataSeg.cbSeg = pEntry->cbData;
611 pIoTask->DataSeg.pvSeg = pEntry->pbData;
612 pIoTask->pvUser = pEntry;
613 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
614 ASMAtomicIncU32(&pEntry->pEndpoint->DataCache.cWritesOutstanding);
615
616 /* Send it off to the I/O manager. */
617 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
618}
619
620/**
621 * Commit a single dirty entry to the endpoint
622 *
623 * @returns nothing
624 * @param pEntry The entry to commit.
625 */
626static void pdmacFileCacheEntryCommit(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
627{
628 NOREF(pEndpointCache);
629 AssertMsg( (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY)
630 && !(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DEPRECATED)),
631 ("Invalid flags set for entry %#p\n", pEntry));
632
633 pdmacFileCacheWriteToEndpoint(pEntry);
634}
635
636/**
637 * Commit all dirty entries for a single endpoint.
638 *
639 * @returns nothing.
640 * @param pEndpointCache The endpoint cache to commit.
641 */
642static void pdmacFileCacheEndpointCommit(PPDMACFILEENDPOINTCACHE pEndpointCache)
643{
644 uint32_t cbCommitted = 0;
645 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
646
647 /* The list is moved to a new header to reduce locking overhead. */
648 RTLISTNODE ListDirtyNotCommitted;
649 RTSPINLOCKTMP Tmp;
650
651 RTListInit(&ListDirtyNotCommitted);
652 RTSpinlockAcquire(pEndpointCache->LockList, &Tmp);
653 RTListMove(&ListDirtyNotCommitted, &pEndpointCache->ListDirtyNotCommitted);
654 RTSpinlockRelease(pEndpointCache->LockList, &Tmp);
655
656 if (!RTListIsEmpty(&ListDirtyNotCommitted))
657 {
658 PPDMACFILECACHEENTRY pEntry = RTListNodeGetFirst(&ListDirtyNotCommitted,
659 PDMACFILECACHEENTRY,
660 NodeNotCommitted);
661
662 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
663 {
664 PPDMACFILECACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMACFILECACHEENTRY,
665 NodeNotCommitted);
666 pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
667 cbCommitted += pEntry->cbData;
668 RTListNodeRemove(&pEntry->NodeNotCommitted);
669 pEntry = pNext;
670 }
671
672 /* Commit the last endpoint */
673 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
674 pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
675 RTListNodeRemove(&pEntry->NodeNotCommitted);
676 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
677 ("Committed all entries but list is not empty\n"));
678 }
679
680 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
681 AssertMsg(pEndpointCache->pCache->cbDirty >= cbCommitted,
682 ("Number of committed bytes exceeds number of dirty bytes\n"));
683 ASMAtomicSubU32(&pEndpointCache->pCache->cbDirty, cbCommitted);
684}
685
686/**
687 * Commit all dirty entries in the cache.
688 *
689 * @returns nothing.
690 * @param pCache The global cache instance.
691 */
692static void pdmacFileCacheCommitDirtyEntries(PPDMACFILECACHEGLOBAL pCache)
693{
694 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
695
696 if (!fCommitInProgress)
697 {
698 pdmacFileCacheLockEnter(pCache);
699 Assert(!RTListIsEmpty(&pCache->ListEndpoints));
700
701 PPDMACFILEENDPOINTCACHE pEndpointCache = RTListNodeGetFirst(&pCache->ListEndpoints,
702 PDMACFILEENDPOINTCACHE,
703 NodeCacheEndpoint);
704 AssertPtr(pEndpointCache);
705
706 while (!RTListNodeIsLast(&pCache->ListEndpoints, &pEndpointCache->NodeCacheEndpoint))
707 {
708 pdmacFileCacheEndpointCommit(pEndpointCache);
709
710 pEndpointCache = RTListNodeGetNext(&pEndpointCache->NodeCacheEndpoint, PDMACFILEENDPOINTCACHE,
711 NodeCacheEndpoint);
712 }
713
714 /* Commit the last endpoint */
715 Assert(RTListNodeIsLast(&pCache->ListEndpoints, &pEndpointCache->NodeCacheEndpoint));
716 pdmacFileCacheEndpointCommit(pEndpointCache);
717
718 pdmacFileCacheLockLeave(pCache);
719 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
720 }
721}
722
723/**
724 * Adds the given entry as a dirty to the cache.
725 *
726 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
727 * @param pEndpointCache The endpoint cache the entry belongs to.
728 * @param pEntry The entry to add.
729 */
730static bool pdmacFileCacheAddDirtyEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
731{
732 bool fDirtyBytesExceeded = false;
733 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
734
735 /* If the commit timer is disabled we commit right away. */
736 if (pCache->u32CommitTimeoutMs == 0)
737 {
738 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
739 pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
740 }
741 else if (!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY))
742 {
743 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
744
745 RTSPINLOCKTMP Tmp;
746 RTSpinlockAcquire(pEndpointCache->LockList, &Tmp);
747 RTListAppend(&pEndpointCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
748 RTSpinlockRelease(pEndpointCache->LockList, &Tmp);
749
750 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
751
752 fDirtyBytesExceeded = (cbDirty >= pCache->cbCommitDirtyThreshold);
753 }
754
755 return fDirtyBytesExceeded;
756}
757
758
759/**
760 * Completes a task segment freeing all ressources and completes the task handle
761 * if everything was transfered.
762 *
763 * @returns Next task segment handle.
764 * @param pEndpointCache The endpoint cache.
765 * @param pTaskSeg Task segment to complete.
766 */
767static PPDMACFILETASKSEG pdmacFileCacheTaskComplete(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILETASKSEG pTaskSeg)
768{
769 PPDMACFILETASKSEG pNext = pTaskSeg->pNext;
770
771 uint32_t uOld = ASMAtomicSubS32(&pTaskSeg->pTask->cbTransferLeft, pTaskSeg->cbTransfer);
772 AssertMsg(uOld >= pTaskSeg->cbTransfer, ("New value would overflow\n"));
773 if (!(uOld - pTaskSeg->cbTransfer)
774 && !ASMAtomicXchgBool(&pTaskSeg->pTask->fCompleted, true))
775 pdmR3AsyncCompletionCompleteTask(&pTaskSeg->pTask->Core, true);
776
777 RTMemFree(pTaskSeg);
778
779 return pNext;
780}
781
782/**
783 * Completion callback for I/O tasks.
784 *
785 * @returns nothing.
786 * @param pTask The completed task.
787 * @param pvUser Opaque user data.
788 */
789static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser)
790{
791 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pvUser;
792 PPDMACFILECACHEGLOBAL pCache = pEntry->pCache;
793 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pEntry->pEndpoint;
794 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
795
796 /* Reference the entry now as we are clearing the I/O in progres flag
797 * which protects the entry till now. */
798 pdmacFileEpCacheEntryRef(pEntry);
799
800 RTSemRWRequestWrite(pEndpoint->DataCache.SemRWEntries, RT_INDEFINITE_WAIT);
801 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
802
803 /* Process waiting segment list. The data in entry might have changed inbetween. */
804 bool fDirty = false;
805 PPDMACFILETASKSEG pCurr = pEntry->pWaitingHead;
806
807 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
808 ("The list tail was not updated correctly\n"));
809 pEntry->pWaitingTail = NULL;
810 pEntry->pWaitingHead = NULL;
811
812 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
813 {
814 AssertMsg(pEndpointCache->cWritesOutstanding > 0, ("Completed write request but outstanding task count is 0\n"));
815 ASMAtomicDecU32(&pEndpointCache->cWritesOutstanding);
816
817 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DIRTY;
818
819 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED)
820 {
821 AssertMsg(!pCurr, ("The entry is deprecated but has waiting write segments attached\n"));
822
823 RTMemPageFree(pEntry->pbData);
824 pEntry->pbData = pEntry->pbDataReplace;
825 pEntry->pbDataReplace = NULL;
826 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DEPRECATED;
827 }
828 else
829 {
830 while (pCurr)
831 {
832 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
833
834 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
835 fDirty = true;
836
837 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
838 }
839 }
840 }
841 else
842 {
843 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_READ, ("Invalid transfer type\n"));
844 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IS_DIRTY | PDMACFILECACHE_ENTRY_IS_DEPRECATED)),
845 ("Invalid flags set\n"));
846
847 while (pCurr)
848 {
849 if (pCurr->fWrite)
850 {
851 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
852 fDirty = true;
853 }
854 else
855 memcpy(pCurr->pvBuf, pEntry->pbData + pCurr->uBufOffset, pCurr->cbTransfer);
856
857 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
858 }
859 }
860
861 bool fCommit = false;
862 if (fDirty)
863 fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntry);
864
865 /* Complete a pending flush if all writes have completed */
866 if (!ASMAtomicReadU32(&pEndpointCache->cWritesOutstanding))
867 {
868 PPDMASYNCCOMPLETIONTASKFILE pTaskFlush = (PPDMASYNCCOMPLETIONTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpointCache->pTaskFlush, NULL);
869 if (pTaskFlush)
870 pdmR3AsyncCompletionCompleteTask(&pTaskFlush->Core, true);
871 }
872
873 RTSemRWReleaseWrite(pEndpoint->DataCache.SemRWEntries);
874
875 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
876 pdmacFileEpCacheEntryRelease(pEntry);
877
878 if (fCommit)
879 pdmacFileCacheCommitDirtyEntries(pCache);
880}
881
882/**
883 * Commit timer callback.
884 */
885static void pdmacFileCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
886{
887 PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pvUser;
888 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
889
890 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
891
892 if (ASMAtomicReadU32(&pCache->cbDirty) > 0)
893 pdmacFileCacheCommitDirtyEntries(pCache);
894
895 TMTimerSetMillies(pTimer, pCache->u32CommitTimeoutMs);
896 LogFlowFunc(("Entries committed, going to sleep\n"));
897}
898
899/**
900 * Initializies the I/O cache.
901 *
902 * returns VBox status code.
903 * @param pClassFile The global class data for file endpoints.
904 * @param pCfgNode CFGM node to query configuration data from.
905 */
906int pdmacFileCacheInit(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile, PCFGMNODE pCfgNode)
907{
908 int rc = VINF_SUCCESS;
909 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
910
911 rc = CFGMR3QueryU32Def(pCfgNode, "CacheSize", &pCache->cbMax, 5 * _1M);
912 AssertLogRelRCReturn(rc, rc);
913
914 RTListInit(&pCache->ListEndpoints);
915 pCache->cRefs = 0;
916 pCache->cbCached = 0;
917 pCache->fCommitInProgress = 0;
918 LogFlowFunc((": Maximum number of bytes cached %u\n", pCache->cbMax));
919
920 /* Initialize members */
921 pCache->LruRecentlyUsedIn.pHead = NULL;
922 pCache->LruRecentlyUsedIn.pTail = NULL;
923 pCache->LruRecentlyUsedIn.cbCached = 0;
924
925 pCache->LruRecentlyUsedOut.pHead = NULL;
926 pCache->LruRecentlyUsedOut.pTail = NULL;
927 pCache->LruRecentlyUsedOut.cbCached = 0;
928
929 pCache->LruFrequentlyUsed.pHead = NULL;
930 pCache->LruFrequentlyUsed.pTail = NULL;
931 pCache->LruFrequentlyUsed.cbCached = 0;
932
933 pCache->cbRecentlyUsedInMax = (pCache->cbMax / 100) * 25; /* 25% of the buffer size */
934 pCache->cbRecentlyUsedOutMax = (pCache->cbMax / 100) * 50; /* 50% of the buffer size */
935 LogFlowFunc((": cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n", pCache->cbRecentlyUsedInMax, pCache->cbRecentlyUsedOutMax));
936
937 /** @todo r=aeichner: Experiment to find optimal default values */
938 rc = CFGMR3QueryU32Def(pCfgNode, "CacheCommitIntervalMs", &pCache->u32CommitTimeoutMs, 10000 /* 10sec */);
939 AssertLogRelRCReturn(rc, rc);
940 rc = CFGMR3QueryU32(pCfgNode, "CacheCommitThreshold", &pCache->cbCommitDirtyThreshold);
941 if ( rc == VERR_CFGM_VALUE_NOT_FOUND
942 || rc == VERR_CFGM_NO_PARENT)
943 {
944 /* Start committing after 50% of the cache are dirty */
945 pCache->cbCommitDirtyThreshold = pCache->cbMax / 2;
946 }
947 else
948 return rc;
949
950 STAMR3Register(pClassFile->Core.pVM, &pCache->cbMax,
951 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
952 "/PDM/AsyncCompletion/File/cbMax",
953 STAMUNIT_BYTES,
954 "Maximum cache size");
955 STAMR3Register(pClassFile->Core.pVM, &pCache->cbCached,
956 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
957 "/PDM/AsyncCompletion/File/cbCached",
958 STAMUNIT_BYTES,
959 "Currently used cache");
960 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedIn.cbCached,
961 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
962 "/PDM/AsyncCompletion/File/cbCachedMruIn",
963 STAMUNIT_BYTES,
964 "Number of bytes cached in MRU list");
965 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedOut.cbCached,
966 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
967 "/PDM/AsyncCompletion/File/cbCachedMruOut",
968 STAMUNIT_BYTES,
969 "Number of bytes cached in FRU list");
970 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyUsed.cbCached,
971 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
972 "/PDM/AsyncCompletion/File/cbCachedFru",
973 STAMUNIT_BYTES,
974 "Number of bytes cached in FRU ghost list");
975
976#ifdef VBOX_WITH_STATISTICS
977 STAMR3Register(pClassFile->Core.pVM, &pCache->cHits,
978 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
979 "/PDM/AsyncCompletion/File/CacheHits",
980 STAMUNIT_COUNT, "Number of hits in the cache");
981 STAMR3Register(pClassFile->Core.pVM, &pCache->cPartialHits,
982 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
983 "/PDM/AsyncCompletion/File/CachePartialHits",
984 STAMUNIT_COUNT, "Number of partial hits in the cache");
985 STAMR3Register(pClassFile->Core.pVM, &pCache->cMisses,
986 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
987 "/PDM/AsyncCompletion/File/CacheMisses",
988 STAMUNIT_COUNT, "Number of misses when accessing the cache");
989 STAMR3Register(pClassFile->Core.pVM, &pCache->StatRead,
990 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
991 "/PDM/AsyncCompletion/File/CacheRead",
992 STAMUNIT_BYTES, "Number of bytes read from the cache");
993 STAMR3Register(pClassFile->Core.pVM, &pCache->StatWritten,
994 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
995 "/PDM/AsyncCompletion/File/CacheWritten",
996 STAMUNIT_BYTES, "Number of bytes written to the cache");
997 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeGet,
998 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
999 "/PDM/AsyncCompletion/File/CacheTreeGet",
1000 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1001 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeInsert,
1002 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1003 "/PDM/AsyncCompletion/File/CacheTreeInsert",
1004 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1005 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeRemove,
1006 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1007 "/PDM/AsyncCompletion/File/CacheTreeRemove",
1008 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1009 STAMR3Register(pClassFile->Core.pVM, &pCache->StatBuffersReused,
1010 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1011 "/PDM/AsyncCompletion/File/CacheBuffersReused",
1012 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1013#endif
1014
1015 /* Initialize the critical section */
1016 rc = RTCritSectInit(&pCache->CritSect);
1017
1018 if (RT_SUCCESS(rc))
1019 {
1020 /* Create the commit timer */
1021 if (pCache->u32CommitTimeoutMs > 0)
1022 rc = TMR3TimerCreateInternal(pClassFile->Core.pVM, TMCLOCK_REAL,
1023 pdmacFileCacheCommitTimerCallback,
1024 pClassFile,
1025 "Cache-Commit",
1026 &pClassFile->Cache.pTimerCommit);
1027
1028 if (RT_SUCCESS(rc))
1029 {
1030 LogRel(("AIOMgr: Cache successfully initialised. Cache size is %u bytes\n", pCache->cbMax));
1031 LogRel(("AIOMgr: Cache commit interval is %u ms\n", pCache->u32CommitTimeoutMs));
1032 LogRel(("AIOMgr: Cache commit threshold is %u bytes\n", pCache->cbCommitDirtyThreshold));
1033 return VINF_SUCCESS;
1034 }
1035
1036 RTCritSectDelete(&pCache->CritSect);
1037 }
1038
1039 return rc;
1040}
1041
1042/**
1043 * Destroysthe cache freeing all data.
1044 *
1045 * returns nothing.
1046 * @param pClassFile The global class data for file endpoints.
1047 */
1048void pdmacFileCacheDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
1049{
1050 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
1051
1052 /* Make sure no one else uses the cache now */
1053 pdmacFileCacheLockEnter(pCache);
1054
1055 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1056 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedIn);
1057 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedOut);
1058 pdmacFileCacheDestroyList(&pCache->LruFrequentlyUsed);
1059
1060 pdmacFileCacheLockLeave(pCache);
1061
1062 RTCritSectDelete(&pCache->CritSect);
1063}
1064
1065/**
1066 * Initializes per endpoint cache data
1067 * like the AVL tree used to access cached entries.
1068 *
1069 * @returns VBox status code.
1070 * @param pEndpoint The endpoint to init the cache for,
1071 * @param pClassFile The global class data for file endpoints.
1072 */
1073int pdmacFileEpCacheInit(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
1074{
1075 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1076
1077 pEndpointCache->pCache = &pClassFile->Cache;
1078 RTListInit(&pEndpointCache->ListDirtyNotCommitted);
1079 int rc = RTSpinlockCreate(&pEndpointCache->LockList);
1080
1081 if (RT_SUCCESS(rc))
1082 {
1083 rc = RTSemRWCreate(&pEndpointCache->SemRWEntries);
1084 if (RT_SUCCESS(rc))
1085 {
1086 pEndpointCache->pTree = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1087 if (pEndpointCache->pTree)
1088 {
1089 pClassFile->Cache.cRefs++;
1090 RTListAppend(&pClassFile->Cache.ListEndpoints, &pEndpointCache->NodeCacheEndpoint);
1091
1092 /* Arm the timer if this is the first endpoint. */
1093 if ( pClassFile->Cache.cRefs == 1
1094 && pClassFile->Cache.u32CommitTimeoutMs > 0)
1095 rc = TMTimerSetMillies(pClassFile->Cache.pTimerCommit, pClassFile->Cache.u32CommitTimeoutMs);
1096 }
1097 else
1098 rc = VERR_NO_MEMORY;
1099
1100 if (RT_FAILURE(rc))
1101 RTSemRWDestroy(pEndpointCache->SemRWEntries);
1102 }
1103
1104 if (RT_FAILURE(rc))
1105 RTSpinlockDestroy(pEndpointCache->LockList);
1106 }
1107
1108#ifdef VBOX_WITH_STATISTICS
1109 if (RT_SUCCESS(rc))
1110 {
1111 STAMR3RegisterF(pClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred,
1112 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1113 STAMUNIT_COUNT, "Number of deferred writes",
1114 "/PDM/AsyncCompletion/File/%s/Cache/DeferredWrites", RTPathFilename(pEndpoint->Core.pszUri));
1115 }
1116#endif
1117
1118 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1119 return rc;
1120}
1121
1122/**
1123 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1124 *
1125 * @returns IPRT status code.
1126 * @param pNode The node to destroy.
1127 * @param pvUser Opaque user data.
1128 */
1129static int pdmacFileEpCacheEntryDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
1130{
1131 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pNode;
1132 PPDMACFILECACHEGLOBAL pCache = (PPDMACFILECACHEGLOBAL)pvUser;
1133 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEntry->pEndpoint->DataCache;
1134
1135 while (ASMAtomicReadU32(&pEntry->fFlags) & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY))
1136 {
1137 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1138 RTThreadSleep(250);
1139 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1140 }
1141
1142 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
1143 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1144
1145 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1146 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1147
1148 pdmacFileCacheEntryRemoveFromList(pEntry);
1149
1150 if (fUpdateCache)
1151 pdmacFileCacheSub(pCache, pEntry->cbData);
1152
1153 RTMemPageFree(pEntry->pbData);
1154 RTMemFree(pEntry);
1155
1156 return VINF_SUCCESS;
1157}
1158
1159/**
1160 * Destroys all cache ressources used by the given endpoint.
1161 *
1162 * @returns nothing.
1163 * @param pEndpoint The endpoint to the destroy.
1164 */
1165void pdmacFileEpCacheDestroy(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1166{
1167 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1168 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1169
1170 /* Make sure nobody is accessing the cache while we delete the tree. */
1171 pdmacFileCacheLockEnter(pCache);
1172 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1173 RTAvlrFileOffsetDestroy(pEndpointCache->pTree, pdmacFileEpCacheEntryDestroy, pCache);
1174 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1175
1176 RTSpinlockDestroy(pEndpointCache->LockList);
1177
1178 pCache->cRefs--;
1179 RTListNodeRemove(&pEndpointCache->NodeCacheEndpoint);
1180
1181 if ( !pCache->cRefs
1182 && pCache->u32CommitTimeoutMs > 0)
1183 TMTimerStop(pCache->pTimerCommit);
1184
1185 pdmacFileCacheLockLeave(pCache);
1186
1187 RTSemRWDestroy(pEndpointCache->SemRWEntries);
1188
1189#ifdef VBOX_WITH_STATISTICS
1190 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
1191
1192 STAMR3Deregister(pEpClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred);
1193#endif
1194}
1195
1196static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
1197{
1198 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1199 PPDMACFILECACHEENTRY pEntry = NULL;
1200
1201 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1202
1203 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1204 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetRangeGet(pEndpointCache->pTree, off);
1205 if (pEntry)
1206 pdmacFileEpCacheEntryRef(pEntry);
1207 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
1208
1209 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1210
1211 return pEntry;
1212}
1213
1214static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheBestFitEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
1215{
1216 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1217 PPDMACFILECACHEENTRY pEntry = NULL;
1218
1219 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1220
1221 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1222 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetGetBestFit(pEndpointCache->pTree, off, true /*fAbove*/);
1223 if (pEntry)
1224 pdmacFileEpCacheEntryRef(pEntry);
1225 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
1226
1227 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1228
1229 return pEntry;
1230}
1231
1232static void pdmacFileEpCacheInsertEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
1233{
1234 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1235
1236 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1237 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1238 bool fInserted = RTAvlrFileOffsetInsert(pEndpointCache->pTree, &pEntry->Core);
1239 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1240 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1241 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1242}
1243
1244/**
1245 * Allocates and initializes a new entry for the cache.
1246 * The entry has a reference count of 1.
1247 *
1248 * @returns Pointer to the new cache entry or NULL if out of memory.
1249 * @param pCache The cache the entry belongs to.
1250 * @param pEndoint The endpoint the entry holds data for.
1251 * @param off Start offset.
1252 * @param cbData Size of the cache entry.
1253 * @param pbBuffer Pointer to the buffer to use.
1254 * NULL if a new buffer should be allocated.
1255 * The buffer needs to have the same size of the entry.
1256 */
1257static PPDMACFILECACHEENTRY pdmacFileCacheEntryAlloc(PPDMACFILECACHEGLOBAL pCache,
1258 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1259 RTFOFF off, size_t cbData, uint8_t *pbBuffer)
1260{
1261 PPDMACFILECACHEENTRY pEntryNew = (PPDMACFILECACHEENTRY)RTMemAllocZ(sizeof(PDMACFILECACHEENTRY));
1262
1263 if (RT_UNLIKELY(!pEntryNew))
1264 return NULL;
1265
1266 pEntryNew->Core.Key = off;
1267 pEntryNew->Core.KeyLast = off + cbData - 1;
1268 pEntryNew->pEndpoint = pEndpoint;
1269 pEntryNew->pCache = pCache;
1270 pEntryNew->fFlags = 0;
1271 pEntryNew->cRefs = 1; /* We are using it now. */
1272 pEntryNew->pList = NULL;
1273 pEntryNew->cbData = cbData;
1274 pEntryNew->pWaitingHead = NULL;
1275 pEntryNew->pWaitingTail = NULL;
1276 pEntryNew->pbDataReplace = NULL;
1277 if (pbBuffer)
1278 pEntryNew->pbData = pbBuffer;
1279 else
1280 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1281
1282 if (RT_UNLIKELY(!pEntryNew->pbData))
1283 {
1284 RTMemFree(pEntryNew);
1285 return NULL;
1286 }
1287
1288 return pEntryNew;
1289}
1290
1291/**
1292 * Adds a segment to the waiting list for a cache entry
1293 * which is currently in progress.
1294 *
1295 * @returns nothing.
1296 * @param pEntry The cache entry to add the segment to.
1297 * @param pSeg The segment to add.
1298 */
1299DECLINLINE(void) pdmacFileEpCacheEntryAddWaitingSegment(PPDMACFILECACHEENTRY pEntry, PPDMACFILETASKSEG pSeg)
1300{
1301 pSeg->pNext = NULL;
1302
1303 if (pEntry->pWaitingHead)
1304 {
1305 AssertPtr(pEntry->pWaitingTail);
1306
1307 pEntry->pWaitingTail->pNext = pSeg;
1308 pEntry->pWaitingTail = pSeg;
1309 }
1310 else
1311 {
1312 Assert(!pEntry->pWaitingTail);
1313
1314 pEntry->pWaitingHead = pSeg;
1315 pEntry->pWaitingTail = pSeg;
1316 }
1317}
1318
1319/**
1320 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1321 * in exclusive mode.
1322 *
1323 * @returns true if the flag in fSet is set and the one in fClear is clear.
1324 * false othwerise.
1325 * The R/W semaphore is only held if true is returned.
1326 *
1327 * @param pEndpointCache The endpoint cache instance data.
1328 * @param pEntry The entry to check the flags for.
1329 * @param fSet The flag which is tested to be set.
1330 * @param fClear The flag which is tested to be clear.
1331 */
1332DECLINLINE(bool) pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(PPDMACFILEENDPOINTCACHE pEndpointCache,
1333 PPDMACFILECACHEENTRY pEntry,
1334 uint32_t fSet, uint32_t fClear)
1335{
1336 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1337 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1338
1339 if (fPassed)
1340 {
1341 /* Acquire the lock and check again becuase the completion callback might have raced us. */
1342 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1343
1344 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1345 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1346
1347 /* Drop the lock if we didn't passed the test. */
1348 if (!fPassed)
1349 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1350 }
1351
1352 return fPassed;
1353}
1354
1355/**
1356 * Copies data to a buffer described by a I/O memory context.
1357 *
1358 * @returns nothing.
1359 * @param pIoMemCtx The I/O memory context to copy the data into.
1360 * @param pbData Pointer to the data data to copy.
1361 * @param cbData Amount of data to copy.
1362 */
1363static void pdmacFileEpCacheCopyToIoMemCtx(PPDMIOMEMCTX pIoMemCtx,
1364 uint8_t *pbData,
1365 size_t cbData)
1366{
1367 while (cbData)
1368 {
1369 size_t cbCopy = cbData;
1370 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbCopy);
1371
1372 AssertPtr(pbBuf);
1373
1374 memcpy(pbBuf, pbData, cbCopy);
1375
1376 cbData -= cbCopy;
1377 pbData += cbCopy;
1378 }
1379}
1380
1381/**
1382 * Copies data from a buffer described by a I/O memory context.
1383 *
1384 * @returns nothing.
1385 * @param pIoMemCtx The I/O memory context to copy the data from.
1386 * @param pbData Pointer to the destination buffer.
1387 * @param cbData Amount of data to copy.
1388 */
1389static void pdmacFileEpCacheCopyFromIoMemCtx(PPDMIOMEMCTX pIoMemCtx,
1390 uint8_t *pbData,
1391 size_t cbData)
1392{
1393 while (cbData)
1394 {
1395 size_t cbCopy = cbData;
1396 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbCopy);
1397
1398 AssertPtr(pbBuf);
1399
1400 memcpy(pbData, pbBuf, cbCopy);
1401
1402 cbData -= cbCopy;
1403 pbData += cbCopy;
1404 }
1405}
1406
1407/**
1408 * Add a buffer described by the I/O memory context
1409 * to the entry waiting for completion.
1410 *
1411 * @returns nothing.
1412 * @param pEntry The entry to add the buffer to.
1413 * @param pTask Task associated with the buffer.
1414 * @param pIoMemCtx The memory context to use.
1415 * @param OffDiff Offset from the start of the buffer
1416 * in the entry.
1417 * @param cbData Amount of data to wait for onthis entry.
1418 * @param fWrite Flag whether the task waits because it wants to write
1419 * to the cache entry.
1420 */
1421static void pdmacFileEpCacheEntryWaitersAdd(PPDMACFILECACHEENTRY pEntry,
1422 PPDMASYNCCOMPLETIONTASKFILE pTask,
1423 PPDMIOMEMCTX pIoMemCtx,
1424 RTFOFF OffDiff,
1425 size_t cbData,
1426 bool fWrite)
1427{
1428 while (cbData)
1429 {
1430 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1431 size_t cbSeg = cbData;
1432 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbSeg);
1433
1434 pSeg->pTask = pTask;
1435 pSeg->uBufOffset = OffDiff;
1436 pSeg->cbTransfer = cbSeg;
1437 pSeg->pvBuf = pbBuf;
1438 pSeg->fWrite = fWrite;
1439
1440 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1441
1442 cbData -= cbSeg;
1443 OffDiff += cbSeg;
1444 }
1445}
1446
1447/**
1448 * Passthrough a part of a request directly to the I/O manager
1449 * handling the endpoint.
1450 *
1451 * @returns nothing.
1452 * @param pEndpoint The endpoint.
1453 * @param pTask The task.
1454 * @param pIoMemCtx The I/O memory context to use.
1455 * @param offStart Offset to start transfer from.
1456 * @param cbData Amount of data to transfer.
1457 * @param enmTransferType The transfer type (read/write)
1458 */
1459static void pdmacFileEpCacheRequestPassthrough(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1460 PPDMASYNCCOMPLETIONTASKFILE pTask,
1461 PPDMIOMEMCTX pIoMemCtx,
1462 RTFOFF offStart, size_t cbData,
1463 PDMACTASKFILETRANSFER enmTransferType)
1464{
1465 while (cbData)
1466 {
1467 size_t cbSeg = cbData;
1468 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbSeg);
1469 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1470 AssertPtr(pIoTask);
1471
1472 pIoTask->pEndpoint = pEndpoint;
1473 pIoTask->enmTransferType = enmTransferType;
1474 pIoTask->Off = offStart;
1475 pIoTask->DataSeg.cbSeg = cbSeg;
1476 pIoTask->DataSeg.pvSeg = pbBuf;
1477 pIoTask->pvUser = pTask;
1478 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1479
1480 offStart += cbSeg;
1481 cbData -= cbSeg;
1482
1483 /* Send it off to the I/O manager. */
1484 pdmacFileEpAddTask(pEndpoint, pIoTask);
1485 }
1486}
1487
1488/**
1489 * Reads the specified data from the endpoint using the cache if possible.
1490 *
1491 * @returns VBox status code.
1492 * @param pEndpoint The endpoint to read from.
1493 * @param pTask The task structure used as identifier for this request.
1494 * @param off The offset to start reading from.
1495 * @param paSegments Pointer to the array holding the destination buffers.
1496 * @param cSegments Number of segments in the array.
1497 * @param cbRead Number of bytes to read.
1498 */
1499int pdmacFileEpCacheRead(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1500 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1501 size_t cbRead)
1502{
1503 int rc = VINF_SUCCESS;
1504 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1505 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1506 PPDMACFILECACHEENTRY pEntry;
1507
1508 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbRead=%u\n",
1509 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbRead));
1510
1511 pTask->cbTransferLeft = cbRead;
1512 /* Set to completed to make sure that the task is valid while we access it. */
1513 ASMAtomicWriteBool(&pTask->fCompleted, true);
1514
1515 /* Init the I/O memory context */
1516 PDMIOMEMCTX IoMemCtx;
1517 pdmIoMemCtxInit(&IoMemCtx, paSegments, cSegments);
1518
1519 while (cbRead)
1520 {
1521 size_t cbToRead;
1522
1523 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1524
1525 /*
1526 * If there is no entry we try to create a new one eviciting unused pages
1527 * if the cache is full. If this is not possible we will pass the request through
1528 * and skip the caching (all entries may be still in progress so they can't
1529 * be evicted)
1530 * If we have an entry it can be in one of the LRU lists where the entry
1531 * contains data (recently used or frequently used LRU) so we can just read
1532 * the data we need and put the entry at the head of the frequently used LRU list.
1533 * In case the entry is in one of the ghost lists it doesn't contain any data.
1534 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1535 */
1536 if (pEntry)
1537 {
1538 RTFOFF OffDiff = off - pEntry->Core.Key;
1539
1540 AssertMsg(off >= pEntry->Core.Key,
1541 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1542 off, pEntry->Core.Key));
1543
1544 AssertPtr(pEntry->pList);
1545
1546 cbToRead = RT_MIN(pEntry->cbData - OffDiff, cbRead);
1547
1548 AssertMsg(off + (RTFOFF)cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1549 ("Buffer of cache entry exceeded off=%RTfoff cbToRead=%d\n",
1550 off, cbToRead));
1551
1552 cbRead -= cbToRead;
1553
1554 if (!cbRead)
1555 STAM_COUNTER_INC(&pCache->cHits);
1556 else
1557 STAM_COUNTER_INC(&pCache->cPartialHits);
1558
1559 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1560
1561 /* Ghost lists contain no data. */
1562 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1563 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1564 {
1565 if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1566 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1567 0))
1568 {
1569 /* Entry is deprecated. Read data from the new buffer. */
1570 pdmacFileEpCacheCopyToIoMemCtx(&IoMemCtx, pEntry->pbDataReplace + OffDiff, cbToRead);
1571 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToRead);
1572 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1573 }
1574 else
1575 {
1576 if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1577 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1578 PDMACFILECACHE_ENTRY_IS_DIRTY))
1579 {
1580 /* Entry didn't completed yet. Append to the list */
1581 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1582 &IoMemCtx,
1583 OffDiff, cbToRead,
1584 false /* fWrite */);
1585 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1586 }
1587 else
1588 {
1589 /* Read as much as we can from the entry. */
1590 pdmacFileEpCacheCopyToIoMemCtx(&IoMemCtx, pEntry->pbData + OffDiff, cbToRead);
1591 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToRead);
1592 }
1593 }
1594
1595 /* Move this entry to the top position */
1596 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1597 {
1598 pdmacFileCacheLockEnter(pCache);
1599 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1600 pdmacFileCacheLockLeave(pCache);
1601 }
1602 /* Release the entry */
1603 pdmacFileEpCacheEntryRelease(pEntry);
1604 }
1605 else
1606 {
1607 uint8_t *pbBuffer = NULL;
1608
1609 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
1610
1611 pdmacFileCacheLockEnter(pCache);
1612 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1613 bool fEnough = pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1614
1615 /* Move the entry to Am and fetch it to the cache. */
1616 if (fEnough)
1617 {
1618 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1619 pdmacFileCacheAdd(pCache, pEntry->cbData);
1620 pdmacFileCacheLockLeave(pCache);
1621
1622 if (pbBuffer)
1623 pEntry->pbData = pbBuffer;
1624 else
1625 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1626 AssertPtr(pEntry->pbData);
1627
1628 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1629 &IoMemCtx,
1630 OffDiff, cbToRead,
1631 false /* fWrite */);
1632 pdmacFileCacheReadFromEndpoint(pEntry);
1633 /* Release the entry */
1634 pdmacFileEpCacheEntryRelease(pEntry);
1635 }
1636 else
1637 {
1638 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1639 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
1640 RTAvlrFileOffsetRemove(pEndpointCache->pTree, pEntry->Core.Key);
1641 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
1642 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1643
1644 pdmacFileCacheLockLeave(pCache);
1645
1646 RTMemFree(pEntry);
1647
1648 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
1649 &IoMemCtx, off, cbToRead,
1650 PDMACTASKFILETRANSFER_READ);
1651 }
1652 }
1653 }
1654 else
1655 {
1656 /* No entry found for this offset. Get best fit entry and fetch the data to the cache. */
1657 size_t cbToReadAligned;
1658 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1659
1660 LogFlow(("%sbest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1661 pEntryBestFit ? "" : "No ",
1662 off,
1663 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1664 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1665 pEntryBestFit ? pEntryBestFit->cbData : 0));
1666
1667 if ( pEntryBestFit
1668 && off + (RTFOFF)cbRead > pEntryBestFit->Core.Key)
1669 {
1670 cbToRead = pEntryBestFit->Core.Key - off;
1671 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1672 cbToReadAligned = cbToRead;
1673 }
1674 else
1675 {
1676 /*
1677 * Align the size to a 4KB boundary.
1678 * Memory size is aligned to a page boundary
1679 * and memory is wasted if the size is rahter small.
1680 * (For example reads with a size of 512 bytes.
1681 */
1682 cbToRead = cbRead;
1683 cbToReadAligned = RT_ALIGN_Z(cbRead, PAGE_SIZE);
1684
1685 /* Clip read to file size */
1686 cbToReadAligned = RT_MIN(pEndpoint->cbFile - off, cbToReadAligned);
1687 if (pEntryBestFit)
1688 {
1689 Assert(pEntryBestFit->Core.Key >= off);
1690 cbToReadAligned = RT_MIN(cbToReadAligned, (uint64_t)pEntryBestFit->Core.Key - off);
1691 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1692 }
1693 }
1694
1695 cbRead -= cbToRead;
1696
1697 if (!cbRead)
1698 STAM_COUNTER_INC(&pCache->cMisses);
1699 else
1700 STAM_COUNTER_INC(&pCache->cPartialHits);
1701
1702 uint8_t *pbBuffer = NULL;
1703
1704 pdmacFileCacheLockEnter(pCache);
1705 bool fEnough = pdmacFileCacheReclaim(pCache, cbToReadAligned, true, &pbBuffer);
1706
1707 if (fEnough)
1708 {
1709 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToReadAligned));
1710
1711 PPDMACFILECACHEENTRY pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToReadAligned, pbBuffer);
1712 AssertPtr(pEntryNew);
1713
1714 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1715 pdmacFileCacheAdd(pCache, cbToReadAligned);
1716 pdmacFileCacheLockLeave(pCache);
1717
1718 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
1719
1720 AssertMsg( (off >= pEntryNew->Core.Key)
1721 && (off + (RTFOFF)cbToRead <= pEntryNew->Core.Key + pEntryNew->Core.KeyLast + 1),
1722 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1723 off, pEntryNew->Core.Key));
1724
1725 pdmacFileEpCacheEntryWaitersAdd(pEntryNew, pTask,
1726 &IoMemCtx, 0, cbToRead,
1727 false /* fWrite */);
1728 pdmacFileCacheReadFromEndpoint(pEntryNew);
1729 pdmacFileEpCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1730 }
1731 else
1732 {
1733 pdmacFileCacheLockLeave(pCache);
1734
1735 /*
1736 * There is not enough free space in the cache.
1737 * Pass the request directly to the I/O manager.
1738 */
1739 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
1740
1741 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
1742 &IoMemCtx, off, cbToRead,
1743 PDMACTASKFILETRANSFER_READ);
1744 }
1745 }
1746 off += cbToRead;
1747 }
1748
1749 ASMAtomicWriteBool(&pTask->fCompleted, false);
1750
1751 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
1752 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
1753 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
1754 else
1755 rc = VINF_AIO_TASK_PENDING;
1756
1757 LogFlowFunc((": Leave rc=%Rrc\n", rc));
1758
1759 return rc;
1760}
1761
1762/**
1763 * Writes the given data to the endpoint using the cache if possible.
1764 *
1765 * @returns VBox status code.
1766 * @param pEndpoint The endpoint to write to.
1767 * @param pTask The task structure used as identifier for this request.
1768 * @param off The offset to start writing to
1769 * @param paSegments Pointer to the array holding the source buffers.
1770 * @param cSegments Number of segments in the array.
1771 * @param cbWrite Number of bytes to write.
1772 */
1773int pdmacFileEpCacheWrite(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1774 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1775 size_t cbWrite)
1776{
1777 int rc = VINF_SUCCESS;
1778 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1779 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1780 PPDMACFILECACHEENTRY pEntry;
1781
1782 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbWrite=%u\n",
1783 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbWrite));
1784
1785 pTask->cbTransferLeft = cbWrite;
1786 /* Set to completed to make sure that the task is valid while we access it. */
1787 ASMAtomicWriteBool(&pTask->fCompleted, true);
1788
1789 /* Init the I/O memory context */
1790 PDMIOMEMCTX IoMemCtx;
1791 pdmIoMemCtxInit(&IoMemCtx, paSegments, cSegments);
1792
1793 while (cbWrite)
1794 {
1795 size_t cbToWrite;
1796
1797 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1798
1799 if (pEntry)
1800 {
1801 /* Write the data into the entry and mark it as dirty */
1802 AssertPtr(pEntry->pList);
1803
1804 RTFOFF OffDiff = off - pEntry->Core.Key;
1805
1806 AssertMsg(off >= pEntry->Core.Key,
1807 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1808 off, pEntry->Core.Key));
1809
1810 cbToWrite = RT_MIN(pEntry->cbData - OffDiff, cbWrite);
1811 cbWrite -= cbToWrite;
1812
1813 if (!cbWrite)
1814 STAM_COUNTER_INC(&pCache->cHits);
1815 else
1816 STAM_COUNTER_INC(&pCache->cPartialHits);
1817
1818 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1819
1820 /* Ghost lists contain no data. */
1821 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1822 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1823 {
1824 /* Check if the buffer is deprecated. */
1825 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1826 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1827 0))
1828 {
1829 AssertMsg(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1830 ("Entry is deprecated but not in progress\n"));
1831 AssertPtr(pEntry->pbDataReplace);
1832
1833 LogFlow(("Writing to deprecated buffer of entry %#p\n", pEntry));
1834
1835 /* Update the data from the write. */
1836 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1837 pEntry->pbDataReplace + OffDiff,
1838 cbToWrite);
1839 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
1840 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1841 }
1842 else /* Deprecated flag not set */
1843 {
1844 /* Check if the entry is dirty. */
1845 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1846 PDMACFILECACHE_ENTRY_IS_DIRTY,
1847 0))
1848 {
1849 /* If it is dirty but not in progrss just update the data. */
1850 if (!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS))
1851 {
1852 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1853 pEntry->pbData + OffDiff,
1854 cbToWrite);
1855 }
1856 else
1857 {
1858 Assert(!pEntry->pbDataReplace);
1859
1860 /* Deprecate the current buffer. */
1861 if (!pEntry->pWaitingHead)
1862 pEntry->pbDataReplace = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1863
1864 /* If we are out of memory or have waiting segments
1865 * defer the write. */
1866 if (!pEntry->pbDataReplace || pEntry->pWaitingHead)
1867 {
1868 /* The data isn't written to the file yet */
1869 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1870 &IoMemCtx,
1871 OffDiff, cbToWrite,
1872 true /* fWrite */);
1873 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1874 }
1875 else /* Deprecate buffer */
1876 {
1877 LogFlow(("Deprecating buffer for entry %#p\n", pEntry));
1878 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DEPRECATED;
1879
1880 /* Copy the data before the update. */
1881 if (OffDiff)
1882 memcpy(pEntry->pbDataReplace, pEntry->pbData, OffDiff);
1883
1884 /* Copy data behind the update. */
1885 if ((pEntry->cbData - OffDiff - cbToWrite) > 0)
1886 memcpy(pEntry->pbDataReplace + OffDiff + cbToWrite,
1887 pEntry->pbData + OffDiff + cbToWrite,
1888 (pEntry->cbData - OffDiff - cbToWrite));
1889
1890 /* Update the data from the write. */
1891 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1892 pEntry->pbDataReplace + OffDiff,
1893 cbToWrite);
1894 /* We are done here. A new write is initiated if the current request completes. */
1895 }
1896 }
1897
1898 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
1899 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1900 }
1901 else /* Dirty bit not set */
1902 {
1903 /*
1904 * Check if a read is in progress for this entry.
1905 * We have to defer processing in that case.
1906 */
1907 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1908 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1909 0))
1910 {
1911 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1912 &IoMemCtx,
1913 OffDiff, cbToWrite,
1914 true /* fWrite */);
1915 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1916 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1917 }
1918 else /* I/O in progress flag not set */
1919 {
1920 /* Write as much as we can into the entry and update the file. */
1921 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1922 pEntry->pbData + OffDiff,
1923 cbToWrite);
1924 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
1925
1926 bool fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntry);
1927 if (fCommit)
1928 pdmacFileCacheCommitDirtyEntries(pCache);
1929 }
1930 } /* Dirty bit not set */
1931
1932 /* Move this entry to the top position */
1933 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1934 {
1935 pdmacFileCacheLockEnter(pCache);
1936 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1937 pdmacFileCacheLockLeave(pCache);
1938 } /* Deprecated flag not set. */
1939 }
1940 pdmacFileEpCacheEntryRelease(pEntry);
1941 }
1942 else /* Entry is on the ghost list */
1943 {
1944 uint8_t *pbBuffer = NULL;
1945
1946 pdmacFileCacheLockEnter(pCache);
1947 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1948 bool fEnough = pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1949
1950 if (fEnough)
1951 {
1952 /* Move the entry to Am and fetch it to the cache. */
1953 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1954 pdmacFileCacheAdd(pCache, pEntry->cbData);
1955 pdmacFileCacheLockLeave(pCache);
1956
1957 if (pbBuffer)
1958 pEntry->pbData = pbBuffer;
1959 else
1960 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1961 AssertPtr(pEntry->pbData);
1962
1963 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1964 &IoMemCtx,
1965 OffDiff, cbToWrite,
1966 true /* fWrite */);
1967 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1968 pdmacFileCacheReadFromEndpoint(pEntry);
1969
1970 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
1971 pdmacFileEpCacheEntryRelease(pEntry);
1972 }
1973 else
1974 {
1975 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1976 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
1977 RTAvlrFileOffsetRemove(pEndpointCache->pTree, pEntry->Core.Key);
1978 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
1979 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1980
1981 pdmacFileCacheLockLeave(pCache);
1982
1983 RTMemFree(pEntry);
1984 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
1985 &IoMemCtx, off, cbToWrite,
1986 PDMACTASKFILETRANSFER_WRITE);
1987 }
1988 }
1989 }
1990 else /* No entry found */
1991 {
1992 /*
1993 * No entry found. Try to create a new cache entry to store the data in and if that fails
1994 * write directly to the file.
1995 */
1996 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1997
1998 LogFlow(("%sest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1999 pEntryBestFit ? "B" : "No b",
2000 off,
2001 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
2002 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
2003 pEntryBestFit ? pEntryBestFit->cbData : 0));
2004
2005 if (pEntryBestFit && ((off + (RTFOFF)cbWrite) > pEntryBestFit->Core.Key))
2006 {
2007 cbToWrite = pEntryBestFit->Core.Key - off;
2008 pdmacFileEpCacheEntryRelease(pEntryBestFit);
2009 }
2010 else
2011 {
2012 if (pEntryBestFit)
2013 pdmacFileEpCacheEntryRelease(pEntryBestFit);
2014
2015 cbToWrite = cbWrite;
2016 }
2017
2018 cbWrite -= cbToWrite;
2019
2020 STAM_COUNTER_INC(&pCache->cMisses);
2021
2022 uint8_t *pbBuffer = NULL;
2023
2024 pdmacFileCacheLockEnter(pCache);
2025 bool fEnough = pdmacFileCacheReclaim(pCache, cbToWrite, true, &pbBuffer);
2026
2027 if (fEnough)
2028 {
2029 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToWrite));
2030
2031 PPDMACFILECACHEENTRY pEntryNew;
2032
2033 pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToWrite, pbBuffer);
2034 AssertPtr(pEntryNew);
2035
2036 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
2037 pdmacFileCacheAdd(pCache, cbToWrite);
2038 pdmacFileCacheLockLeave(pCache);
2039
2040 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
2041
2042 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
2043 pEntryNew->pbData,
2044 cbToWrite);
2045 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
2046
2047 bool fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntryNew);
2048 if (fCommit)
2049 pdmacFileCacheCommitDirtyEntries(pCache);
2050 pdmacFileEpCacheEntryRelease(pEntryNew);
2051 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2052 }
2053 else
2054 {
2055 pdmacFileCacheLockLeave(pCache);
2056
2057 /*
2058 * There is not enough free space in the cache.
2059 * Pass the request directly to the I/O manager.
2060 */
2061 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2062
2063 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
2064 &IoMemCtx, off, cbToWrite,
2065 PDMACTASKFILETRANSFER_WRITE);
2066 }
2067 }
2068
2069 off += cbToWrite;
2070 }
2071
2072 ASMAtomicWriteBool(&pTask->fCompleted, false);
2073
2074 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
2075 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
2076 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
2077 else
2078 rc = VINF_AIO_TASK_PENDING;
2079
2080 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2081
2082 return rc;
2083}
2084
2085int pdmacFileEpCacheFlush(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask)
2086{
2087 int rc = VINF_SUCCESS;
2088
2089 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p\n",
2090 pEndpoint, pEndpoint->Core.pszUri, pTask));
2091
2092 if (ASMAtomicReadPtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush))
2093 rc = VERR_RESOURCE_BUSY;
2094 else
2095 {
2096 /* Check for dirty entries in the cache. */
2097 pdmacFileCacheEndpointCommit(&pEndpoint->DataCache);
2098 if (ASMAtomicReadU32(&pEndpoint->DataCache.cWritesOutstanding) > 0)
2099 {
2100 ASMAtomicWritePtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush, pTask);
2101 rc = VINF_AIO_TASK_PENDING;
2102 }
2103 else
2104 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
2105 }
2106
2107 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2108 return rc;
2109}
2110
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette