VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileCache.cpp@ 26871

Last change on this file since 26871 was 26814, checked in by vboxsync, 15 years ago

AsyncCompletion: Protect the dirty but not committed list by a spinlock

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 80.1 KB
Line 
1/* $Id: PDMAsyncCompletionFileCache.cpp 26814 2010-02-25 22:44:22Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * File data cache.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22
23/** @page pg_pdm_async_completion_cache PDM Async Completion Cache - The file I/O cache
24 * This component implements an I/O cache for file endpoints based on the 2Q cache algorithm.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
31#include <iprt/types.h>
32#include <iprt/mem.h>
33#include <iprt/path.h>
34#include <VBox/log.h>
35#include <VBox/stam.h>
36
37#include "PDMAsyncCompletionFileInternal.h"
38
39/**
40 * A I/O memory context.
41 */
42typedef struct PDMIOMEMCTX
43{
44 /** Pointer to the scatter/gather list. */
45 PCPDMDATASEG paDataSeg;
46 /** Number of segments. */
47 size_t cSegments;
48 /** Current segment we are in. */
49 unsigned iSegIdx;
50 /** Pointer to the current buffer. */
51 uint8_t *pbBuf;
52 /** Number of bytes left in the current buffer. */
53 size_t cbBufLeft;
54} PDMIOMEMCTX, *PPDMIOMEMCTX;
55
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given
 *  global cache. The argument is parenthesized so that any pointer expression
 *  can be passed safely. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while(0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for
 *  writing. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while(0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for
 *  reading. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while(0)

#else
/* The ownership checks compile to nothing in non-strict builds. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while(0)
#endif
83
84/*******************************************************************************
85* Internal Functions *
86*******************************************************************************/
87static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser);
88
89/**
90 * Decrement the reference counter of the given cache entry.
91 *
92 * @returns nothing.
93 * @param pEntry The entry to release.
94 */
95DECLINLINE(void) pdmacFileEpCacheEntryRelease(PPDMACFILECACHEENTRY pEntry)
96{
97 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
98 ASMAtomicDecU32(&pEntry->cRefs);
99}
100
101/**
102 * Increment the reference counter of the given cache entry.
103 *
104 * @returns nothing.
105 * @param pEntry The entry to reference.
106 */
107DECLINLINE(void) pdmacFileEpCacheEntryRef(PPDMACFILECACHEENTRY pEntry)
108{
109 ASMAtomicIncU32(&pEntry->cRefs);
110}
111
112/**
113 * Initialize a I/O memory context.
114 *
115 * @returns nothing
116 * @param pIoMemCtx Pointer to a unitialized I/O memory context.
117 * @param paDataSeg Pointer to the S/G list.
118 * @param cSegments Number of segments in the S/G list.
119 */
120DECLINLINE(void) pdmIoMemCtxInit(PPDMIOMEMCTX pIoMemCtx, PCPDMDATASEG paDataSeg, size_t cSegments)
121{
122 AssertMsg((cSegments > 0) && paDataSeg, ("Trying to initialize a I/O memory context without a S/G list\n"));
123
124 pIoMemCtx->paDataSeg = paDataSeg;
125 pIoMemCtx->cSegments = cSegments;
126 pIoMemCtx->iSegIdx = 0;
127 pIoMemCtx->pbBuf = (uint8_t *)paDataSeg[0].pvSeg;
128 pIoMemCtx->cbBufLeft = paDataSeg[0].cbSeg;
129}
130
131/**
132 * Return a buffer from the I/O memory context.
133 *
134 * @returns Pointer to the buffer
135 * @param pIoMemCtx Pointer to the I/O memory context.
136 * @param pcbData Pointer to the amount of byte requested.
137 * If the current buffer doesn't have enough bytes left
138 * the amount is returned in the variable.
139 */
140DECLINLINE(uint8_t *) pdmIoMemCtxGetBuffer(PPDMIOMEMCTX pIoMemCtx, size_t *pcbData)
141{
142 size_t cbData = RT_MIN(*pcbData, pIoMemCtx->cbBufLeft);
143 uint8_t *pbBuf = pIoMemCtx->pbBuf;
144
145 pIoMemCtx->cbBufLeft -= cbData;
146
147 /* Advance to the next segment if required. */
148 if (!pIoMemCtx->cbBufLeft)
149 {
150 pIoMemCtx->iSegIdx++;
151
152 if (RT_UNLIKELY(pIoMemCtx->iSegIdx == pIoMemCtx->cSegments))
153 {
154 pIoMemCtx->cbBufLeft = 0;
155 pIoMemCtx->pbBuf = NULL;
156 }
157 else
158 {
159 pIoMemCtx->pbBuf = (uint8_t *)pIoMemCtx->paDataSeg[pIoMemCtx->iSegIdx].pvSeg;
160 pIoMemCtx->cbBufLeft = pIoMemCtx->paDataSeg[pIoMemCtx->iSegIdx].cbSeg;
161 }
162
163 *pcbData = cbData;
164 }
165 else
166 pIoMemCtx->pbBuf += cbData;
167
168 return pbBuf;
169}
170
171#ifdef DEBUG
172static void pdmacFileCacheValidate(PPDMACFILECACHEGLOBAL pCache)
173{
174 /* Amount of cached data should never exceed the maximum amount. */
175 AssertMsg(pCache->cbCached <= pCache->cbMax,
176 ("Current amount of cached data exceeds maximum\n"));
177
178 /* The amount of cached data in the LRU and FRU list should match cbCached */
179 AssertMsg(pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached == pCache->cbCached,
180 ("Amount of cached data doesn't match\n"));
181
182 AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
183 ("Paged out list exceeds maximum\n"));
184}
185#endif
186
187DECLINLINE(void) pdmacFileCacheLockEnter(PPDMACFILECACHEGLOBAL pCache)
188{
189 RTCritSectEnter(&pCache->CritSect);
190#ifdef DEBUG
191 pdmacFileCacheValidate(pCache);
192#endif
193}
194
195DECLINLINE(void) pdmacFileCacheLockLeave(PPDMACFILECACHEGLOBAL pCache)
196{
197#ifdef DEBUG
198 pdmacFileCacheValidate(pCache);
199#endif
200 RTCritSectLeave(&pCache->CritSect);
201}
202
203DECLINLINE(void) pdmacFileCacheSub(PPDMACFILECACHEGLOBAL pCache, uint32_t cbAmount)
204{
205 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
206 pCache->cbCached -= cbAmount;
207}
208
209DECLINLINE(void) pdmacFileCacheAdd(PPDMACFILECACHEGLOBAL pCache, uint32_t cbAmount)
210{
211 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
212 pCache->cbCached += cbAmount;
213}
214
215DECLINLINE(void) pdmacFileCacheListAdd(PPDMACFILELRULIST pList, uint32_t cbAmount)
216{
217 pList->cbCached += cbAmount;
218}
219
220DECLINLINE(void) pdmacFileCacheListSub(PPDMACFILELRULIST pList, uint32_t cbAmount)
221{
222 pList->cbCached -= cbAmount;
223}
224
225#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
226/**
227 * Checks consistency of a LRU list.
228 *
229 * @returns nothing
230 * @param pList The LRU list to check.
231 * @param pNotInList Element which is not allowed to occur in the list.
232 */
233static void pdmacFileCacheCheckList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pNotInList)
234{
235 PPDMACFILECACHEENTRY pCurr = pList->pHead;
236
237 /* Check that there are no double entries and no cycles in the list. */
238 while (pCurr)
239 {
240 PPDMACFILECACHEENTRY pNext = pCurr->pNext;
241
242 while (pNext)
243 {
244 AssertMsg(pCurr != pNext,
245 ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
246 pCurr, pList));
247 pNext = pNext->pNext;
248 }
249
250 AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));
251
252 if (!pCurr->pNext)
253 AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
254
255 pCurr = pCurr->pNext;
256 }
257}
258#endif
259
260/**
261 * Unlinks a cache entry from the LRU list it is assigned to.
262 *
263 * @returns nothing.
264 * @param pEntry The entry to unlink.
265 */
266static void pdmacFileCacheEntryRemoveFromList(PPDMACFILECACHEENTRY pEntry)
267{
268 PPDMACFILELRULIST pList = pEntry->pList;
269 PPDMACFILECACHEENTRY pPrev, pNext;
270
271 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
272
273 AssertPtr(pList);
274
275#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
276 pdmacFileCacheCheckList(pList, NULL);
277#endif
278
279 pPrev = pEntry->pPrev;
280 pNext = pEntry->pNext;
281
282 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
283 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
284
285 if (pPrev)
286 pPrev->pNext = pNext;
287 else
288 {
289 pList->pHead = pNext;
290
291 if (pNext)
292 pNext->pPrev = NULL;
293 }
294
295 if (pNext)
296 pNext->pPrev = pPrev;
297 else
298 {
299 pList->pTail = pPrev;
300
301 if (pPrev)
302 pPrev->pNext = NULL;
303 }
304
305 pEntry->pList = NULL;
306 pEntry->pPrev = NULL;
307 pEntry->pNext = NULL;
308 pdmacFileCacheListSub(pList, pEntry->cbData);
309#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
310 pdmacFileCacheCheckList(pList, pEntry);
311#endif
312}
313
314/**
315 * Adds a cache entry to the given LRU list unlinking it from the currently
316 * assigned list if needed.
317 *
318 * @returns nothing.
319 * @param pList List to the add entry to.
320 * @param pEntry Entry to add.
321 */
322static void pdmacFileCacheEntryAddToList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pEntry)
323{
324 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
325#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
326 pdmacFileCacheCheckList(pList, NULL);
327#endif
328
329 /* Remove from old list if needed */
330 if (pEntry->pList)
331 pdmacFileCacheEntryRemoveFromList(pEntry);
332
333 pEntry->pNext = pList->pHead;
334 if (pList->pHead)
335 pList->pHead->pPrev = pEntry;
336 else
337 {
338 Assert(!pList->pTail);
339 pList->pTail = pEntry;
340 }
341
342 pEntry->pPrev = NULL;
343 pList->pHead = pEntry;
344 pdmacFileCacheListAdd(pList, pEntry->cbData);
345 pEntry->pList = pList;
346#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
347 pdmacFileCacheCheckList(pList, NULL);
348#endif
349}
350
351/**
352 * Destroys a LRU list freeing all entries.
353 *
354 * @returns nothing
355 * @param pList Pointer to the LRU list to destroy.
356 *
357 * @note The caller must own the critical section of the cache.
358 */
359static void pdmacFileCacheDestroyList(PPDMACFILELRULIST pList)
360{
361 while (pList->pHead)
362 {
363 PPDMACFILECACHEENTRY pEntry = pList->pHead;
364
365 pList->pHead = pEntry->pNext;
366
367 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
368 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
369
370 RTMemPageFree(pEntry->pbData);
371 RTMemFree(pEntry);
372 }
373}
374
375/**
376 * Tries to remove the given amount of bytes from a given list in the cache
377 * moving the entries to one of the given ghosts lists
378 *
379 * @returns Amount of data which could be freed.
380 * @param pCache Pointer to the global cache data.
381 * @param cbData The amount of the data to free.
382 * @param pListSrc The source list to evict data from.
383 * @param pGhostListSrc The ghost list removed entries should be moved to
384 * NULL if the entry should be freed.
385 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size
386 * @param ppbBuf Where to store the address of the buffer if an entry with the
387 * same size was found and fReuseBuffer is true.
388 *
389 * @note This function may return fewer bytes than requested because entries
390 * may be marked as non evictable if they are used for I/O at the
391 * moment.
392 */
393static size_t pdmacFileCacheEvictPagesFrom(PPDMACFILECACHEGLOBAL pCache, size_t cbData,
394 PPDMACFILELRULIST pListSrc, PPDMACFILELRULIST pGhostListDst,
395 bool fReuseBuffer, uint8_t **ppbBuffer)
396{
397 size_t cbEvicted = 0;
398
399 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
400
401 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
402 AssertMsg( !pGhostListDst
403 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
404 ("Destination list must be NULL or the recently used but paged out list\n"));
405
406 if (fReuseBuffer)
407 {
408 AssertPtr(ppbBuffer);
409 *ppbBuffer = NULL;
410 }
411
412 /* Start deleting from the tail. */
413 PPDMACFILECACHEENTRY pEntry = pListSrc->pTail;
414
415 while ((cbEvicted < cbData) && pEntry)
416 {
417 PPDMACFILECACHEENTRY pCurr = pEntry;
418
419 pEntry = pEntry->pPrev;
420
421 /* We can't evict pages which are currently in progress or dirty but not in progress */
422 if ( !(pCurr->fFlags & PDMACFILECACHE_NOT_EVICTABLE)
423 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
424 {
425 /* Ok eviction candidate. Grab the endpoint semaphore and check again
426 * because somebody else might have raced us. */
427 PPDMACFILEENDPOINTCACHE pEndpointCache = &pCurr->pEndpoint->DataCache;
428 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
429
430 if (!(pCurr->fFlags & PDMACFILECACHE_NOT_EVICTABLE)
431 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
432 {
433 AssertMsg(!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED),
434 ("This entry is deprecated so it should have the I/O in progress flag set\n"));
435 Assert(!pCurr->pbDataReplace);
436
437 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
438
439 if (fReuseBuffer && (pCurr->cbData == cbData))
440 {
441 STAM_COUNTER_INC(&pCache->StatBuffersReused);
442 *ppbBuffer = pCurr->pbData;
443 }
444 else if (pCurr->pbData)
445 RTMemPageFree(pCurr->pbData);
446
447 pCurr->pbData = NULL;
448 cbEvicted += pCurr->cbData;
449
450 pdmacFileCacheEntryRemoveFromList(pCurr);
451 pdmacFileCacheSub(pCache, pCurr->cbData);
452
453 if (pGhostListDst)
454 {
455 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
456
457 PPDMACFILECACHEENTRY pGhostEntFree = pGhostListDst->pTail;
458
459 /* We have to remove the last entries from the paged out list. */
460 while ( ((pGhostListDst->cbCached + pCurr->cbData) > pCache->cbRecentlyUsedOutMax)
461 && pGhostEntFree)
462 {
463 PPDMACFILECACHEENTRY pFree = pGhostEntFree;
464 PPDMACFILEENDPOINTCACHE pEndpointCacheFree = &pFree->pEndpoint->DataCache;
465
466 pGhostEntFree = pGhostEntFree->pPrev;
467
468 RTSemRWRequestWrite(pEndpointCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
469
470 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
471 {
472 pdmacFileCacheEntryRemoveFromList(pFree);
473
474 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
475 RTAvlrFileOffsetRemove(pEndpointCacheFree->pTree, pFree->Core.Key);
476 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
477
478 RTMemFree(pFree);
479 }
480
481 RTSemRWReleaseWrite(pEndpointCacheFree->SemRWEntries);
482 }
483
484 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
485 {
486 /* Couldn't remove enough entries. Delete */
487 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
488 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
489 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
490
491 RTMemFree(pCurr);
492 }
493 else
494 pdmacFileCacheEntryAddToList(pGhostListDst, pCurr);
495 }
496 else
497 {
498 /* Delete the entry from the AVL tree it is assigned to. */
499 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
500 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
501 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
502
503 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
504 RTMemFree(pCurr);
505 }
506 }
507
508 }
509 else
510 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
511 }
512
513 return cbEvicted;
514}
515
516static bool pdmacFileCacheReclaim(PPDMACFILECACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
517{
518 size_t cbRemoved = 0;
519
520 if ((pCache->cbCached + cbData) < pCache->cbMax)
521 return true;
522 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
523 {
524 /* Try to evict as many bytes as possible from A1in */
525 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
526 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
527
528 /*
529 * If it was not possible to remove enough entries
530 * try the frequently accessed cache.
531 */
532 if (cbRemoved < cbData)
533 {
534 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
535
536 cbRemoved += pdmacFileCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
537 NULL, fReuseBuffer, ppbBuffer);
538 }
539 }
540 else
541 {
542 /* We have to remove entries from frequently access list. */
543 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
544 NULL, fReuseBuffer, ppbBuffer);
545 }
546
547 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
548 return (cbRemoved >= cbData);
549}
550
551/**
552 * Initiates a read I/O task for the given entry.
553 *
554 * @returns nothing.
555 * @param pEntry The entry to fetch the data to.
556 */
557static void pdmacFileCacheReadFromEndpoint(PPDMACFILECACHEENTRY pEntry)
558{
559 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
560
561 /* Make sure no one evicts the entry while it is accessed. */
562 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
563
564 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
565 AssertPtr(pIoTask);
566
567 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
568
569 pIoTask->pEndpoint = pEntry->pEndpoint;
570 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
571 pIoTask->Off = pEntry->Core.Key;
572 pIoTask->DataSeg.cbSeg = pEntry->cbData;
573 pIoTask->DataSeg.pvSeg = pEntry->pbData;
574 pIoTask->pvUser = pEntry;
575 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
576
577 /* Send it off to the I/O manager. */
578 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
579}
580
581/**
582 * Initiates a write I/O task for the given entry.
583 *
584 * @returns nothing.
585 * @param pEntry The entry to read the data from.
586 */
587static void pdmacFileCacheWriteToEndpoint(PPDMACFILECACHEENTRY pEntry)
588{
589 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
590
591 /* Make sure no one evicts the entry while it is accessed. */
592 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
593
594 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
595 AssertPtr(pIoTask);
596
597 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
598
599 pIoTask->pEndpoint = pEntry->pEndpoint;
600 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
601 pIoTask->Off = pEntry->Core.Key;
602 pIoTask->DataSeg.cbSeg = pEntry->cbData;
603 pIoTask->DataSeg.pvSeg = pEntry->pbData;
604 pIoTask->pvUser = pEntry;
605 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
606 ASMAtomicIncU32(&pEntry->pEndpoint->DataCache.cWritesOutstanding);
607
608 /* Send it off to the I/O manager. */
609 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
610}
611
612/**
613 * Commit a single dirty entry to the endpoint
614 *
615 * @returns nothing
616 * @param pEntry The entry to commit.
617 */
618static void pdmacFileCacheEntryCommit(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
619{
620 NOREF(pEndpointCache);
621 AssertMsg( (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY)
622 && !(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DEPRECATED)),
623 ("Invalid flags set for entry %#p\n", pEntry));
624
625 pdmacFileCacheWriteToEndpoint(pEntry);
626}
627
628/**
629 * Commit all dirty entries for a single endpoint.
630 *
631 * @returns nothing.
632 * @param pEndpointCache The endpoint cache to commit.
633 */
634static void pdmacFileCacheEndpointCommit(PPDMACFILEENDPOINTCACHE pEndpointCache)
635{
636 uint32_t cbCommitted = 0;
637 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
638
639 /* The list is moved to a new header to reduce locking overhead. */
640 RTLISTNODE ListDirtyNotCommitted;
641 RTSPINLOCKTMP Tmp;
642
643 RTListInit(&ListDirtyNotCommitted);
644 RTSpinlockAcquire(pEndpointCache->LockList, &Tmp);
645 RTListMove(&ListDirtyNotCommitted, &pEndpointCache->ListDirtyNotCommitted);
646 RTSpinlockRelease(pEndpointCache->LockList, &Tmp);
647
648 if (!RTListIsEmpty(&ListDirtyNotCommitted))
649 {
650 PPDMACFILECACHEENTRY pEntry = RTListNodeGetFirst(&ListDirtyNotCommitted,
651 PDMACFILECACHEENTRY,
652 NodeNotCommitted);
653
654 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
655 {
656 PPDMACFILECACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMACFILECACHEENTRY,
657 NodeNotCommitted);
658 pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
659 cbCommitted += pEntry->cbData;
660 RTListNodeRemove(&pEntry->NodeNotCommitted);
661 pEntry = pNext;
662 }
663
664 /* Commit the last endpoint */
665 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
666 pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
667 RTListNodeRemove(&pEntry->NodeNotCommitted);
668 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
669 ("Committed all entries but list is not empty\n"));
670 }
671
672 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
673 AssertMsg(pEndpointCache->pCache->cbDirty >= cbCommitted,
674 ("Number of committed bytes exceeds number of dirty bytes\n"));
675 ASMAtomicSubU32(&pEndpointCache->pCache->cbDirty, cbCommitted);
676}
677
678/**
679 * Commit all dirty entries in the cache.
680 *
681 * @returns nothing.
682 * @param pCache The global cache instance.
683 */
684static void pdmacFileCacheCommitDirtyEntries(PPDMACFILECACHEGLOBAL pCache)
685{
686 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
687
688 if (!fCommitInProgress)
689 {
690 pdmacFileCacheLockEnter(pCache);
691 Assert(!RTListIsEmpty(&pCache->ListEndpoints));
692
693 PPDMACFILEENDPOINTCACHE pEndpointCache = RTListNodeGetFirst(&pCache->ListEndpoints,
694 PDMACFILEENDPOINTCACHE,
695 NodeCacheEndpoint);
696 AssertPtr(pEndpointCache);
697
698 while (!RTListNodeIsLast(&pCache->ListEndpoints, &pEndpointCache->NodeCacheEndpoint))
699 {
700 pdmacFileCacheEndpointCommit(pEndpointCache);
701
702 pEndpointCache = RTListNodeGetNext(&pEndpointCache->NodeCacheEndpoint, PDMACFILEENDPOINTCACHE,
703 NodeCacheEndpoint);
704 }
705
706 /* Commit the last endpoint */
707 Assert(RTListNodeIsLast(&pCache->ListEndpoints, &pEndpointCache->NodeCacheEndpoint));
708 pdmacFileCacheEndpointCommit(pEndpointCache);
709
710 pdmacFileCacheLockLeave(pCache);
711 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
712 }
713}
714
715/**
716 * Adds the given entry as a dirty to the cache.
717 *
718 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
719 * @param pEndpointCache The endpoint cache the entry belongs to.
720 * @param pEntry The entry to add.
721 */
722static bool pdmacFileCacheAddDirtyEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
723{
724 bool fDirtyBytesExceeded = false;
725 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
726
727 /* If the commit timer is disabled we commit right away. */
728 if (pCache->u32CommitTimeoutMs == 0)
729 {
730 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
731 pdmacFileCacheEntryCommit(pEndpointCache, pEntry);
732 }
733 else if (!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY))
734 {
735 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
736
737 RTSPINLOCKTMP Tmp;
738 RTSpinlockAcquire(pEndpointCache->LockList, &Tmp);
739 RTListAppend(&pEndpointCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
740 RTSpinlockRelease(pEndpointCache->LockList, &Tmp);
741
742 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
743
744 fDirtyBytesExceeded = (cbDirty >= pCache->cbCommitDirtyThreshold);
745 }
746
747 return fDirtyBytesExceeded;
748}
749
750
751/**
752 * Completes a task segment freeing all ressources and completes the task handle
753 * if everything was transfered.
754 *
755 * @returns Next task segment handle.
756 * @param pEndpointCache The endpoint cache.
757 * @param pTaskSeg Task segment to complete.
758 */
759static PPDMACFILETASKSEG pdmacFileCacheTaskComplete(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILETASKSEG pTaskSeg)
760{
761 PPDMACFILETASKSEG pNext = pTaskSeg->pNext;
762
763 uint32_t uOld = ASMAtomicSubS32(&pTaskSeg->pTask->cbTransferLeft, pTaskSeg->cbTransfer);
764 AssertMsg(uOld >= pTaskSeg->cbTransfer, ("New value would overflow\n"));
765 if (!(uOld - pTaskSeg->cbTransfer)
766 && !ASMAtomicXchgBool(&pTaskSeg->pTask->fCompleted, true))
767 pdmR3AsyncCompletionCompleteTask(&pTaskSeg->pTask->Core, true);
768
769 RTMemFree(pTaskSeg);
770
771 return pNext;
772}
773
774/**
775 * Completion callback for I/O tasks.
776 *
777 * @returns nothing.
778 * @param pTask The completed task.
779 * @param pvUser Opaque user data.
780 */
781static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser)
782{
783 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pvUser;
784 PPDMACFILECACHEGLOBAL pCache = pEntry->pCache;
785 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pEntry->pEndpoint;
786 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
787
788 /* Reference the entry now as we are clearing the I/O in progres flag
789 * which protects the entry till now. */
790 pdmacFileEpCacheEntryRef(pEntry);
791
792 RTSemRWRequestWrite(pEndpoint->DataCache.SemRWEntries, RT_INDEFINITE_WAIT);
793 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
794
795 /* Process waiting segment list. The data in entry might have changed inbetween. */
796 bool fDirty = false;
797 PPDMACFILETASKSEG pCurr = pEntry->pWaitingHead;
798
799 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
800 ("The list tail was not updated correctly\n"));
801 pEntry->pWaitingTail = NULL;
802 pEntry->pWaitingHead = NULL;
803
804 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
805 {
806 AssertMsg(pEndpointCache->cWritesOutstanding > 0, ("Completed write request but outstanding task count is 0\n"));
807 ASMAtomicDecU32(&pEndpointCache->cWritesOutstanding);
808
809 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DIRTY;
810
811 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED)
812 {
813 AssertMsg(!pCurr, ("The entry is deprecated but has waiting write segments attached\n"));
814
815 RTMemPageFree(pEntry->pbData);
816 pEntry->pbData = pEntry->pbDataReplace;
817 pEntry->pbDataReplace = NULL;
818 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DEPRECATED;
819 }
820 else
821 {
822 while (pCurr)
823 {
824 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
825
826 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
827 fDirty = true;
828
829 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
830 }
831 }
832 }
833 else
834 {
835 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_READ, ("Invalid transfer type\n"));
836 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IS_DIRTY | PDMACFILECACHE_ENTRY_IS_DEPRECATED)),
837 ("Invalid flags set\n"));
838
839 while (pCurr)
840 {
841 if (pCurr->fWrite)
842 {
843 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
844 fDirty = true;
845 }
846 else
847 memcpy(pCurr->pvBuf, pEntry->pbData + pCurr->uBufOffset, pCurr->cbTransfer);
848
849 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
850 }
851 }
852
853 bool fCommit = false;
854 if (fDirty)
855 fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntry);
856
857 /* Complete a pending flush if all writes have completed */
858 if (!ASMAtomicReadU32(&pEndpointCache->cWritesOutstanding))
859 {
860 PPDMASYNCCOMPLETIONTASKFILE pTaskFlush = (PPDMASYNCCOMPLETIONTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpointCache->pTaskFlush, NULL);
861 if (pTaskFlush)
862 pdmR3AsyncCompletionCompleteTask(&pTaskFlush->Core, true);
863 }
864
865 RTSemRWReleaseWrite(pEndpoint->DataCache.SemRWEntries);
866
867 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
868 pdmacFileEpCacheEntryRelease(pEntry);
869
870 if (fCommit)
871 pdmacFileCacheCommitDirtyEntries(pCache);
872}
873
874/**
875 * Commit timer callback.
876 */
877static void pdmacFileCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
878{
879 PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pvUser;
880 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
881
882 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
883
884 if (ASMAtomicReadU32(&pCache->cbDirty) > 0)
885 pdmacFileCacheCommitDirtyEntries(pCache);
886
887 TMTimerSetMillies(pTimer, pCache->u32CommitTimeoutMs);
888 LogFlowFunc(("Entries committed, going to sleep\n"));
889}
890
891/**
892 * Initializes the I/O cache.
893 *
894 * @returns VBox status code.
895 * @param pClassFile The global class data for file endpoints.
896 * @param pCfgNode CFGM node to query configuration data from.
897 */
898int pdmacFileCacheInit(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile, PCFGMNODE pCfgNode)
899{
900 int rc = VINF_SUCCESS;
901 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
902
903 rc = CFGMR3QueryU32Def(pCfgNode, "CacheSize", &pCache->cbMax, 5 * _1M);
904 AssertLogRelRCReturn(rc, rc);
905
906 RTListInit(&pCache->ListEndpoints);
907 pCache->cRefs = 0;
908 pCache->cbCached = 0;
909 pCache->fCommitInProgress = 0;
910 LogFlowFunc((": Maximum number of bytes cached %u\n", pCache->cbMax));
911
912 /* Initialize members */
913 pCache->LruRecentlyUsedIn.pHead = NULL;
914 pCache->LruRecentlyUsedIn.pTail = NULL;
915 pCache->LruRecentlyUsedIn.cbCached = 0;
916
917 pCache->LruRecentlyUsedOut.pHead = NULL;
918 pCache->LruRecentlyUsedOut.pTail = NULL;
919 pCache->LruRecentlyUsedOut.cbCached = 0;
920
921 pCache->LruFrequentlyUsed.pHead = NULL;
922 pCache->LruFrequentlyUsed.pTail = NULL;
923 pCache->LruFrequentlyUsed.cbCached = 0;
924
925 pCache->cbRecentlyUsedInMax = (pCache->cbMax / 100) * 25; /* 25% of the buffer size */
926 pCache->cbRecentlyUsedOutMax = (pCache->cbMax / 100) * 50; /* 50% of the buffer size */
927 LogFlowFunc((": cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n", pCache->cbRecentlyUsedInMax, pCache->cbRecentlyUsedOutMax));
928
929 /** @todo r=aeichner: Experiment to find optimal default values */
930 rc = CFGMR3QueryU32Def(pCfgNode, "CacheCommitIntervalMs", &pCache->u32CommitTimeoutMs, 10000 /* 10sec */);
931 AssertLogRelRCReturn(rc, rc);
932 rc = CFGMR3QueryU32(pCfgNode, "CacheCommitThreshold", &pCache->cbCommitDirtyThreshold);
933 if ( rc == VERR_CFGM_VALUE_NOT_FOUND
934 || rc == VERR_CFGM_NO_PARENT)
935 {
936 /* Start committing after 50% of the cache are dirty */
937 pCache->cbCommitDirtyThreshold = pCache->cbMax / 2;
938 }
939 else
940 return rc;
941
942 STAMR3Register(pClassFile->Core.pVM, &pCache->cbMax,
943 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
944 "/PDM/AsyncCompletion/File/cbMax",
945 STAMUNIT_BYTES,
946 "Maximum cache size");
947 STAMR3Register(pClassFile->Core.pVM, &pCache->cbCached,
948 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
949 "/PDM/AsyncCompletion/File/cbCached",
950 STAMUNIT_BYTES,
951 "Currently used cache");
952 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedIn.cbCached,
953 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
954 "/PDM/AsyncCompletion/File/cbCachedMruIn",
955 STAMUNIT_BYTES,
956 "Number of bytes cached in MRU list");
957 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedOut.cbCached,
958 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
959 "/PDM/AsyncCompletion/File/cbCachedMruOut",
960 STAMUNIT_BYTES,
961 "Number of bytes cached in FRU list");
962 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyUsed.cbCached,
963 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
964 "/PDM/AsyncCompletion/File/cbCachedFru",
965 STAMUNIT_BYTES,
966 "Number of bytes cached in FRU ghost list");
967
968#ifdef VBOX_WITH_STATISTICS
969 STAMR3Register(pClassFile->Core.pVM, &pCache->cHits,
970 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
971 "/PDM/AsyncCompletion/File/CacheHits",
972 STAMUNIT_COUNT, "Number of hits in the cache");
973 STAMR3Register(pClassFile->Core.pVM, &pCache->cPartialHits,
974 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
975 "/PDM/AsyncCompletion/File/CachePartialHits",
976 STAMUNIT_COUNT, "Number of partial hits in the cache");
977 STAMR3Register(pClassFile->Core.pVM, &pCache->cMisses,
978 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
979 "/PDM/AsyncCompletion/File/CacheMisses",
980 STAMUNIT_COUNT, "Number of misses when accessing the cache");
981 STAMR3Register(pClassFile->Core.pVM, &pCache->StatRead,
982 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
983 "/PDM/AsyncCompletion/File/CacheRead",
984 STAMUNIT_BYTES, "Number of bytes read from the cache");
985 STAMR3Register(pClassFile->Core.pVM, &pCache->StatWritten,
986 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
987 "/PDM/AsyncCompletion/File/CacheWritten",
988 STAMUNIT_BYTES, "Number of bytes written to the cache");
989 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeGet,
990 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
991 "/PDM/AsyncCompletion/File/CacheTreeGet",
992 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
993 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeInsert,
994 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
995 "/PDM/AsyncCompletion/File/CacheTreeInsert",
996 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
997 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeRemove,
998 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
999 "/PDM/AsyncCompletion/File/CacheTreeRemove",
1000 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1001 STAMR3Register(pClassFile->Core.pVM, &pCache->StatBuffersReused,
1002 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1003 "/PDM/AsyncCompletion/File/CacheBuffersReused",
1004 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1005#endif
1006
1007 /* Initialize the critical section */
1008 rc = RTCritSectInit(&pCache->CritSect);
1009
1010 if (RT_SUCCESS(rc))
1011 {
1012 /* Create the commit timer */
1013 if (pCache->u32CommitTimeoutMs > 0)
1014 rc = TMR3TimerCreateInternal(pClassFile->Core.pVM, TMCLOCK_REAL,
1015 pdmacFileCacheCommitTimerCallback,
1016 pClassFile,
1017 "Cache-Commit",
1018 &pClassFile->Cache.pTimerCommit);
1019
1020 if (RT_SUCCESS(rc))
1021 {
1022 LogRel(("AIOMgr: Cache successfully initialised. Cache size is %u bytes\n", pCache->cbMax));
1023 LogRel(("AIOMgr: Cache commit interval is %u ms\n", pCache->u32CommitTimeoutMs));
1024 LogRel(("AIOMgr: Cache commit threshold is %u bytes\n", pCache->cbCommitDirtyThreshold));
1025 return VINF_SUCCESS;
1026 }
1027
1028 RTCritSectDelete(&pCache->CritSect);
1029 }
1030
1031 return rc;
1032}
1033
1034/**
1035 * Destroysthe cache freeing all data.
1036 *
1037 * returns nothing.
1038 * @param pClassFile The global class data for file endpoints.
1039 */
1040void pdmacFileCacheDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
1041{
1042 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
1043
1044 /* Make sure no one else uses the cache now */
1045 pdmacFileCacheLockEnter(pCache);
1046
1047 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1048 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedIn);
1049 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedOut);
1050 pdmacFileCacheDestroyList(&pCache->LruFrequentlyUsed);
1051
1052 pdmacFileCacheLockLeave(pCache);
1053
1054 RTCritSectDelete(&pCache->CritSect);
1055}
1056
1057/**
1058 * Initializes per endpoint cache data
1059 * like the AVL tree used to access cached entries.
1060 *
1061 * @returns VBox status code.
1062 * @param pEndpoint The endpoint to init the cache for,
1063 * @param pClassFile The global class data for file endpoints.
1064 */
1065int pdmacFileEpCacheInit(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
1066{
1067 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1068
1069 pEndpointCache->pCache = &pClassFile->Cache;
1070 RTListInit(&pEndpointCache->ListDirtyNotCommitted);
1071 int rc = RTSpinlockCreate(&pEndpointCache->LockList);
1072
1073 if (RT_SUCCESS(rc))
1074 {
1075 rc = RTSemRWCreate(&pEndpointCache->SemRWEntries);
1076 if (RT_SUCCESS(rc))
1077 {
1078 pEndpointCache->pTree = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1079 if (pEndpointCache->pTree)
1080 {
1081 pClassFile->Cache.cRefs++;
1082 RTListAppend(&pClassFile->Cache.ListEndpoints, &pEndpointCache->NodeCacheEndpoint);
1083
1084 /* Arm the timer if this is the first endpoint. */
1085 if ( pClassFile->Cache.cRefs == 1
1086 && pClassFile->Cache.u32CommitTimeoutMs > 0)
1087 rc = TMTimerSetMillies(pClassFile->Cache.pTimerCommit, pClassFile->Cache.u32CommitTimeoutMs);
1088 }
1089 else
1090 rc = VERR_NO_MEMORY;
1091
1092 if (RT_FAILURE(rc))
1093 RTSemRWDestroy(pEndpointCache->SemRWEntries);
1094 }
1095
1096 if (RT_FAILURE(rc))
1097 RTSpinlockDestroy(pEndpointCache->LockList);
1098 }
1099
1100#ifdef VBOX_WITH_STATISTICS
1101 if (RT_SUCCESS(rc))
1102 {
1103 STAMR3RegisterF(pClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred,
1104 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1105 STAMUNIT_COUNT, "Number of deferred writes",
1106 "/PDM/AsyncCompletion/File/%s/Cache/DeferredWrites", RTPathFilename(pEndpoint->Core.pszUri));
1107 }
1108#endif
1109
1110 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1111 return rc;
1112}
1113
1114/**
1115 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1116 *
1117 * @returns IPRT status code.
1118 * @param pNode The node to destroy.
1119 * @param pvUser Opaque user data.
1120 */
1121static int pdmacFileEpCacheEntryDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
1122{
1123 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pNode;
1124 PPDMACFILECACHEGLOBAL pCache = (PPDMACFILECACHEGLOBAL)pvUser;
1125 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEntry->pEndpoint->DataCache;
1126
1127 while (ASMAtomicReadU32(&pEntry->fFlags) & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY))
1128 {
1129 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1130 RTThreadSleep(250);
1131 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1132 }
1133
1134 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
1135 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1136
1137 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1138 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1139
1140 pdmacFileCacheEntryRemoveFromList(pEntry);
1141
1142 if (fUpdateCache)
1143 pdmacFileCacheSub(pCache, pEntry->cbData);
1144
1145 RTMemPageFree(pEntry->pbData);
1146 RTMemFree(pEntry);
1147
1148 return VINF_SUCCESS;
1149}
1150
1151/**
1152 * Destroys all cache ressources used by the given endpoint.
1153 *
1154 * @returns nothing.
1155 * @param pEndpoint The endpoint to the destroy.
1156 */
1157void pdmacFileEpCacheDestroy(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1158{
1159 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1160 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1161
1162 /* Make sure nobody is accessing the cache while we delete the tree. */
1163 pdmacFileCacheLockEnter(pCache);
1164 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1165 RTAvlrFileOffsetDestroy(pEndpointCache->pTree, pdmacFileEpCacheEntryDestroy, pCache);
1166 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1167
1168 RTSpinlockDestroy(pEndpointCache->LockList);
1169
1170 pCache->cRefs--;
1171 RTListNodeRemove(&pEndpointCache->NodeCacheEndpoint);
1172
1173 if ( !pCache->cRefs
1174 && pCache->u32CommitTimeoutMs > 0)
1175 TMTimerStop(pCache->pTimerCommit);
1176
1177 pdmacFileCacheLockLeave(pCache);
1178
1179 RTSemRWDestroy(pEndpointCache->SemRWEntries);
1180
1181#ifdef VBOX_WITH_STATISTICS
1182 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
1183
1184 STAMR3Deregister(pEpClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred);
1185#endif
1186}
1187
1188static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
1189{
1190 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1191 PPDMACFILECACHEENTRY pEntry = NULL;
1192
1193 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1194
1195 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1196 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetRangeGet(pEndpointCache->pTree, off);
1197 if (pEntry)
1198 pdmacFileEpCacheEntryRef(pEntry);
1199 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
1200
1201 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1202
1203 return pEntry;
1204}
1205
1206static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheBestFitEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
1207{
1208 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1209 PPDMACFILECACHEENTRY pEntry = NULL;
1210
1211 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1212
1213 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1214 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetGetBestFit(pEndpointCache->pTree, off, true /*fAbove*/);
1215 if (pEntry)
1216 pdmacFileEpCacheEntryRef(pEntry);
1217 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
1218
1219 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1220
1221 return pEntry;
1222}
1223
1224static void pdmacFileEpCacheInsertEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
1225{
1226 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1227
1228 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1229 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1230 bool fInserted = RTAvlrFileOffsetInsert(pEndpointCache->pTree, &pEntry->Core);
1231 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1232 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1233 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1234}
1235
1236/**
1237 * Allocates and initializes a new entry for the cache.
1238 * The entry has a reference count of 1.
1239 *
1240 * @returns Pointer to the new cache entry or NULL if out of memory.
1241 * @param pCache The cache the entry belongs to.
1242 * @param pEndoint The endpoint the entry holds data for.
1243 * @param off Start offset.
1244 * @param cbData Size of the cache entry.
1245 * @param pbBuffer Pointer to the buffer to use.
1246 * NULL if a new buffer should be allocated.
1247 * The buffer needs to have the same size of the entry.
1248 */
1249static PPDMACFILECACHEENTRY pdmacFileCacheEntryAlloc(PPDMACFILECACHEGLOBAL pCache,
1250 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1251 RTFOFF off, size_t cbData, uint8_t *pbBuffer)
1252{
1253 PPDMACFILECACHEENTRY pEntryNew = (PPDMACFILECACHEENTRY)RTMemAllocZ(sizeof(PDMACFILECACHEENTRY));
1254
1255 if (RT_UNLIKELY(!pEntryNew))
1256 return NULL;
1257
1258 pEntryNew->Core.Key = off;
1259 pEntryNew->Core.KeyLast = off + cbData - 1;
1260 pEntryNew->pEndpoint = pEndpoint;
1261 pEntryNew->pCache = pCache;
1262 pEntryNew->fFlags = 0;
1263 pEntryNew->cRefs = 1; /* We are using it now. */
1264 pEntryNew->pList = NULL;
1265 pEntryNew->cbData = cbData;
1266 pEntryNew->pWaitingHead = NULL;
1267 pEntryNew->pWaitingTail = NULL;
1268 pEntryNew->pbDataReplace = NULL;
1269 if (pbBuffer)
1270 pEntryNew->pbData = pbBuffer;
1271 else
1272 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1273
1274 if (RT_UNLIKELY(!pEntryNew->pbData))
1275 {
1276 RTMemFree(pEntryNew);
1277 return NULL;
1278 }
1279
1280 return pEntryNew;
1281}
1282
1283/**
1284 * Adds a segment to the waiting list for a cache entry
1285 * which is currently in progress.
1286 *
1287 * @returns nothing.
1288 * @param pEntry The cache entry to add the segment to.
1289 * @param pSeg The segment to add.
1290 */
1291DECLINLINE(void) pdmacFileEpCacheEntryAddWaitingSegment(PPDMACFILECACHEENTRY pEntry, PPDMACFILETASKSEG pSeg)
1292{
1293 pSeg->pNext = NULL;
1294
1295 if (pEntry->pWaitingHead)
1296 {
1297 AssertPtr(pEntry->pWaitingTail);
1298
1299 pEntry->pWaitingTail->pNext = pSeg;
1300 pEntry->pWaitingTail = pSeg;
1301 }
1302 else
1303 {
1304 Assert(!pEntry->pWaitingTail);
1305
1306 pEntry->pWaitingHead = pSeg;
1307 pEntry->pWaitingTail = pSeg;
1308 }
1309}
1310
1311/**
1312 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1313 * in exclusive mode.
1314 *
1315 * @returns true if the flag in fSet is set and the one in fClear is clear.
1316 * false othwerise.
1317 * The R/W semaphore is only held if true is returned.
1318 *
1319 * @param pEndpointCache The endpoint cache instance data.
1320 * @param pEntry The entry to check the flags for.
1321 * @param fSet The flag which is tested to be set.
1322 * @param fClear The flag which is tested to be clear.
1323 */
1324DECLINLINE(bool) pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(PPDMACFILEENDPOINTCACHE pEndpointCache,
1325 PPDMACFILECACHEENTRY pEntry,
1326 uint32_t fSet, uint32_t fClear)
1327{
1328 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1329 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1330
1331 if (fPassed)
1332 {
1333 /* Acquire the lock and check again becuase the completion callback might have raced us. */
1334 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1335
1336 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1337 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1338
1339 /* Drop the lock if we didn't passed the test. */
1340 if (!fPassed)
1341 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1342 }
1343
1344 return fPassed;
1345}
1346
1347/**
1348 * Copies data to a buffer described by a I/O memory context.
1349 *
1350 * @returns nothing.
1351 * @param pIoMemCtx The I/O memory context to copy the data into.
1352 * @param pbData Pointer to the data data to copy.
1353 * @param cbData Amount of data to copy.
1354 */
1355static void pdmacFileEpCacheCopyToIoMemCtx(PPDMIOMEMCTX pIoMemCtx,
1356 uint8_t *pbData,
1357 size_t cbData)
1358{
1359 while (cbData)
1360 {
1361 size_t cbCopy = cbData;
1362 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbCopy);
1363
1364 AssertPtr(pbBuf);
1365
1366 memcpy(pbBuf, pbData, cbCopy);
1367
1368 cbData -= cbCopy;
1369 pbData += cbCopy;
1370 }
1371}
1372
1373/**
1374 * Copies data from a buffer described by a I/O memory context.
1375 *
1376 * @returns nothing.
1377 * @param pIoMemCtx The I/O memory context to copy the data from.
1378 * @param pbData Pointer to the destination buffer.
1379 * @param cbData Amount of data to copy.
1380 */
1381static void pdmacFileEpCacheCopyFromIoMemCtx(PPDMIOMEMCTX pIoMemCtx,
1382 uint8_t *pbData,
1383 size_t cbData)
1384{
1385 while (cbData)
1386 {
1387 size_t cbCopy = cbData;
1388 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbCopy);
1389
1390 AssertPtr(pbBuf);
1391
1392 memcpy(pbData, pbBuf, cbCopy);
1393
1394 cbData -= cbCopy;
1395 pbData += cbCopy;
1396 }
1397}
1398
1399/**
1400 * Add a buffer described by the I/O memory context
1401 * to the entry waiting for completion.
1402 *
1403 * @returns nothing.
1404 * @param pEntry The entry to add the buffer to.
1405 * @param pTask Task associated with the buffer.
1406 * @param pIoMemCtx The memory context to use.
1407 * @param OffDiff Offset from the start of the buffer
1408 * in the entry.
1409 * @param cbData Amount of data to wait for onthis entry.
1410 * @param fWrite Flag whether the task waits because it wants to write
1411 * to the cache entry.
1412 */
1413static void pdmacFileEpCacheEntryWaitersAdd(PPDMACFILECACHEENTRY pEntry,
1414 PPDMASYNCCOMPLETIONTASKFILE pTask,
1415 PPDMIOMEMCTX pIoMemCtx,
1416 RTFOFF OffDiff,
1417 size_t cbData,
1418 bool fWrite)
1419{
1420 while (cbData)
1421 {
1422 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1423 size_t cbSeg = cbData;
1424 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbSeg);
1425
1426 pSeg->pTask = pTask;
1427 pSeg->uBufOffset = OffDiff;
1428 pSeg->cbTransfer = cbSeg;
1429 pSeg->pvBuf = pbBuf;
1430 pSeg->fWrite = fWrite;
1431
1432 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1433
1434 cbData -= cbSeg;
1435 OffDiff += cbSeg;
1436 }
1437}
1438
1439/**
1440 * Passthrough a part of a request directly to the I/O manager
1441 * handling the endpoint.
1442 *
1443 * @returns nothing.
1444 * @param pEndpoint The endpoint.
1445 * @param pTask The task.
1446 * @param pIoMemCtx The I/O memory context to use.
1447 * @param offStart Offset to start transfer from.
1448 * @param cbData Amount of data to transfer.
1449 * @param enmTransferType The transfer type (read/write)
1450 */
1451static void pdmacFileEpCacheRequestPassthrough(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1452 PPDMASYNCCOMPLETIONTASKFILE pTask,
1453 PPDMIOMEMCTX pIoMemCtx,
1454 RTFOFF offStart, size_t cbData,
1455 PDMACTASKFILETRANSFER enmTransferType)
1456{
1457 while (cbData)
1458 {
1459 size_t cbSeg = cbData;
1460 uint8_t *pbBuf = pdmIoMemCtxGetBuffer(pIoMemCtx, &cbSeg);
1461 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1462 AssertPtr(pIoTask);
1463
1464 pIoTask->pEndpoint = pEndpoint;
1465 pIoTask->enmTransferType = enmTransferType;
1466 pIoTask->Off = offStart;
1467 pIoTask->DataSeg.cbSeg = cbSeg;
1468 pIoTask->DataSeg.pvSeg = pbBuf;
1469 pIoTask->pvUser = pTask;
1470 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1471
1472 offStart += cbSeg;
1473 cbData -= cbSeg;
1474
1475 /* Send it off to the I/O manager. */
1476 pdmacFileEpAddTask(pEndpoint, pIoTask);
1477 }
1478}
1479
1480/**
1481 * Reads the specified data from the endpoint using the cache if possible.
1482 *
1483 * @returns VBox status code.
1484 * @param pEndpoint The endpoint to read from.
1485 * @param pTask The task structure used as identifier for this request.
1486 * @param off The offset to start reading from.
1487 * @param paSegments Pointer to the array holding the destination buffers.
1488 * @param cSegments Number of segments in the array.
1489 * @param cbRead Number of bytes to read.
1490 */
1491int pdmacFileEpCacheRead(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1492 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1493 size_t cbRead)
1494{
1495 int rc = VINF_SUCCESS;
1496 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1497 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1498 PPDMACFILECACHEENTRY pEntry;
1499
1500 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbRead=%u\n",
1501 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbRead));
1502
1503 pTask->cbTransferLeft = cbRead;
1504 /* Set to completed to make sure that the task is valid while we access it. */
1505 ASMAtomicWriteBool(&pTask->fCompleted, true);
1506
1507 /* Init the I/O memory context */
1508 PDMIOMEMCTX IoMemCtx;
1509 pdmIoMemCtxInit(&IoMemCtx, paSegments, cSegments);
1510
1511 while (cbRead)
1512 {
1513 size_t cbToRead;
1514
1515 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1516
1517 /*
1518 * If there is no entry we try to create a new one eviciting unused pages
1519 * if the cache is full. If this is not possible we will pass the request through
1520 * and skip the caching (all entries may be still in progress so they can't
1521 * be evicted)
1522 * If we have an entry it can be in one of the LRU lists where the entry
1523 * contains data (recently used or frequently used LRU) so we can just read
1524 * the data we need and put the entry at the head of the frequently used LRU list.
1525 * In case the entry is in one of the ghost lists it doesn't contain any data.
1526 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1527 */
1528 if (pEntry)
1529 {
1530 RTFOFF OffDiff = off - pEntry->Core.Key;
1531
1532 AssertMsg(off >= pEntry->Core.Key,
1533 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1534 off, pEntry->Core.Key));
1535
1536 AssertPtr(pEntry->pList);
1537
1538 cbToRead = RT_MIN(pEntry->cbData - OffDiff, cbRead);
1539
1540 AssertMsg(off + (RTFOFF)cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1541 ("Buffer of cache entry exceeded off=%RTfoff cbToRead=%d\n",
1542 off, cbToRead));
1543
1544 cbRead -= cbToRead;
1545
1546 if (!cbRead)
1547 STAM_COUNTER_INC(&pCache->cHits);
1548 else
1549 STAM_COUNTER_INC(&pCache->cPartialHits);
1550
1551 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1552
1553 /* Ghost lists contain no data. */
1554 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1555 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1556 {
1557 if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1558 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1559 0))
1560 {
1561 /* Entry is deprecated. Read data from the new buffer. */
1562 pdmacFileEpCacheCopyToIoMemCtx(&IoMemCtx, pEntry->pbDataReplace + OffDiff, cbToRead);
1563 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToRead);
1564 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1565 }
1566 else
1567 {
1568 if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1569 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1570 PDMACFILECACHE_ENTRY_IS_DIRTY))
1571 {
1572 /* Entry didn't completed yet. Append to the list */
1573 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1574 &IoMemCtx,
1575 OffDiff, cbToRead,
1576 false /* fWrite */);
1577 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1578 }
1579 else
1580 {
1581 /* Read as much as we can from the entry. */
1582 pdmacFileEpCacheCopyToIoMemCtx(&IoMemCtx, pEntry->pbData + OffDiff, cbToRead);
1583 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToRead);
1584 }
1585 }
1586
1587 /* Move this entry to the top position */
1588 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1589 {
1590 pdmacFileCacheLockEnter(pCache);
1591 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1592 pdmacFileCacheLockLeave(pCache);
1593 }
1594 /* Release the entry */
1595 pdmacFileEpCacheEntryRelease(pEntry);
1596 }
1597 else
1598 {
1599 uint8_t *pbBuffer = NULL;
1600
1601 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
1602
1603 pdmacFileCacheLockEnter(pCache);
1604 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1605 bool fEnough = pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1606
1607 /* Move the entry to Am and fetch it to the cache. */
1608 if (fEnough)
1609 {
1610 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1611 pdmacFileCacheAdd(pCache, pEntry->cbData);
1612 pdmacFileCacheLockLeave(pCache);
1613
1614 if (pbBuffer)
1615 pEntry->pbData = pbBuffer;
1616 else
1617 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1618 AssertPtr(pEntry->pbData);
1619
1620 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1621 &IoMemCtx,
1622 OffDiff, cbToRead,
1623 false /* fWrite */);
1624 pdmacFileCacheReadFromEndpoint(pEntry);
1625 /* Release the entry */
1626 pdmacFileEpCacheEntryRelease(pEntry);
1627 }
1628 else
1629 {
1630 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1631 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
1632 RTAvlrFileOffsetRemove(pEndpointCache->pTree, pEntry->Core.Key);
1633 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
1634 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1635
1636 pdmacFileCacheLockLeave(pCache);
1637
1638 RTMemFree(pEntry);
1639
1640 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
1641 &IoMemCtx, off, cbToRead,
1642 PDMACTASKFILETRANSFER_READ);
1643 }
1644 }
1645 }
1646 else
1647 {
1648 /* No entry found for this offset. Get best fit entry and fetch the data to the cache. */
1649 size_t cbToReadAligned;
1650 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1651
1652 LogFlow(("%sbest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1653 pEntryBestFit ? "" : "No ",
1654 off,
1655 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1656 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1657 pEntryBestFit ? pEntryBestFit->cbData : 0));
1658
1659 if ( pEntryBestFit
1660 && off + (RTFOFF)cbRead > pEntryBestFit->Core.Key)
1661 {
1662 cbToRead = pEntryBestFit->Core.Key - off;
1663 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1664 cbToReadAligned = cbToRead;
1665 }
1666 else
1667 {
1668 /*
1669 * Align the size to a 4KB boundary.
1670 * Memory size is aligned to a page boundary
1671 * and memory is wasted if the size is rahter small.
1672 * (For example reads with a size of 512 bytes.
1673 */
1674 cbToRead = cbRead;
1675 cbToReadAligned = RT_ALIGN_Z(cbRead, PAGE_SIZE);
1676
1677 /* Clip read to file size */
1678 cbToReadAligned = RT_MIN(pEndpoint->cbFile - off, cbToReadAligned);
1679 if (pEntryBestFit)
1680 {
1681 Assert(pEntryBestFit->Core.Key >= off);
1682 cbToReadAligned = RT_MIN(cbToReadAligned, (uint64_t)pEntryBestFit->Core.Key - off);
1683 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1684 }
1685 }
1686
1687 cbRead -= cbToRead;
1688
1689 if (!cbRead)
1690 STAM_COUNTER_INC(&pCache->cMisses);
1691 else
1692 STAM_COUNTER_INC(&pCache->cPartialHits);
1693
1694 uint8_t *pbBuffer = NULL;
1695
1696 pdmacFileCacheLockEnter(pCache);
1697 bool fEnough = pdmacFileCacheReclaim(pCache, cbToReadAligned, true, &pbBuffer);
1698
1699 if (fEnough)
1700 {
1701 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToReadAligned));
1702
1703 PPDMACFILECACHEENTRY pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToReadAligned, pbBuffer);
1704 AssertPtr(pEntryNew);
1705
1706 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1707 pdmacFileCacheAdd(pCache, cbToReadAligned);
1708 pdmacFileCacheLockLeave(pCache);
1709
1710 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
1711
1712 AssertMsg( (off >= pEntryNew->Core.Key)
1713 && (off + (RTFOFF)cbToRead <= pEntryNew->Core.Key + pEntryNew->Core.KeyLast + 1),
1714 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1715 off, pEntryNew->Core.Key));
1716
1717 pdmacFileEpCacheEntryWaitersAdd(pEntryNew, pTask,
1718 &IoMemCtx, 0, cbToRead,
1719 false /* fWrite */);
1720 pdmacFileCacheReadFromEndpoint(pEntryNew);
1721 pdmacFileEpCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1722 }
1723 else
1724 {
1725 pdmacFileCacheLockLeave(pCache);
1726
1727 /*
1728 * There is not enough free space in the cache.
1729 * Pass the request directly to the I/O manager.
1730 */
1731 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
1732
1733 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
1734 &IoMemCtx, off, cbToRead,
1735 PDMACTASKFILETRANSFER_READ);
1736 }
1737 }
1738 off += cbToRead;
1739 }
1740
1741 ASMAtomicWriteBool(&pTask->fCompleted, false);
1742
1743 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
1744 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
1745 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
1746 else
1747 rc = VINF_AIO_TASK_PENDING;
1748
1749 LogFlowFunc((": Leave rc=%Rrc\n", rc));
1750
1751 return rc;
1752}
1753
1754/**
1755 * Writes the given data to the endpoint using the cache if possible.
1756 *
1757 * @returns VBox status code.
1758 * @param pEndpoint The endpoint to write to.
1759 * @param pTask The task structure used as identifier for this request.
1760 * @param off The offset to start writing to
1761 * @param paSegments Pointer to the array holding the source buffers.
1762 * @param cSegments Number of segments in the array.
1763 * @param cbWrite Number of bytes to write.
1764 */
1765int pdmacFileEpCacheWrite(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1766 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1767 size_t cbWrite)
1768{
1769 int rc = VINF_SUCCESS;
1770 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1771 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1772 PPDMACFILECACHEENTRY pEntry;
1773
1774 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbWrite=%u\n",
1775 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbWrite));
1776
1777 pTask->cbTransferLeft = cbWrite;
1778 /* Set to completed to make sure that the task is valid while we access it. */
1779 ASMAtomicWriteBool(&pTask->fCompleted, true);
1780
1781 /* Init the I/O memory context */
1782 PDMIOMEMCTX IoMemCtx;
1783 pdmIoMemCtxInit(&IoMemCtx, paSegments, cSegments);
1784
1785 while (cbWrite)
1786 {
1787 size_t cbToWrite;
1788
1789 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1790
1791 if (pEntry)
1792 {
1793 /* Write the data into the entry and mark it as dirty */
1794 AssertPtr(pEntry->pList);
1795
1796 RTFOFF OffDiff = off - pEntry->Core.Key;
1797
1798 AssertMsg(off >= pEntry->Core.Key,
1799 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1800 off, pEntry->Core.Key));
1801
1802 cbToWrite = RT_MIN(pEntry->cbData - OffDiff, cbWrite);
1803 cbWrite -= cbToWrite;
1804
1805 if (!cbWrite)
1806 STAM_COUNTER_INC(&pCache->cHits);
1807 else
1808 STAM_COUNTER_INC(&pCache->cPartialHits);
1809
1810 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1811
1812 /* Ghost lists contain no data. */
1813 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1814 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1815 {
1816 /* Check if the buffer is deprecated. */
1817 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1818 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1819 0))
1820 {
1821 AssertMsg(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1822 ("Entry is deprecated but not in progress\n"));
1823 AssertPtr(pEntry->pbDataReplace);
1824
1825 LogFlow(("Writing to deprecated buffer of entry %#p\n", pEntry));
1826
1827 /* Update the data from the write. */
1828 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1829 pEntry->pbDataReplace + OffDiff,
1830 cbToWrite);
1831 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
1832 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1833 }
1834 else /* Deprecated flag not set */
1835 {
1836 /* Check if the entry is dirty. */
1837 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1838 PDMACFILECACHE_ENTRY_IS_DIRTY,
1839 0))
1840 {
1841 /* If it is dirty but not in progrss just update the data. */
1842 if (!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS))
1843 {
1844 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1845 pEntry->pbData + OffDiff,
1846 cbToWrite);
1847 }
1848 else
1849 {
1850 Assert(!pEntry->pbDataReplace);
1851
1852 /* Deprecate the current buffer. */
1853 if (!pEntry->pWaitingHead)
1854 pEntry->pbDataReplace = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1855
1856 /* If we are out of memory or have waiting segments
1857 * defer the write. */
1858 if (!pEntry->pbDataReplace || pEntry->pWaitingHead)
1859 {
1860 /* The data isn't written to the file yet */
1861 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1862 &IoMemCtx,
1863 OffDiff, cbToWrite,
1864 true /* fWrite */);
1865 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1866 }
1867 else /* Deprecate buffer */
1868 {
1869 LogFlow(("Deprecating buffer for entry %#p\n", pEntry));
1870 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DEPRECATED;
1871
1872 /* Copy the data before the update. */
1873 if (OffDiff)
1874 memcpy(pEntry->pbDataReplace, pEntry->pbData, OffDiff);
1875
1876 /* Copy data behind the update. */
1877 if ((pEntry->cbData - OffDiff - cbToWrite) > 0)
1878 memcpy(pEntry->pbDataReplace + OffDiff + cbToWrite,
1879 pEntry->pbData + OffDiff + cbToWrite,
1880 (pEntry->cbData - OffDiff - cbToWrite));
1881
1882 /* Update the data from the write. */
1883 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1884 pEntry->pbDataReplace + OffDiff,
1885 cbToWrite);
1886 /* We are done here. A new write is initiated if the current request completes. */
1887 }
1888 }
1889
1890 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
1891 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1892 }
1893 else /* Dirty bit not set */
1894 {
1895 /*
1896 * Check if a read is in progress for this entry.
1897 * We have to defer processing in that case.
1898 */
1899 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1900 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1901 0))
1902 {
1903 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1904 &IoMemCtx,
1905 OffDiff, cbToWrite,
1906 true /* fWrite */);
1907 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1908 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1909 }
1910 else /* I/O in progress flag not set */
1911 {
1912 /* Write as much as we can into the entry and update the file. */
1913 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
1914 pEntry->pbData + OffDiff,
1915 cbToWrite);
1916 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
1917
1918 bool fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntry);
1919 if (fCommit)
1920 pdmacFileCacheCommitDirtyEntries(pCache);
1921 }
1922 } /* Dirty bit not set */
1923
1924 /* Move this entry to the top position */
1925 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1926 {
1927 pdmacFileCacheLockEnter(pCache);
1928 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1929 pdmacFileCacheLockLeave(pCache);
1930 } /* Deprecated flag not set. */
1931 }
1932 pdmacFileEpCacheEntryRelease(pEntry);
1933 }
1934 else /* Entry is on the ghost list */
1935 {
1936 uint8_t *pbBuffer = NULL;
1937
1938 pdmacFileCacheLockEnter(pCache);
1939 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1940 bool fEnough = pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1941
1942 if (fEnough)
1943 {
1944 /* Move the entry to Am and fetch it to the cache. */
1945 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1946 pdmacFileCacheAdd(pCache, pEntry->cbData);
1947 pdmacFileCacheLockLeave(pCache);
1948
1949 if (pbBuffer)
1950 pEntry->pbData = pbBuffer;
1951 else
1952 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1953 AssertPtr(pEntry->pbData);
1954
1955 pdmacFileEpCacheEntryWaitersAdd(pEntry, pTask,
1956 &IoMemCtx,
1957 OffDiff, cbToWrite,
1958 true /* fWrite */);
1959 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1960 pdmacFileCacheReadFromEndpoint(pEntry);
1961
1962 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
1963 pdmacFileEpCacheEntryRelease(pEntry);
1964 }
1965 else
1966 {
1967 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1968 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
1969 RTAvlrFileOffsetRemove(pEndpointCache->pTree, pEntry->Core.Key);
1970 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
1971 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1972
1973 pdmacFileCacheLockLeave(pCache);
1974
1975 RTMemFree(pEntry);
1976 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
1977 &IoMemCtx, off, cbToWrite,
1978 PDMACTASKFILETRANSFER_WRITE);
1979 }
1980 }
1981 }
1982 else /* No entry found */
1983 {
1984 /*
1985 * No entry found. Try to create a new cache entry to store the data in and if that fails
1986 * write directly to the file.
1987 */
1988 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1989
1990 LogFlow(("%sest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1991 pEntryBestFit ? "B" : "No b",
1992 off,
1993 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1994 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1995 pEntryBestFit ? pEntryBestFit->cbData : 0));
1996
1997 if (pEntryBestFit && ((off + (RTFOFF)cbWrite) > pEntryBestFit->Core.Key))
1998 {
1999 cbToWrite = pEntryBestFit->Core.Key - off;
2000 pdmacFileEpCacheEntryRelease(pEntryBestFit);
2001 }
2002 else
2003 {
2004 if (pEntryBestFit)
2005 pdmacFileEpCacheEntryRelease(pEntryBestFit);
2006
2007 cbToWrite = cbWrite;
2008 }
2009
2010 cbWrite -= cbToWrite;
2011
2012 STAM_COUNTER_INC(&pCache->cMisses);
2013
2014 uint8_t *pbBuffer = NULL;
2015
2016 pdmacFileCacheLockEnter(pCache);
2017 bool fEnough = pdmacFileCacheReclaim(pCache, cbToWrite, true, &pbBuffer);
2018
2019 if (fEnough)
2020 {
2021 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToWrite));
2022
2023 PPDMACFILECACHEENTRY pEntryNew;
2024
2025 pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToWrite, pbBuffer);
2026 AssertPtr(pEntryNew);
2027
2028 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
2029 pdmacFileCacheAdd(pCache, cbToWrite);
2030 pdmacFileCacheLockLeave(pCache);
2031
2032 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
2033
2034 pdmacFileEpCacheCopyFromIoMemCtx(&IoMemCtx,
2035 pEntryNew->pbData,
2036 cbToWrite);
2037 ASMAtomicSubS32(&pTask->cbTransferLeft, cbToWrite);
2038
2039 bool fCommit = pdmacFileCacheAddDirtyEntry(pEndpointCache, pEntryNew);
2040 if (fCommit)
2041 pdmacFileCacheCommitDirtyEntries(pCache);
2042 pdmacFileEpCacheEntryRelease(pEntryNew);
2043 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2044 }
2045 else
2046 {
2047 pdmacFileCacheLockLeave(pCache);
2048
2049 /*
2050 * There is not enough free space in the cache.
2051 * Pass the request directly to the I/O manager.
2052 */
2053 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2054
2055 pdmacFileEpCacheRequestPassthrough(pEndpoint, pTask,
2056 &IoMemCtx, off, cbToWrite,
2057 PDMACTASKFILETRANSFER_WRITE);
2058 }
2059 }
2060
2061 off += cbToWrite;
2062 }
2063
2064 ASMAtomicWriteBool(&pTask->fCompleted, false);
2065
2066 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
2067 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
2068 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
2069 else
2070 rc = VINF_AIO_TASK_PENDING;
2071
2072 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2073
2074 return rc;
2075}
2076
2077int pdmacFileEpCacheFlush(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask)
2078{
2079 int rc = VINF_SUCCESS;
2080
2081 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p\n",
2082 pEndpoint, pEndpoint->Core.pszUri, pTask));
2083
2084 if (ASMAtomicReadPtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush))
2085 rc = VERR_RESOURCE_BUSY;
2086 else
2087 {
2088 /* Check for dirty entries in the cache. */
2089 pdmacFileCacheEndpointCommit(&pEndpoint->DataCache);
2090 if (ASMAtomicReadU32(&pEndpoint->DataCache.cWritesOutstanding) > 0)
2091 {
2092 ASMAtomicWritePtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush, pTask);
2093 rc = VINF_AIO_TASK_PENDING;
2094 }
2095 else
2096 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
2097 }
2098
2099 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2100 return rc;
2101}
2102
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette