VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileCache.cpp@26175

Last change on this file since 26175 was 26147, checked in by vboxsync, 15 years ago

AsyncCompletion: Fix incorrect count of outstanding write tasks. Fixes hangs during flush requests. Return VINF_AIO_TASK_PENDING if data needs to be read from the file

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 76.5 KB
1/* $Id: PDMAsyncCompletionFileCache.cpp 26147 2010-02-02 13:55:20Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronously in R3 using EMT.
4 * File data cache.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22
23/** @page pg_pdm_async_completion_cache PDM Async Completion Cache - The file I/O cache
24 * This component implements an I/O cache for file endpoints based on the ARC algorithm.
25 * http://en.wikipedia.org/wiki/Adaptive_Replacement_Cache
26 *
27 * The algorithm uses four LRU (least recently used) lists to store data in the cache.
28 * Two of them contain data: one stores entries which were accessed recently and the other
29 * holds frequently accessed data.
30 * The other two lists are called ghost lists and store information about the accessed range
31 * but do not contain data. They are used to track data accesses. If one of these entries is
32 * accessed again, the data is promoted to a higher position in the cache, preventing it from
33 * being evicted again quickly.
34 *
35 * The algorithm needs to be modified to meet our requirements. Like the implementation
36 * for the ZFS filesystem, we need to handle pages of variable size. It would
37 * be possible to use a fixed size, but that would increase the computational
38 * and memory overhead.
39 * Because we do I/O asynchronously, we also need to mark entries which are currently accessed
40 * as non-evictable to prevent removal of an entry while its data is being accessed.
41 */
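
/*
 * Rough sketch of the list layout used when VBOX_WITH_2Q_CACHE is defined
 * (a 2Q style simplification of ARC, see pdmacFileCacheInit() and
 * pdmacFileCacheReclaim() below):
 *
 *   LruRecentlyUsedIn  (A1in)  - entries with data, seen for the first time,
 *                                capped at 25% of the cache size.
 *   LruRecentlyUsedOut (A1out) - ghost entries without data, evicted from A1in,
 *                                capped at 50% of the cache size.
 *   LruFrequentlyUsed  (Am)    - entries with data which were accessed again
 *                                while on A1out or Am.
 *
 * Without VBOX_WITH_2Q_CACHE the original four ARC lists (recently/frequently
 * used plus their ghost lists) and the adaptation value uAdaptVal are used.
 */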
42
43/*******************************************************************************
44* Header Files *
45*******************************************************************************/
46#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
47#include <iprt/types.h>
48#include <iprt/mem.h>
49#include <iprt/path.h>
50#include <VBox/log.h>
51#include <VBox/stam.h>
52
53#include "PDMAsyncCompletionFileInternal.h"
54
55#ifdef VBOX_STRICT
56# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
57 do \
58 { \
59 AssertMsg(RTCritSectIsOwner(&pCache->CritSect), \
60 ("Thread does not own critical section\n"));\
61 } while(0);
62#else
63# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0);
64#endif
65
66/*******************************************************************************
67* Internal Functions *
68*******************************************************************************/
69static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser);
70
71DECLINLINE(void) pdmacFileEpCacheEntryRelease(PPDMACFILECACHEENTRY pEntry)
72{
73 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
74 ASMAtomicDecU32(&pEntry->cRefs);
75}
76
77DECLINLINE(void) pdmacFileEpCacheEntryRef(PPDMACFILECACHEENTRY pEntry)
78{
79 ASMAtomicIncU32(&pEntry->cRefs);
80}
81
82/**
83 * Checks consistency of a LRU list.
84 *
85 * @returns nothing
86 * @param pList The LRU list to check.
87 * @param pNotInList Element which is not allowed to occur in the list.
88 */
89static void pdmacFileCacheCheckList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pNotInList)
90{
91#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
92 PPDMACFILECACHEENTRY pCurr = pList->pHead;
93
94 /* Check that there are no double entries and no cycles in the list. */
95 while (pCurr)
96 {
97 PPDMACFILECACHEENTRY pNext = pCurr->pNext;
98
99 while (pNext)
100 {
101 AssertMsg(pCurr != pNext,
102 ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
103 pCurr, pList));
104 pNext = pNext->pNext;
105 }
106
107 AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));
108
109 if (!pCurr->pNext)
110 AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
111
112 pCurr = pCurr->pNext;
113 }
114#endif
115}
116
117/**
118 * Unlinks a cache entry from the LRU list it is assigned to.
119 *
120 * @returns nothing.
121 * @param pEntry The entry to unlink.
122 */
123static void pdmacFileCacheEntryRemoveFromList(PPDMACFILECACHEENTRY pEntry)
124{
125 PPDMACFILELRULIST pList = pEntry->pList;
126 PPDMACFILECACHEENTRY pPrev, pNext;
127
128 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
129
130 AssertPtr(pList);
131 pdmacFileCacheCheckList(pList, NULL);
132
133 pPrev = pEntry->pPrev;
134 pNext = pEntry->pNext;
135
136 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
137 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
138
139 if (pPrev)
140 pPrev->pNext = pNext;
141 else
142 {
143 pList->pHead = pNext;
144
145 if (pNext)
146 pNext->pPrev = NULL;
147 }
148
149 if (pNext)
150 pNext->pPrev = pPrev;
151 else
152 {
153 pList->pTail = pPrev;
154
155 if (pPrev)
156 pPrev->pNext = NULL;
157 }
158
159 pEntry->pList = NULL;
160 pEntry->pPrev = NULL;
161 pEntry->pNext = NULL;
162 pList->cbCached -= pEntry->cbData;
163 pdmacFileCacheCheckList(pList, pEntry);
164}
165
166/**
167 * Adds a cache entry to the given LRU list unlinking it from the currently
168 * assigned list if needed.
169 *
170 * @returns nothing.
171 * @param pList List to add the entry to.
172 * @param pEntry Entry to add.
173 */
174static void pdmacFileCacheEntryAddToList(PPDMACFILELRULIST pList, PPDMACFILECACHEENTRY pEntry)
175{
176 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
177 pdmacFileCacheCheckList(pList, NULL);
178
179 /* Remove from old list if needed */
180 if (pEntry->pList)
181 pdmacFileCacheEntryRemoveFromList(pEntry);
182
183 pEntry->pNext = pList->pHead;
184 if (pList->pHead)
185 pList->pHead->pPrev = pEntry;
186 else
187 {
188 Assert(!pList->pTail);
189 pList->pTail = pEntry;
190 }
191
192 pEntry->pPrev = NULL;
193 pList->pHead = pEntry;
194 pList->cbCached += pEntry->cbData;
195 pEntry->pList = pList;
196 pdmacFileCacheCheckList(pList, NULL);
197}
198
199/**
200 * Destroys a LRU list freeing all entries.
201 *
202 * @returns nothing
203 * @param pList Pointer to the LRU list to destroy.
204 *
205 * @note The caller must own the critical section of the cache.
206 */
207static void pdmacFileCacheDestroyList(PPDMACFILELRULIST pList)
208{
209 while (pList->pHead)
210 {
211 PPDMACFILECACHEENTRY pEntry = pList->pHead;
212
213 pList->pHead = pEntry->pNext;
214
215 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
216 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
217
218 RTMemPageFree(pEntry->pbData);
219 RTMemFree(pEntry);
220 }
221}
222
223/**
224 * Tries to remove the given amount of bytes from a given list in the cache,
225 * moving the entries to one of the given ghost lists.
226 *
227 * @returns Amount of data which could be freed.
228 * @param pCache Pointer to the global cache data.
229 * @param cbData The amount of data to free.
230 * @param pListSrc The source list to evict data from.
231 * @param pGhostListDst The ghost list removed entries should be moved to,
232 * or NULL if the entries should be freed.
233 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size.
234 * @param ppbBuffer Where to store the address of the buffer if an entry with the
235 * same size was found and fReuseBuffer is true.
236 *
237 * @note This function may return fewer bytes than requested because entries
238 * may be marked as non-evictable if they are used for I/O at the
239 * moment.
240 */
241static size_t pdmacFileCacheEvictPagesFrom(PPDMACFILECACHEGLOBAL pCache, size_t cbData,
242 PPDMACFILELRULIST pListSrc, PPDMACFILELRULIST pGhostListDst,
243 bool fReuseBuffer, uint8_t **ppbBuffer)
244{
245 size_t cbEvicted = 0;
246
247 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
248
249 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
250#ifdef VBOX_WITH_2Q_CACHE
251 AssertMsg( !pGhostListDst
252 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
253 ("Destination list must be NULL or the recently used but paged out list\n"));
254#else
255 AssertMsg( !pGhostListDst
256 || (pGhostListDst == &pCache->LruRecentlyGhost)
257 || (pGhostListDst == &pCache->LruFrequentlyGhost),
258 ("Destination list must be NULL or one of the ghost lists\n"));
259#endif
260
261 if (fReuseBuffer)
262 {
263 AssertPtr(ppbBuffer);
264 *ppbBuffer = NULL;
265 }
266
267 /* Start deleting from the tail. */
268 PPDMACFILECACHEENTRY pEntry = pListSrc->pTail;
269
270 while ((cbEvicted < cbData) && pEntry)
271 {
272 PPDMACFILECACHEENTRY pCurr = pEntry;
273
274 pEntry = pEntry->pPrev;
275
276 /* We can't evict pages which are currently in progress */
277 if (!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
278 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
279 {
280 /* Ok eviction candidate. Grab the endpoint semaphore and check again
281 * because somebody else might have raced us. */
282 PPDMACFILEENDPOINTCACHE pEndpointCache = &pCurr->pEndpoint->DataCache;
283 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
284
285 if (!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS)
286 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
287 {
288 AssertMsg(!(pCurr->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED),
289 ("This entry is deprecated so it should have the I/O in progress flag set\n"));
290 Assert(!pCurr->pbDataReplace);
291
292 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
293
294 if (fReuseBuffer && (pCurr->cbData == cbData))
295 {
296 STAM_COUNTER_INC(&pCache->StatBuffersReused);
297 *ppbBuffer = pCurr->pbData;
298 }
299 else if (pCurr->pbData)
300 RTMemPageFree(pCurr->pbData);
301
302 pCurr->pbData = NULL;
303 cbEvicted += pCurr->cbData;
304
305 pCache->cbCached -= pCurr->cbData;
306
307 if (pGhostListDst)
308 {
309#ifdef VBOX_WITH_2Q_CACHE
310 /* We have to remove the last entries from the paged out list. */
311 while (pGhostListDst->cbCached > pCache->cbRecentlyUsedOutMax)
312 {
313 PPDMACFILECACHEENTRY pFree = pGhostListDst->pTail;
314 PPDMACFILEENDPOINTCACHE pEndpointCacheFree = &pFree->pEndpoint->DataCache;
315
316 RTSemRWRequestWrite(pEndpointCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
317
318 pdmacFileCacheEntryRemoveFromList(pFree);
319
320 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
321 RTAvlrFileOffsetRemove(pEndpointCacheFree->pTree, pFree->Core.Key);
322 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
323
324 RTSemRWReleaseWrite(pEndpointCacheFree->SemRWEntries);
325 RTMemFree(pFree);
326 }
327#endif
328
329 pdmacFileCacheEntryAddToList(pGhostListDst, pCurr);
330 }
331 else
332 {
333 /* Delete the entry from the AVL tree it is assigned to. */
334 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
335 RTAvlrFileOffsetRemove(pCurr->pEndpoint->DataCache.pTree, pCurr->Core.Key);
336 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
337
338 pdmacFileCacheEntryRemoveFromList(pCurr);
339 RTMemFree(pCurr);
340 }
341 }
342 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
343 }
344 else
345 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
346 }
347
348 return cbEvicted;
349}
350
351#ifdef VBOX_WITH_2Q_CACHE
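/**
 * Tries to make room for the given amount of bytes in the cache (2Q variant).
 *
 * Depending on the fill level of the lists, data is evicted either from A1in
 * (moving the evicted entries to the A1out ghost list) and then from Am if that
 * was not enough, or from Am directly; see pdmacFileCacheEvictPagesFrom().
 *
 * @returns true if the data already fits or enough bytes could be freed,
 *          false otherwise.
 * @param pCache The global cache data.
 * @param cbData Number of bytes which are about to be added to the cache.
 * @param fReuseBuffer Flag whether a freed buffer of the same size may be reused.
 * @param ppbBuffer Where to store the address of such a buffer if one was found.
 */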
352static bool pdmacFileCacheReclaim(PPDMACFILECACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
353{
354 size_t cbRemoved = 0;
355
356 if ((pCache->cbCached + cbData) < pCache->cbMax)
357 return true;
358 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
359 {
360 /* Try to evict as many bytes as possible from A1in */
361 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
362 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
363
364 /*
365 * If it was not possible to remove enough entries
366 * try the frequently accessed cache.
367 */
368 if (cbRemoved < cbData)
369 {
370 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but didn't free enough data. */
371
372 cbRemoved += pdmacFileCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
373 NULL, fReuseBuffer, ppbBuffer);
374 }
375 }
376 else
377 {
378 /* We have to remove entries from the frequently accessed list. */
379 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
380 NULL, fReuseBuffer, ppbBuffer);
381 }
382
383 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
384 return (cbRemoved >= cbData);
385}
386
387#else
388
389static size_t pdmacFileCacheReplace(PPDMACFILECACHEGLOBAL pCache, size_t cbData, PPDMACFILELRULIST pEntryList,
390 bool fReuseBuffer, uint8_t **ppbBuffer)
391{
392 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
393
394 if ( (pCache->LruRecentlyUsed.cbCached)
395 && ( (pCache->LruRecentlyUsed.cbCached > pCache->uAdaptVal)
396 || ( (pEntryList == &pCache->LruFrequentlyGhost)
397 && (pCache->LruRecentlyUsed.cbCached == pCache->uAdaptVal))))
398 {
399 /* We need to remove entry size pages from T1 and move the entries to B1 */
400 return pdmacFileCacheEvictPagesFrom(pCache, cbData,
401 &pCache->LruRecentlyUsed,
402 &pCache->LruRecentlyGhost,
403 fReuseBuffer, ppbBuffer);
404 }
405 else
406 {
407 /* We need to remove entry size pages from T2 and move the entries to B2 */
408 return pdmacFileCacheEvictPagesFrom(pCache, cbData,
409 &pCache->LruFrequentlyUsed,
410 &pCache->LruFrequentlyGhost,
411 fReuseBuffer, ppbBuffer);
412 }
413}
414
415/**
416 * Tries to evict the given amount of data from the cache.
417 *
418 * @returns Bytes removed.
419 * @param pCache The global cache data.
420 * @param cbData Number of bytes to evict.
421 */
422static size_t pdmacFileCacheEvict(PPDMACFILECACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
423{
424 size_t cbRemoved = ~0;
425
426 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
427
428 if ((pCache->LruRecentlyUsed.cbCached + pCache->LruRecentlyGhost.cbCached) >= pCache->cbMax)
429 {
430 /* Delete desired pages from the cache. */
431 if (pCache->LruRecentlyUsed.cbCached < pCache->cbMax)
432 {
433 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
434 &pCache->LruRecentlyGhost,
435 NULL,
436 fReuseBuffer, ppbBuffer);
437 }
438 else
439 {
440 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
441 &pCache->LruRecentlyUsed,
442 NULL,
443 fReuseBuffer, ppbBuffer);
444 }
445 }
446 else
447 {
448 uint32_t cbUsed = pCache->LruRecentlyUsed.cbCached + pCache->LruRecentlyGhost.cbCached +
449 pCache->LruFrequentlyUsed.cbCached + pCache->LruFrequentlyGhost.cbCached;
450
451 if (cbUsed >= pCache->cbMax)
452 {
453 if (cbUsed == 2*pCache->cbMax)
454 cbRemoved = pdmacFileCacheEvictPagesFrom(pCache, cbData,
455 &pCache->LruFrequentlyGhost,
456 NULL,
457 fReuseBuffer, ppbBuffer);
458
459 if (cbRemoved >= cbData)
460 cbRemoved = pdmacFileCacheReplace(pCache, cbData, NULL, fReuseBuffer, ppbBuffer);
461 }
462 }
463
464 return cbRemoved;
465}
466
467/**
468 * Updates the cache parameters
469 *
470 * @returns nothing.
471 * @param pCache The global cache data.
472 * @param pEntry The entry used for the update.
473 */
474static void pdmacFileCacheUpdate(PPDMACFILECACHEGLOBAL pCache, PPDMACFILECACHEENTRY pEntry)
475{
476 int32_t uUpdateVal = 0;
477
478 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
479
480 /* Update parameters */
481 if (pEntry->pList == &pCache->LruRecentlyGhost)
482 {
483 if (pCache->LruRecentlyGhost.cbCached >= pCache->LruFrequentlyGhost.cbCached)
484 uUpdateVal = 1;
485 else
486 uUpdateVal = pCache->LruFrequentlyGhost.cbCached / pCache->LruRecentlyGhost.cbCached;
487
488 pCache->uAdaptVal = RT_MIN(pCache->uAdaptVal + uUpdateVal, pCache->cbMax);
489 }
490 else if (pEntry->pList == &pCache->LruFrequentlyGhost)
491 {
492 if (pCache->LruFrequentlyGhost.cbCached >= pCache->LruRecentlyGhost.cbCached)
493 uUpdateVal = 1;
494 else
495 uUpdateVal = pCache->LruRecentlyGhost.cbCached / pCache->LruFrequentlyGhost.cbCached;
496
497 pCache->uAdaptVal = RT_MIN(pCache->uAdaptVal - uUpdateVal, 0);
498 }
499 else
500 AssertMsgFailed(("Invalid list type\n"));
501}
502#endif
503
504/**
505 * Initiates a read I/O task for the given entry.
506 *
507 * @returns nothing.
508 * @param pEntry The entry to fetch the data to.
509 */
510static void pdmacFileCacheReadFromEndpoint(PPDMACFILECACHEENTRY pEntry)
511{
512 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
513
514 /* Make sure no one evicts the entry while it is accessed. */
515 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
516
517 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
518 AssertPtr(pIoTask);
519
520 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
521
522 pIoTask->pEndpoint = pEntry->pEndpoint;
523 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
524 pIoTask->Off = pEntry->Core.Key;
525 pIoTask->DataSeg.cbSeg = pEntry->cbData;
526 pIoTask->DataSeg.pvSeg = pEntry->pbData;
527 pIoTask->pvUser = pEntry;
528 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
529
530 /* Send it off to the I/O manager. */
531 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
532}
533
534/**
535 * Initiates a write I/O task for the given entry.
536 *
537 * @returns nothing.
538 * @param pEntry The entry to read the data from.
539 */
540static void pdmacFileCacheWriteToEndpoint(PPDMACFILECACHEENTRY pEntry)
541{
542 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
543
544 /* Make sure no one evicts the entry while it is accessed. */
545 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
546
547 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEntry->pEndpoint);
548 AssertPtr(pIoTask);
549
550 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
551
552 pIoTask->pEndpoint = pEntry->pEndpoint;
553 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
554 pIoTask->Off = pEntry->Core.Key;
555 pIoTask->DataSeg.cbSeg = pEntry->cbData;
556 pIoTask->DataSeg.pvSeg = pEntry->pbData;
557 pIoTask->pvUser = pEntry;
558 pIoTask->pfnCompleted = pdmacFileCacheTaskCompleted;
559 ASMAtomicIncU32(&pEntry->pEndpoint->DataCache.cWritesOutstanding);
560
561 /* Send it off to the I/O manager. */
562 pdmacFileEpAddTask(pEntry->pEndpoint, pIoTask);
563}
564
565/**
566 * Completes a task segment, freeing all resources, and completes the task handle
567 * if everything was transferred.
568 *
569 * @returns Next task segment handle.
570 * @param pEndpointCache The endpoint cache.
571 * @param pTaskSeg Task segment to complete.
572 */
573static PPDMACFILETASKSEG pdmacFileCacheTaskComplete(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILETASKSEG pTaskSeg)
574{
575 PPDMACFILETASKSEG pNext = pTaskSeg->pNext;
576
577 uint32_t uOld = ASMAtomicSubU32(&pTaskSeg->pTask->cbTransferLeft, pTaskSeg->cbTransfer);
578 AssertMsg(uOld >= pTaskSeg->cbTransfer, ("New value would overflow\n"));
579 if (!(uOld - pTaskSeg->cbTransfer)
580 && !ASMAtomicXchgBool(&pTaskSeg->pTask->fCompleted, true))
581 pdmR3AsyncCompletionCompleteTask(&pTaskSeg->pTask->Core, true);
582
583 RTMemFree(pTaskSeg);
584
585 return pNext;
586}
587
588/**
589 * Completion callback for I/O tasks.
590 *
591 * @returns nothing.
592 * @param pTask The completed task.
593 * @param pvUser Opaque user data.
594 */
595static void pdmacFileCacheTaskCompleted(PPDMACTASKFILE pTask, void *pvUser)
596{
597 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pvUser;
598 PPDMACFILECACHEGLOBAL pCache = pEntry->pCache;
599 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pEntry->pEndpoint;
600 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
601
602 /* Reference the entry now as we are clearing the I/O in progress flag
603 * which protected the entry until now. */
604 pdmacFileEpCacheEntryRef(pEntry);
605
606 RTSemRWRequestWrite(pEndpoint->DataCache.SemRWEntries, RT_INDEFINITE_WAIT);
607 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IO_IN_PROGRESS;
608
609 /* Process the waiting segment list. The data in the entry might have changed in between. */
610 PPDMACFILETASKSEG pCurr = pEntry->pWaitingHead;
611
612 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
613 ("The list tail was not updated correctly\n"));
614 pEntry->pWaitingTail = NULL;
615 pEntry->pWaitingHead = NULL;
616
617 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
618 {
619 AssertMsg(pEndpointCache->cWritesOutstanding > 0, ("Completed write request but outstanding task count is 0\n"));
620 ASMAtomicDecU32(&pEndpointCache->cWritesOutstanding);
621
622 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DEPRECATED)
623 {
624 AssertMsg(!pCurr, ("The entry is deprecated but has waiting write segments attached\n"));
625
626 RTMemPageFree(pEntry->pbData);
627 pEntry->pbData = pEntry->pbDataReplace;
628 pEntry->pbDataReplace = NULL;
629 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DEPRECATED;
630 }
631 else
632 {
633 pEntry->fFlags &= ~PDMACFILECACHE_ENTRY_IS_DIRTY;
634
635 while (pCurr)
636 {
637 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
638
639 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
640 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
641
642 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
643 }
644 }
645 }
646 else
647 {
648 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_READ, ("Invalid transfer type\n"));
649 AssertMsg(!(pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY),("Invalid flags set\n"));
650
651 while (pCurr)
652 {
653 if (pCurr->fWrite)
654 {
655 memcpy(pEntry->pbData + pCurr->uBufOffset, pCurr->pvBuf, pCurr->cbTransfer);
656 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
657 }
658 else
659 memcpy(pCurr->pvBuf, pEntry->pbData + pCurr->uBufOffset, pCurr->cbTransfer);
660
661 pCurr = pdmacFileCacheTaskComplete(pEndpointCache, pCurr);
662 }
663 }
664
665 if (pEntry->fFlags & PDMACFILECACHE_ENTRY_IS_DIRTY)
666 pdmacFileCacheWriteToEndpoint(pEntry);
667
668 /* Complete a pending flush if all writes have completed */
669 if (!ASMAtomicReadU32(&pEndpointCache->cWritesOutstanding))
670 {
671 PPDMASYNCCOMPLETIONTASKFILE pTaskFlush = (PPDMASYNCCOMPLETIONTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpointCache->pTaskFlush, NULL);
672 if (pTaskFlush)
673 pdmR3AsyncCompletionCompleteTask(&pTaskFlush->Core, true);
674 }
675
676 RTSemRWReleaseWrite(pEndpoint->DataCache.SemRWEntries);
677
678 /* Dereference the entry so that it isn't protected anymore, unless we issued another write for it. */
679 pdmacFileEpCacheEntryRelease(pEntry);
680}
681
682/**
683 * Initializes the I/O cache.
684 *
685 * @returns VBox status code.
686 * @param pClassFile The global class data for file endpoints.
687 * @param pCfgNode CFGM node to query configuration data from.
688 */
689int pdmacFileCacheInit(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile, PCFGMNODE pCfgNode)
690{
691 int rc = VINF_SUCCESS;
692 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
693
694 rc = CFGMR3QueryU32Def(pCfgNode, "CacheSize", &pCache->cbMax, 5 * _1M);
695 AssertLogRelRCReturn(rc, rc);
696
697 pCache->cbCached = 0;
698 LogFlowFunc((": Maximum number of bytes cached %u\n", pCache->cbMax));
699
700 /* Initialize members */
701#ifdef VBOX_WITH_2Q_CACHE
702 pCache->LruRecentlyUsedIn.pHead = NULL;
703 pCache->LruRecentlyUsedIn.pTail = NULL;
704 pCache->LruRecentlyUsedIn.cbCached = 0;
705
706 pCache->LruRecentlyUsedOut.pHead = NULL;
707 pCache->LruRecentlyUsedOut.pTail = NULL;
708 pCache->LruRecentlyUsedOut.cbCached = 0;
709
710 pCache->LruFrequentlyUsed.pHead = NULL;
711 pCache->LruFrequentlyUsed.pTail = NULL;
712 pCache->LruFrequentlyUsed.cbCached = 0;
713
714 pCache->cbRecentlyUsedInMax = (pCache->cbMax / 100) * 25; /* 25% of the buffer size */
715 pCache->cbRecentlyUsedOutMax = (pCache->cbMax / 100) * 50; /* 50% of the buffer size */
716 LogFlowFunc((": cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n", pCache->cbRecentlyUsedInMax, pCache->cbRecentlyUsedOutMax));
717#else
718 pCache->LruRecentlyUsed.pHead = NULL;
719 pCache->LruRecentlyUsed.pTail = NULL;
720 pCache->LruRecentlyUsed.cbCached = 0;
721
722 pCache->LruFrequentlyUsed.pHead = NULL;
723 pCache->LruFrequentlyUsed.pTail = NULL;
724 pCache->LruFrequentlyUsed.cbCached = 0;
725
726 pCache->LruRecentlyGhost.pHead = NULL;
727 pCache->LruRecentlyGhost.pTail = NULL;
728 pCache->LruRecentlyGhost.cbCached = 0;
729
730 pCache->LruFrequentlyGhost.pHead = NULL;
731 pCache->LruFrequentlyGhost.pTail = NULL;
732 pCache->LruFrequentlyGhost.cbCached = 0;
733
734 pCache->uAdaptVal = 0;
735#endif
736
737 STAMR3Register(pClassFile->Core.pVM, &pCache->cbMax,
738 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
739 "/PDM/AsyncCompletion/File/cbMax",
740 STAMUNIT_BYTES,
741 "Maximum cache size");
742 STAMR3Register(pClassFile->Core.pVM, &pCache->cbCached,
743 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
744 "/PDM/AsyncCompletion/File/cbCached",
745 STAMUNIT_BYTES,
746 "Currently used cache");
747#ifdef VBOX_WITH_2Q_CACHE
748 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedIn.cbCached,
749 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
750 "/PDM/AsyncCompletion/File/cbCachedMruIn",
751 STAMUNIT_BYTES,
752 "Number of bytes cached in MRU list");
753 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsedOut.cbCached,
754 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
755 "/PDM/AsyncCompletion/File/cbCachedMruOut",
756 STAMUNIT_BYTES,
757 "Number of bytes cached in FRU list");
758 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyUsed.cbCached,
759 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
760 "/PDM/AsyncCompletion/File/cbCachedFru",
761 STAMUNIT_BYTES,
762 "Number of bytes cached in FRU ghost list");
763#else
764 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyUsed.cbCached,
765 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
766 "/PDM/AsyncCompletion/File/cbCachedMru",
767 STAMUNIT_BYTES,
768 "Number of bytes cached in Mru list");
769 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyUsed.cbCached,
770 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
771 "/PDM/AsyncCompletion/File/cbCachedFru",
772 STAMUNIT_BYTES,
773 "Number of bytes cached in Fru list");
774 STAMR3Register(pClassFile->Core.pVM, &pCache->LruRecentlyGhost.cbCached,
775 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
776 "/PDM/AsyncCompletion/File/cbCachedMruGhost",
777 STAMUNIT_BYTES,
778 "Number of bytes cached in Mru ghost list");
779 STAMR3Register(pClassFile->Core.pVM, &pCache->LruFrequentlyGhost.cbCached,
780 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
781 "/PDM/AsyncCompletion/File/cbCachedFruGhost",
782 STAMUNIT_BYTES, "Number of bytes cached in Fru ghost list");
783#endif
784
785#ifdef VBOX_WITH_STATISTICS
786 STAMR3Register(pClassFile->Core.pVM, &pCache->cHits,
787 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
788 "/PDM/AsyncCompletion/File/CacheHits",
789 STAMUNIT_COUNT, "Number of hits in the cache");
790 STAMR3Register(pClassFile->Core.pVM, &pCache->cPartialHits,
791 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
792 "/PDM/AsyncCompletion/File/CachePartialHits",
793 STAMUNIT_COUNT, "Number of partial hits in the cache");
794 STAMR3Register(pClassFile->Core.pVM, &pCache->cMisses,
795 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
796 "/PDM/AsyncCompletion/File/CacheMisses",
797 STAMUNIT_COUNT, "Number of misses when accessing the cache");
798 STAMR3Register(pClassFile->Core.pVM, &pCache->StatRead,
799 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
800 "/PDM/AsyncCompletion/File/CacheRead",
801 STAMUNIT_BYTES, "Number of bytes read from the cache");
802 STAMR3Register(pClassFile->Core.pVM, &pCache->StatWritten,
803 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
804 "/PDM/AsyncCompletion/File/CacheWritten",
805 STAMUNIT_BYTES, "Number of bytes written to the cache");
806 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeGet,
807 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
808 "/PDM/AsyncCompletion/File/CacheTreeGet",
809 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
810 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeInsert,
811 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
812 "/PDM/AsyncCompletion/File/CacheTreeInsert",
813 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
814 STAMR3Register(pClassFile->Core.pVM, &pCache->StatTreeRemove,
815 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
816 "/PDM/AsyncCompletion/File/CacheTreeRemove",
817 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
818 STAMR3Register(pClassFile->Core.pVM, &pCache->StatBuffersReused,
819 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
820 "/PDM/AsyncCompletion/File/CacheBuffersReused",
821 STAMUNIT_COUNT, "Number of times a buffer could be reused");
822#ifndef VBOX_WITH_2Q_CACHE
823 STAMR3Register(pClassFile->Core.pVM, &pCache->uAdaptVal,
824 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
825 "/PDM/AsyncCompletion/File/CacheAdaptValue",
826 STAMUNIT_COUNT,
827 "Adaption value of the cache");
828#endif
829#endif
830
831 /* Initialize the critical section */
832 rc = RTCritSectInit(&pCache->CritSect);
833
834 if (RT_SUCCESS(rc))
835 LogRel(("AIOMgr: Cache successfully initialised. Cache size is %u bytes\n", pCache->cbMax));
836
837 return rc;
838}
839
840/**
841 * Destroys the cache freeing all data.
842 *
843 * @returns nothing.
844 * @param pClassFile The global class data for file endpoints.
845 */
846void pdmacFileCacheDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
847{
848 PPDMACFILECACHEGLOBAL pCache = &pClassFile->Cache;
849
850 /* Make sure no one else uses the cache now */
851 RTCritSectEnter(&pCache->CritSect);
852
853#ifdef VBOX_WITH_2Q_CACHE
854 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
855 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedIn);
856 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsedOut);
857 pdmacFileCacheDestroyList(&pCache->LruFrequentlyUsed);
858#else
859 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
860 pdmacFileCacheDestroyList(&pCache->LruRecentlyUsed);
861 pdmacFileCacheDestroyList(&pCache->LruFrequentlyUsed);
862 pdmacFileCacheDestroyList(&pCache->LruRecentlyGhost);
863 pdmacFileCacheDestroyList(&pCache->LruFrequentlyGhost);
864#endif
865
866 RTCritSectLeave(&pCache->CritSect);
867
868 RTCritSectDelete(&pCache->CritSect);
869}
870
871/**
872 * Initializes per endpoint cache data
873 * like the AVL tree used to access cached entries.
874 *
875 * @returns VBox status code.
876 * @param pEndpoint The endpoint to init the cache for.
877 * @param pClassFile The global class data for file endpoints.
878 */
879int pdmacFileEpCacheInit(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONEPCLASSFILE pClassFile)
880{
881 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
882
883 pEndpointCache->pCache = &pClassFile->Cache;
884
885 int rc = RTSemRWCreate(&pEndpointCache->SemRWEntries);
886 if (RT_SUCCESS(rc))
887 {
888 pEndpointCache->pTree = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
889 if (!pEndpointCache->pTree)
890 {
891 rc = VERR_NO_MEMORY;
892 RTSemRWDestroy(pEndpointCache->SemRWEntries);
893 }
894 }
895
896#ifdef VBOX_WITH_STATISTICS
897 if (RT_SUCCESS(rc))
898 {
899 STAMR3RegisterF(pClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred,
900 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
901 STAMUNIT_COUNT, "Number of deferred writes",
902 "/PDM/AsyncCompletion/File/%s/Cache/DeferredWrites", RTPathFilename(pEndpoint->Core.pszUri));
903 }
904#endif
905
906 return rc;
907}
908
909/**
910 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
911 *
912 * @returns IPRT status code.
913 * @param pNode The node to destroy.
914 * @param pvUser Opaque user data.
915 */
916static int pdmacFileEpCacheEntryDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
917{
918 PPDMACFILECACHEENTRY pEntry = (PPDMACFILECACHEENTRY)pNode;
919 PPDMACFILECACHEGLOBAL pCache = (PPDMACFILECACHEGLOBAL)pvUser;
920 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEntry->pEndpoint->DataCache;
921
922 while (pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY))
923 {
924 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
925 RTThreadSleep(250);
926 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
927 }
928
929 AssertMsg(!(pEntry->fFlags & (PDMACFILECACHE_ENTRY_IO_IN_PROGRESS | PDMACFILECACHE_ENTRY_IS_DIRTY)),
930 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
931
932 pdmacFileCacheEntryRemoveFromList(pEntry);
933 pCache->cbCached -= pEntry->cbData;
934
935 RTMemPageFree(pEntry->pbData);
936 RTMemFree(pEntry);
937
938 return VINF_SUCCESS;
939}
940
941/**
942 * Destroys all cache resources used by the given endpoint.
943 *
944 * @returns nothing.
945 * @param pEndpoint The endpoint to destroy.
946 */
947void pdmacFileEpCacheDestroy(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
948{
949 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
950 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
951
952 /* Make sure nobody is accessing the cache while we delete the tree. */
953 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
954 RTCritSectEnter(&pCache->CritSect);
955 RTAvlrFileOffsetDestroy(pEndpointCache->pTree, pdmacFileEpCacheEntryDestroy, pCache);
956 RTCritSectLeave(&pCache->CritSect);
957 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
958
959 RTSemRWDestroy(pEndpointCache->SemRWEntries);
960
961#ifdef VBOX_WITH_STATISTICS
962 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
963
964 STAMR3Deregister(pEpClassFile->Core.pVM, &pEndpointCache->StatWriteDeferred);
965#endif
966}
967
968static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
969{
970 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
971 PPDMACFILECACHEENTRY pEntry = NULL;
972
973 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
974
975 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
976 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetRangeGet(pEndpointCache->pTree, off);
977 if (pEntry)
978 pdmacFileEpCacheEntryRef(pEntry);
979 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
980
981 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
982
983 return pEntry;
984}
985
986static PPDMACFILECACHEENTRY pdmacFileEpCacheGetCacheBestFitEntryByOffset(PPDMACFILEENDPOINTCACHE pEndpointCache, RTFOFF off)
987{
988 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
989 PPDMACFILECACHEENTRY pEntry = NULL;
990
991 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
992
993 RTSemRWRequestRead(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
994 pEntry = (PPDMACFILECACHEENTRY)RTAvlrFileOffsetGetBestFit(pEndpointCache->pTree, off, true /*fAbove*/);
995 if (pEntry)
996 pdmacFileEpCacheEntryRef(pEntry);
997 RTSemRWReleaseRead(pEndpointCache->SemRWEntries);
998
999 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1000
1001 return pEntry;
1002}
1003
1004static void pdmacFileEpCacheInsertEntry(PPDMACFILEENDPOINTCACHE pEndpointCache, PPDMACFILECACHEENTRY pEntry)
1005{
1006 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1007
1008 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1009 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1010 bool fInserted = RTAvlrFileOffsetInsert(pEndpointCache->pTree, &pEntry->Core);
1011 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1012 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1013 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1014}
1015
1016/**
1017 * Allocates and initializes a new entry for the cache.
1018 * The entry has a reference count of 1.
1019 *
1020 * @returns Pointer to the new cache entry or NULL if out of memory.
1021 * @param pCache The cache the entry belongs to.
1022 * @param pEndpoint The endpoint the entry holds data for.
1023 * @param off Start offset.
1024 * @param cbData Size of the cache entry.
1025 * @param pbBuffer Pointer to the buffer to use.
1026 * NULL if a new buffer should be allocated.
1027 * The buffer needs to have the same size as the entry.
1028 */
1029static PPDMACFILECACHEENTRY pdmacFileCacheEntryAlloc(PPDMACFILECACHEGLOBAL pCache,
1030 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
1031 RTFOFF off, size_t cbData, uint8_t *pbBuffer)
1032{
1033 PPDMACFILECACHEENTRY pEntryNew = (PPDMACFILECACHEENTRY)RTMemAllocZ(sizeof(PDMACFILECACHEENTRY));
1034
1035 if (RT_UNLIKELY(!pEntryNew))
1036 return NULL;
1037
1038 pEntryNew->Core.Key = off;
1039 pEntryNew->Core.KeyLast = off + cbData - 1;
1040 pEntryNew->pEndpoint = pEndpoint;
1041 pEntryNew->pCache = pCache;
1042 pEntryNew->fFlags = 0;
1043 pEntryNew->cRefs = 1; /* We are using it now. */
1044 pEntryNew->pList = NULL;
1045 pEntryNew->cbData = cbData;
1046 pEntryNew->pWaitingHead = NULL;
1047 pEntryNew->pWaitingTail = NULL;
1048 pEntryNew->pbDataReplace = NULL;
1049 if (pbBuffer)
1050 pEntryNew->pbData = pbBuffer;
1051 else
1052 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1053
1054 if (RT_UNLIKELY(!pEntryNew->pbData))
1055 {
1056 RTMemFree(pEntryNew);
1057 return NULL;
1058 }
1059
1060 return pEntryNew;
1061}
1062
1063/**
1064 * Adds a segment to the waiting list for a cache entry
1065 * which is currently in progress.
1066 *
1067 * @returns nothing.
1068 * @param pEntry The cache entry to add the segment to.
1069 * @param pSeg The segment to add.
1070 */
1071DECLINLINE(void) pdmacFileEpCacheEntryAddWaitingSegment(PPDMACFILECACHEENTRY pEntry, PPDMACFILETASKSEG pSeg)
1072{
1073 pSeg->pNext = NULL;
1074
1075 if (pEntry->pWaitingHead)
1076 {
1077 AssertPtr(pEntry->pWaitingTail);
1078
1079 pEntry->pWaitingTail->pNext = pSeg;
1080 pEntry->pWaitingTail = pSeg;
1081 }
1082 else
1083 {
1084 Assert(!pEntry->pWaitingTail);
1085
1086 pEntry->pWaitingHead = pSeg;
1087 pEntry->pWaitingTail = pSeg;
1088 }
1089}
1090
1091/**
1092 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1093 * in exclusive mode.
1094 *
1095 * @returns true if the flag in fSet is set and the one in fClear is clear,
1096 * false otherwise.
1097 * The R/W semaphore is only held if true is returned.
1098 *
1099 * @param pEndpointCache The endpoint cache instance data.
1100 * @param pEntry The entry to check the flags for.
1101 * @param fSet The flag which is tested to be set.
1102 * @param fClear The flag which is tested to be clear.
1103 */
1104DECLINLINE(bool) pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(PPDMACFILEENDPOINTCACHE pEndpointCache,
1105 PPDMACFILECACHEENTRY pEntry,
1106 uint32_t fSet, uint32_t fClear)
1107{
1108 bool fPassed = ((pEntry->fFlags & fSet) && !(pEntry->fFlags & fClear));
1109
1110 if (fPassed)
1111 {
1112 /* Acquire the lock and check again because the completion callback might have raced us. */
1113 RTSemRWRequestWrite(pEndpointCache->SemRWEntries, RT_INDEFINITE_WAIT);
1114
1115 fPassed = ((pEntry->fFlags & fSet) && !(pEntry->fFlags & fClear));
1116
1117 /* Drop the lock if we didn't pass the test. */
1118 if (!fPassed)
1119 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1120 }
1121
1122 return fPassed;
1123}
1124
1125/**
1126 * Advances the current segment buffer by the number of bytes transferred
1127 * or gets the next segment.
1128 */
1129#define ADVANCE_SEGMENT_BUFFER(BytesTransfered) \
1130 do \
1131 { \
1132 cbSegLeft -= BytesTransfered; \
1133 if (!cbSegLeft) \
1134 { \
1135 iSegCurr++; \
1136 cbSegLeft = paSegments[iSegCurr].cbSeg; \
1137 pbSegBuf = (uint8_t *)paSegments[iSegCurr].pvSeg; \
1138 } \
1139 else \
1140 pbSegBuf += BytesTransfered; \
1141 } \
1142 while (0)
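/*
 * Typical usage (see pdmacFileEpCacheRead() and pdmacFileEpCacheWrite() below): after
 * copying cbCopy bytes between the cache entry and the caller's current segment buffer,
 * ADVANCE_SEGMENT_BUFFER(cbCopy) steps to the next buffer position or to the next
 * segment. The macro relies on the local variables iSegCurr, cbSegLeft, pbSegBuf and
 * paSegments being in scope at the expansion site.
 */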
1143
1144/**
1145 * Reads the specified data from the endpoint using the cache if possible.
1146 *
1147 * @returns VBox status code.
1148 * @param pEndpoint The endpoint to read from.
1149 * @param pTask The task structure used as identifier for this request.
1150 * @param off The offset to start reading from.
1151 * @param paSegments Pointer to the array holding the destination buffers.
1152 * @param cSegments Number of segments in the array.
1153 * @param cbRead Number of bytes to read.
1154 */
1155int pdmacFileEpCacheRead(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1156 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1157 size_t cbRead)
1158{
1159 int rc = VINF_SUCCESS;
1160 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1161 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1162 PPDMACFILECACHEENTRY pEntry;
1163
1164 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbRead=%u\n",
1165 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbRead));
1166
1167 pTask->cbTransferLeft = cbRead;
1168 /* Set to completed to make sure that the task is valid while we access it. */
1169 ASMAtomicWriteBool(&pTask->fCompleted, true);
1170
1171 int iSegCurr = 0;
1172 uint8_t *pbSegBuf = (uint8_t *)paSegments[iSegCurr].pvSeg;
1173 size_t cbSegLeft = paSegments[iSegCurr].cbSeg;
1174
1175 while (cbRead)
1176 {
1177 size_t cbToRead;
1178
1179 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1180
1181 /*
1182 * If there is no entry we try to create a new one, evicting unused pages
1183 * if the cache is full. If this is not possible we will pass the request through
1184 * and skip the caching (all entries may still be in progress so they can't
1185 * be evicted).
1186 * If we have an entry it can be in one of the LRU lists where the entry
1187 * contains data (recently used or frequently used LRU) so we can just read
1188 * the data we need and put the entry at the head of the frequently used LRU list.
1189 * In case the entry is in one of the ghost lists it doesn't contain any data.
1190 * We have to fetch it again, evicting pages from either T1 or T2 to make room.
1191 */
1192 if (pEntry)
1193 {
1194 RTFOFF OffDiff = off - pEntry->Core.Key;
1195
1196 AssertMsg(off >= pEntry->Core.Key,
1197 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1198 off, pEntry->Core.Key));
1199
1200 AssertPtr(pEntry->pList);
1201
1202 cbToRead = RT_MIN(pEntry->cbData - OffDiff, cbRead);
1203 cbRead -= cbToRead;
1204
1205 if (!cbRead)
1206 STAM_COUNTER_INC(&pCache->cHits);
1207 else
1208 STAM_COUNTER_INC(&pCache->cPartialHits);
1209
1210 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1211
1212 /* Ghost lists contain no data. */
1213#ifdef VBOX_WITH_2Q_CACHE
1214 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1215 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1216 {
1217#else
1218 if ( (pEntry->pList == &pCache->LruRecentlyUsed)
1219 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1220 {
1221#endif
1222 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1223 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1224 0))
1225 {
1226 /* Entry is deprecated. Read data from the new buffer. */
1227 while (cbToRead)
1228 {
1229 size_t cbCopy = RT_MIN(cbSegLeft, cbToRead);
1230
1231 memcpy(pbSegBuf, pEntry->pbDataReplace + OffDiff, cbCopy);
1232
1233 ADVANCE_SEGMENT_BUFFER(cbCopy);
1234
1235 cbToRead -= cbCopy;
1236 off += cbCopy;
1237 OffDiff += cbCopy;
1238 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1239 }
1240 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1241 }
1242 else
1243 {
1244 if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1245 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1246 PDMACFILECACHE_ENTRY_IS_DIRTY))
1247 {
1248 /* Entry hasn't completed yet. Append to the waiting list. */
1249 while (cbToRead)
1250 {
1251 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1252
1253 pSeg->pTask = pTask;
1254 pSeg->uBufOffset = OffDiff;
1255 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
1256 pSeg->pvBuf = pbSegBuf;
1257 pSeg->fWrite = false;
1258
1259 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1260
1261 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1262
1263 off += pSeg->cbTransfer;
1264 cbToRead -= pSeg->cbTransfer;
1265 OffDiff += pSeg->cbTransfer;
1266 }
1267 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1268 }
1269 else
1270 {
1271 /* Read as much as we can from the entry. */
1272 while (cbToRead)
1273 {
1274 size_t cbCopy = RT_MIN(cbSegLeft, cbToRead);
1275
1276 memcpy(pbSegBuf, pEntry->pbData + OffDiff, cbCopy);
1277
1278 ADVANCE_SEGMENT_BUFFER(cbCopy);
1279
1280 cbToRead -= cbCopy;
1281 off += cbCopy;
1282 OffDiff += cbCopy;
1283 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1284 }
1285 }
1286 }
1287
1288 /* Move this entry to the top position */
1289#ifdef VBOX_WITH_2Q_CACHE
1290 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1291 {
1292 RTCritSectEnter(&pCache->CritSect);
1293 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1294 RTCritSectLeave(&pCache->CritSect);
1295 }
1296#else
1297 RTCritSectEnter(&pCache->CritSect);
1298 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1299 RTCritSectLeave(&pCache->CritSect);
1300#endif
1301 }
1302 else
1303 {
1304 uint8_t *pbBuffer = NULL;
1305
1306 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
1307
1308#ifdef VBOX_WITH_2Q_CACHE
1309 RTCritSectEnter(&pCache->CritSect);
1310 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1311 pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1312
1313 /* Move the entry to Am and fetch it to the cache. */
1314 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1315 RTCritSectLeave(&pCache->CritSect);
1316#else
1317 RTCritSectEnter(&pCache->CritSect);
1318 pdmacFileCacheUpdate(pCache, pEntry);
1319 pdmacFileCacheReplace(pCache, pEntry->cbData, pEntry->pList, true, &pbBuffer);
1320
1321 /* Move the entry to T2 and fetch it to the cache. */
1322 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1323 RTCritSectLeave(&pCache->CritSect);
1324#endif
1325
1326 if (pbBuffer)
1327 pEntry->pbData = pbBuffer;
1328 else
1329 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1330 AssertPtr(pEntry->pbData);
1331
1332 while (cbToRead)
1333 {
1334 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1335
1336 AssertMsg(off >= pEntry->Core.Key,
1337 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1338 off, pEntry->Core.Key));
1339
1340 pSeg->pTask = pTask;
1341 pSeg->uBufOffset = OffDiff;
1342 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
1343 pSeg->pvBuf = pbSegBuf;
1344
1345 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1346
1347 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1348
1349 off += pSeg->cbTransfer;
1350 OffDiff += pSeg->cbTransfer;
1351 cbToRead -= pSeg->cbTransfer;
1352 }
1353
1354 pdmacFileCacheReadFromEndpoint(pEntry);
1355 }
1356 pdmacFileEpCacheEntryRelease(pEntry);
1357 }
1358 else
1359 {
1360 /* No entry found for this offset. Get best fit entry and fetch the data to the cache. */
1361 size_t cbToReadAligned;
1362 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1363
1364 LogFlow(("%sbest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1365 pEntryBestFit ? "" : "No ",
1366 off,
1367 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1368 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1369 pEntryBestFit ? pEntryBestFit->cbData : 0));
1370
1371 if ( pEntryBestFit
1372 && off + (RTFOFF)cbRead > pEntryBestFit->Core.Key)
1373 {
1374 cbToRead = pEntryBestFit->Core.Key - off;
1375 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1376 cbToReadAligned = cbToRead;
1377 }
1378 else
1379 {
1380 /*
1381 * Align the size to a 4KB boundary.
1382 * Memory size is aligned to a page boundary
1383 * and memory is wasted if the size is rather small
1384 * (for example reads with a size of 512 bytes).
1385 */
1386 cbToRead = cbRead;
1387 cbToReadAligned = RT_ALIGN_Z(cbRead, PAGE_SIZE);
1388
1389 /* Clip read to file size */
1390 cbToReadAligned = RT_MIN(pEndpoint->cbFile - off, cbToReadAligned);
1391 if (pEntryBestFit)
1392 {
1393 Assert(pEntryBestFit->Core.Key >= off);
1394 cbToReadAligned = RT_MIN(cbToReadAligned, (uint64_t)pEntryBestFit->Core.Key - off);
1395 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1396 }
1397 }
1398
1399 cbRead -= cbToRead;
1400
1401 if (!cbRead)
1402 STAM_COUNTER_INC(&pCache->cMisses);
1403 else
1404 STAM_COUNTER_INC(&pCache->cPartialHits);
1405
1406 uint8_t *pbBuffer = NULL;
1407
1408#ifdef VBOX_WITH_2Q_CACHE
1409 RTCritSectEnter(&pCache->CritSect);
1410 bool fEnough = pdmacFileCacheReclaim(pCache, cbToReadAligned, true, &pbBuffer);
1411 RTCritSectLeave(&pCache->CritSect);
1412
1413 if (fEnough)
1414 {
1415 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToReadAligned));
1416#else
1417 RTCritSectEnter(&pCache->CritSect);
1418 size_t cbRemoved = pdmacFileCacheEvict(pCache, cbToReadAligned, true, &pbBuffer);
1419 RTCritSectLeave(&pCache->CritSect);
1420
1421 if (cbRemoved >= cbToReadAligned)
1422 {
1423 LogFlow(("Evicted %u bytes (%u requested). Creating new cache entry\n", cbRemoved, cbToReadAligned));
1424#endif
1425 PPDMACFILECACHEENTRY pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToReadAligned, pbBuffer);
1426 AssertPtr(pEntryNew);
1427
1428 RTCritSectEnter(&pCache->CritSect);
1429#ifdef VBOX_WITH_2Q_CACHE
1430 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1431#else
1432 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsed, pEntryNew);
1433#endif
1434 pCache->cbCached += cbToReadAligned;
1435 RTCritSectLeave(&pCache->CritSect);
1436
1437 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
1438 uint32_t uBufOffset = 0;
1439
1440 while (cbToRead)
1441 {
1442 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1443
1444 pSeg->pTask = pTask;
1445 pSeg->uBufOffset = uBufOffset;
1446 pSeg->cbTransfer = RT_MIN(cbToRead, cbSegLeft);
1447 pSeg->pvBuf = pbSegBuf;
1448
1449 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1450
1451 pdmacFileEpCacheEntryAddWaitingSegment(pEntryNew, pSeg);
1452
1453 off += pSeg->cbTransfer;
1454 cbToRead -= pSeg->cbTransfer;
1455 uBufOffset += pSeg->cbTransfer;
1456 }
1457
1458 pdmacFileCacheReadFromEndpoint(pEntryNew);
1459 pdmacFileEpCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1460 }
1461 else
1462 {
1463 /*
1464 * There is not enough free space in the cache.
1465 * Pass the request directly to the I/O manager.
1466 */
1467 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
1468
1469 while (cbToRead)
1470 {
1471 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1472 AssertPtr(pIoTask);
1473
1474 pIoTask->pEndpoint = pEndpoint;
1475 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_READ;
1476 pIoTask->Off = off;
1477 pIoTask->DataSeg.cbSeg = RT_MIN(cbToRead, cbSegLeft);
1478 pIoTask->DataSeg.pvSeg = pbSegBuf;
1479 pIoTask->pvUser = pTask;
1480 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1481
1482 off += pIoTask->DataSeg.cbSeg;
1483 cbToRead -= pIoTask->DataSeg.cbSeg;
1484
1485 ADVANCE_SEGMENT_BUFFER(pIoTask->DataSeg.cbSeg);
1486
1487 /* Send it off to the I/O manager. */
1488 pdmacFileEpAddTask(pEndpoint, pIoTask);
1489 }
1490 }
1491 }
1492 }
1493
1494 ASMAtomicWriteBool(&pTask->fCompleted, false);
1495
1496 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
1497 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
1498 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
1499 else
1500 rc = VINF_AIO_TASK_PENDING;
1501
1502 LogFlowFunc((": Leave rc=%Rrc\n", rc));
1503
1504 return rc;
1505}
1506
1507/**
1508 * Writes the given data to the endpoint using the cache if possible.
1509 *
1510 * @returns VBox status code.
1511 * @param pEndpoint The endpoint to write to.
1512 * @param pTask The task structure used as identifier for this request.
1513 * @param off The offset to start writing to
1514 * @param paSegments Pointer to the array holding the source buffers.
1515 * @param cSegments Number of segments in the array.
1516 * @param cbWrite Number of bytes to write.
1517 */
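/*
 * Note: if the entry to update is dirty and its write back is still in progress, the
 * current buffer is "deprecated": a replacement buffer (pbDataReplace) receives the
 * old content plus the new data and is swapped in once the outstanding write completes
 * (see pdmacFileCacheTaskCompleted() above). If no replacement buffer can be allocated
 * or waiting segments are attached, the write is deferred instead.
 */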
1518int pdmacFileEpCacheWrite(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask,
1519 RTFOFF off, PCPDMDATASEG paSegments, size_t cSegments,
1520 size_t cbWrite)
1521{
1522 int rc = VINF_SUCCESS;
1523 PPDMACFILEENDPOINTCACHE pEndpointCache = &pEndpoint->DataCache;
1524 PPDMACFILECACHEGLOBAL pCache = pEndpointCache->pCache;
1525 PPDMACFILECACHEENTRY pEntry;
1526
1527 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p off=%RTfoff paSegments=%#p cSegments=%u cbWrite=%u\n",
1528 pEndpoint, pEndpoint->Core.pszUri, pTask, off, paSegments, cSegments, cbWrite));
1529
1530 pTask->cbTransferLeft = cbWrite;
1531 /* Set to completed to make sure that the task is valid while we access it. */
1532 ASMAtomicWriteBool(&pTask->fCompleted, true);
1533
1534 int iSegCurr = 0;
1535 uint8_t *pbSegBuf = (uint8_t *)paSegments[iSegCurr].pvSeg;
1536 size_t cbSegLeft = paSegments[iSegCurr].cbSeg;
1537
1538 while (cbWrite)
1539 {
1540 size_t cbToWrite;
1541
1542 pEntry = pdmacFileEpCacheGetCacheEntryByOffset(pEndpointCache, off);
1543
1544 if (pEntry)
1545 {
1546 /* Write the data into the entry and mark it as dirty */
1547 AssertPtr(pEntry->pList);
1548
1549 RTFOFF OffDiff = off - pEntry->Core.Key;
1550
1551 AssertMsg(off >= pEntry->Core.Key,
1552 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1553 off, pEntry->Core.Key));
1554
1555 cbToWrite = RT_MIN(pEntry->cbData - OffDiff, cbWrite);
1556 cbWrite -= cbToWrite;
1557
1558 if (!cbWrite)
1559 STAM_COUNTER_INC(&pCache->cHits);
1560 else
1561 STAM_COUNTER_INC(&pCache->cPartialHits);
1562
1563 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1564
1565 /* Ghost lists contain no data. */
1566#ifdef VBOX_WITH_2Q_CACHE
1567 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1568 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1569#else
1570 if ( (pEntry->pList == &pCache->LruRecentlyUsed)
1571 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1572#endif
1573 {
1574 /* Check if the buffer is deprecated. */
1575 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1576 PDMACFILECACHE_ENTRY_IS_DEPRECATED,
1577 0))
1578 {
1579 AssertMsg(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1580 ("Entry is deprecated but not in progress\n"));
1581 AssertPtr(pEntry->pbDataReplace);
1582
1583 LogFlow(("Writing to deprecated buffer of entry %#p\n", pEntry));
1584
1585 /* Update the data from the write. */
1586 while (cbToWrite)
1587 {
1588 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1589
1590 memcpy(pEntry->pbDataReplace + OffDiff, pbSegBuf, cbCopy);
1591
1592 ADVANCE_SEGMENT_BUFFER(cbCopy);
1593
1594 cbToWrite -= cbCopy;
1595 off += cbCopy;
1596 OffDiff += cbCopy;
1597 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1598 }
1599 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1600 }
1601 else /* Deprecated flag not set */
1602 {
1603 /* If the entry is dirty it must also be in progress now and we have to defer updating it again. */
1604 if(pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1605 PDMACFILECACHE_ENTRY_IS_DIRTY,
1606 0))
1607 {
1608 AssertMsg(pEntry->fFlags & PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1609 ("Entry is dirty but not in progress\n"));
1610 Assert(!pEntry->pbDataReplace);
1611
1612 /* Deprecate the current buffer. */
1613 if (!pEntry->pWaitingHead)
1614 pEntry->pbDataReplace = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1615
1616 /* If we are out of memory or have waiting segments
1617 * defer the write. */
1618 if (!pEntry->pbDataReplace || pEntry->pWaitingHead)
1619 {
1620 /* The data isn't written to the file yet */
1621 while (cbToWrite)
1622 {
1623 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1624
1625 pSeg->pTask = pTask;
1626 pSeg->uBufOffset = OffDiff;
1627 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1628 pSeg->pvBuf = pbSegBuf;
1629 pSeg->fWrite = true;
1630
1631 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1632
1633 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1634
1635 off += pSeg->cbTransfer;
1636 OffDiff += pSeg->cbTransfer;
1637 cbToWrite -= pSeg->cbTransfer;
1638 }
1639 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1640 }
1641 else /* Deprecate buffer */
1642 {
1643 LogFlow(("Deprecating buffer for entry %#p\n", pEntry));
1644 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DEPRECATED;
1645
1646#if 1
1647 /* Copy the data before the update. */
1648 if (OffDiff)
1649 memcpy(pEntry->pbDataReplace, pEntry->pbData, OffDiff);
1650
1651 /* Copy data behind the update. */
1652 if ((pEntry->cbData - OffDiff - cbToWrite) > 0)
1653 memcpy(pEntry->pbDataReplace + OffDiff + cbToWrite,
1654 pEntry->pbData + OffDiff + cbToWrite,
1655 (pEntry->cbData - OffDiff - cbToWrite));
1656#else
1657 /* A safer method but probably slower. */
1658 memcpy(pEntry->pbDataReplace, pEntry->pbData, pEntry->cbData);
1659#endif
1660
1661 /* Update the data from the write. */
1662 while (cbToWrite)
1663 {
1664 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1665
1666 memcpy(pEntry->pbDataReplace + OffDiff, pbSegBuf, cbCopy);
1667
1668 ADVANCE_SEGMENT_BUFFER(cbCopy);
1669
1670 cbToWrite -= cbCopy;
1671 off += cbCopy;
1672 OffDiff += cbCopy;
1673 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1674 }
1675
1676 /* We are done here. A new write will be started when the currently active write completes. */
1677 }
1678
1679 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1680 }
1681 else /* Dirty bit not set */
1682 {
1683 /*
1684 * Check if a read is in progress for this entry.
1685 * We have to defer processing in that case.
1686 */
1687 if (pdmacFileEpCacheEntryFlagIsSetClearAcquireLock(pEndpointCache, pEntry,
1688 PDMACFILECACHE_ENTRY_IO_IN_PROGRESS,
1689 0))
1690 {
1691 while (cbToWrite)
1692 {
1693 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1694
1695 pSeg->pTask = pTask;
1696 pSeg->uBufOffset = OffDiff;
1697 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1698 pSeg->pvBuf = pbSegBuf;
1699 pSeg->fWrite = true;
1700
1701 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1702
1703 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1704
1705 off += pSeg->cbTransfer;
1706 OffDiff += pSeg->cbTransfer;
1707 cbToWrite -= pSeg->cbTransfer;
1708 }
1709 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1710 RTSemRWReleaseWrite(pEndpointCache->SemRWEntries);
1711 }
1712 else /* I/O in progress flag not set */
1713 {
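/*
 * Neither dirty nor busy: copy the data straight into the cache buffer, mark the
 * entry dirty and start the write-back to the file immediately.
 */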
1714 /* Write as much as we can into the entry and update the file. */
1715 while (cbToWrite)
1716 {
1717 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1718
1719 memcpy(pEntry->pbData + OffDiff, pbSegBuf, cbCopy);
1720
1721 ADVANCE_SEGMENT_BUFFER(cbCopy);
1722
1723 cbToWrite -= cbCopy;
1724 off += cbCopy;
1725 OffDiff += cbCopy;
1726 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1727 }
1728
1729 pEntry->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
1730 pdmacFileCacheWriteToEndpoint(pEntry);
1731 }
1732 } /* Dirty bit not set */
1733
1734 /* Move this entry to the top position */
1735#ifdef VBOX_WITH_2Q_CACHE
1736 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1737 {
1738 RTCritSectEnter(&pCache->CritSect);
1739 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1740 RTCritSectLeave(&pCache->CritSect);
1741 } /* pList == LruFrequentlyUsed */
1742#else
1743 RTCritSectEnter(&pCache->CritSect);
1744 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1745 RTCritSectLeave(&pCache->CritSect);
1746#endif
1747 }
1748 }
1749 else /* Entry is on the ghost list */
1750 {
1751 uint8_t *pbBuffer = NULL;
1752
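/*
 * Ghost entries carry no data buffer. Reclaim or allocate one, move the entry to the
 * frequently used list, queue the write data as waiting segments and start a read to
 * bring the entry into the cache; the queued segments are applied when that read
 * completes (handled by the completion path, not shown in this excerpt).
 */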
1753#ifdef VBOX_WITH_2Q_CACHE
1754 RTCritSectEnter(&pCache->CritSect);
1755 pdmacFileCacheEntryRemoveFromList(pEntry); /* Remove it from the ghost list first, otherwise it may get freed while evicting data below. */
1756 pdmacFileCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1757
1758 /* Move the entry to Am and fetch its data into the cache. */
1759 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1760 RTCritSectLeave(&pCache->CritSect);
1761#else
1762 RTCritSectEnter(&pCache->CritSect);
1763 pdmacFileCacheUpdate(pCache, pEntry);
1764 pdmacFileCacheReplace(pCache, pEntry->cbData, pEntry->pList, true, &pbBuffer);
1765
1766 /* Move the entry to T2 and fetch its data into the cache. */
1767 pdmacFileCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1768 RTCritSectLeave(&pCache->CritSect);
1769#endif
1770
1771 if (pbBuffer)
1772 pEntry->pbData = pbBuffer;
1773 else
1774 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1775 AssertPtr(pEntry->pbData);
1776
1777 while (cbToWrite)
1778 {
1779 PPDMACFILETASKSEG pSeg = (PPDMACFILETASKSEG)RTMemAllocZ(sizeof(PDMACFILETASKSEG));
1780
1781 AssertMsg(off >= pEntry->Core.Key,
1782 ("Overflow in calculation off=%RTfoff OffsetAligned=%RTfoff\n",
1783 off, pEntry->Core.Key));
1784
1785 pSeg->pTask = pTask;
1786 pSeg->uBufOffset = OffDiff;
1787 pSeg->cbTransfer = RT_MIN(cbToWrite, cbSegLeft);
1788 pSeg->pvBuf = pbSegBuf;
1789 pSeg->fWrite = true;
1790
1791 ADVANCE_SEGMENT_BUFFER(pSeg->cbTransfer);
1792
1793 pdmacFileEpCacheEntryAddWaitingSegment(pEntry, pSeg);
1794
1795 off += pSeg->cbTransfer;
1796 OffDiff += pSeg->cbTransfer;
1797 cbToWrite -= pSeg->cbTransfer;
1798 }
1799
1800 STAM_COUNTER_INC(&pEndpointCache->StatWriteDeferred);
1801 pdmacFileCacheReadFromEndpoint(pEntry);
1802 }
1803
1804 /* Release the reference. If the entry is still needed, the I/O-in-progress flag protects it now. */
1805 pdmacFileEpCacheEntryRelease(pEntry);
1806 }
1807 else /* No entry found */
1808 {
1809 /*
1810 * No entry found. Try to create a new cache entry to store the data in and if that fails
1811 * write directly to the file.
1812 */
1813 PPDMACFILECACHEENTRY pEntryBestFit = pdmacFileEpCacheGetCacheBestFitEntryByOffset(pEndpointCache, off);
1814
1815 LogFlow(("%sest fit entry for off=%RTfoff (BestFit=%RTfoff BestFitEnd=%RTfoff BestFitSize=%u)\n",
1816 pEntryBestFit ? "B" : "No b",
1817 off,
1818 pEntryBestFit ? pEntryBestFit->Core.Key : 0,
1819 pEntryBestFit ? pEntryBestFit->Core.KeyLast : 0,
1820 pEntryBestFit ? pEntryBestFit->cbData : 0));
1821
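/* Clamp the chunk size so a newly created entry cannot overlap the next existing entry. */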
1822 if (pEntryBestFit && ((off + (RTFOFF)cbWrite) > pEntryBestFit->Core.Key))
1823 {
1824 cbToWrite = pEntryBestFit->Core.Key - off;
1825 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1826 }
1827 else
1828 {
1829 if (pEntryBestFit)
1830 pdmacFileEpCacheEntryRelease(pEntryBestFit);
1831
1832 cbToWrite = cbWrite;
1833 }
1834
1835 cbWrite -= cbToWrite;
1836
1837 STAM_COUNTER_INC(&pCache->cMisses);
1838 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1839
1840 uint8_t *pbBuffer = NULL;
1841
1842#ifdef VBOX_WITH_2Q_CACHE
1843 RTCritSectEnter(&pCache->CritSect);
1844 bool fEnough = pdmacFileCacheReclaim(pCache, cbToWrite, true, &pbBuffer);
1845 RTCritSectLeave(&pCache->CritSect);
1846
1847 if (fEnough)
1848 {
1849 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbToWrite));
1850#else
1851 RTCritSectEnter(&pCache->CritSect);
1852 size_t cbRemoved = pdmacFileCacheEvict(pCache, cbToWrite, true, &pbBuffer);
1853 RTCritSectLeave(&pCache->CritSect);
1854
1855 if (cbRemoved >= cbToWrite)
1856 {
1857 LogFlow(("Evicted %u bytes (%u requested). Creating new cache entry\n", cbRemoved, cbToWrite));
1858
1859#endif
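/*
 * Enough space could be freed: create a new entry for this chunk, link it into the
 * cache lists and the endpoint tree, copy the data in, mark it dirty and start the
 * write-back to the file.
 */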
1860 uint8_t *pbBuf;
1861 PPDMACFILECACHEENTRY pEntryNew;
1862
1863 pEntryNew = pdmacFileCacheEntryAlloc(pCache, pEndpoint, off, cbToWrite, pbBuffer);
1864 AssertPtr(pEntryNew);
1865
1866 RTCritSectEnter(&pCache->CritSect);
1867#ifdef VBOX_WITH_2Q_CACHE
1868 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1869#else
1870 pdmacFileCacheEntryAddToList(&pCache->LruRecentlyUsed, pEntryNew);
1871#endif
1872 pCache->cbCached += cbToWrite;
1873 RTCritSectLeave(&pCache->CritSect);
1874
1875 pdmacFileEpCacheInsertEntry(pEndpointCache, pEntryNew);
1876
1877 off += cbToWrite;
1878 pbBuf = pEntryNew->pbData;
1879
1880 while (cbToWrite)
1881 {
1882 size_t cbCopy = RT_MIN(cbSegLeft, cbToWrite);
1883
1884 memcpy(pbBuf, pbSegBuf, cbCopy);
1885
1886 ADVANCE_SEGMENT_BUFFER(cbCopy);
1887
1888 cbToWrite -= cbCopy;
1889 pbBuf += cbCopy;
1890 ASMAtomicSubS32(&pTask->cbTransferLeft, cbCopy);
1891 }
1892
1893 pEntryNew->fFlags |= PDMACFILECACHE_ENTRY_IS_DIRTY;
1894 pdmacFileCacheWriteToEndpoint(pEntryNew);
1895 pdmacFileEpCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1896 }
1897 else
1898 {
1899 /*
1900 * There is not enough free space in the cache.
1901 * Pass the request directly to the I/O manager.
1902 */
1903 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
1904
1905 while (cbToWrite)
1906 {
1907 PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEndpoint);
1908 AssertPtr(pIoTask);
1909
1910 pIoTask->pEndpoint = pEndpoint;
1911 pIoTask->enmTransferType = PDMACTASKFILETRANSFER_WRITE;
1912 pIoTask->Off = off;
1913 pIoTask->DataSeg.cbSeg = RT_MIN(cbToWrite, cbSegLeft);
1914 pIoTask->DataSeg.pvSeg = pbSegBuf;
1915 pIoTask->pvUser = pTask;
1916 pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1917
1918 off += pIoTask->DataSeg.cbSeg;
1919 cbToWrite -= pIoTask->DataSeg.cbSeg;
1920
1921 ADVANCE_SEGMENT_BUFFER(pIoTask->DataSeg.cbSeg);
1922
1923 /* Send it off to the I/O manager. */
1924 pdmacFileEpAddTask(pEndpoint, pIoTask);
1925 }
1926 }
1927 }
1928 }
1929
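/*
 * Drop the "completed" guard set at the beginning. If nothing is left to transfer and
 * no completion callback won the atomic exchange, the task is completed here and a
 * pending flush is finished once no writes are outstanding; otherwise the caller is
 * told that the task is still pending.
 */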
1930 ASMAtomicWriteBool(&pTask->fCompleted, false);
1931
1932 if (ASMAtomicReadS32(&pTask->cbTransferLeft) == 0
1933 && !ASMAtomicXchgBool(&pTask->fCompleted, true))
1934 {
1935 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
1936
1937 /* Complete a pending flush if all writes have completed */
1938 if (!ASMAtomicReadU32(&pEndpointCache->cWritesOutstanding))
1939 {
1940 PPDMASYNCCOMPLETIONTASKFILE pTaskFlush = (PPDMASYNCCOMPLETIONTASKFILE)ASMAtomicXchgPtr((void * volatile *)&pEndpointCache->pTaskFlush, NULL);
1941 if (pTaskFlush)
1942 pdmR3AsyncCompletionCompleteTask(&pTaskFlush->Core, true);
1943 }
1944 }
1945 else
1946 rc = VINF_AIO_TASK_PENDING;
1947
1948 LogFlowFunc((": Leave rc=%Rrc\n", rc));
1949
1950 return rc;
1951}
1952
1953#undef ADVANCE_SEGMENT_BUFFER
1954
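/**
 * Flushes the endpoint data cache.
 *
 * Only one flush can be pending per endpoint. If writes are still outstanding the task
 * is parked in pTaskFlush and completed by the last finishing write (see the write path
 * above); otherwise it is completed right away.
 */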
1955int pdmacFileEpCacheFlush(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMASYNCCOMPLETIONTASKFILE pTask)
1956{
1957 int rc = VINF_SUCCESS;
1958
1959 LogFlowFunc((": pEndpoint=%#p{%s} pTask=%#p\n",
1960 pEndpoint, pEndpoint->Core.pszUri, pTask));
1961
1962 if (ASMAtomicReadPtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush))
1963 rc = VERR_RESOURCE_BUSY;
1964 else
1965 {
1966 if (ASMAtomicReadU32(&pEndpoint->DataCache.cWritesOutstanding) > 0)
1967 {
1968 ASMAtomicWritePtr((void * volatile *)&pEndpoint->DataCache.pTaskFlush, pTask);
1969 rc = VINF_AIO_TASK_PENDING;
1970 }
1971 else
1972 pdmR3AsyncCompletionCompleteTask(&pTask->Core, false);
1973 }
1974
1975 LogFlowFunc((": Leave rc=%Rrc\n", rc));
1976 return rc;
1977}
1978