VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMBlkCache.cpp@ 34219

Last change on this file since 34219 was 34219, checked in by vboxsync, 14 years ago

PDM/BlockCache: First part for #5295, move the writeback cache into a separate component

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 83.5 KB
Line 
1/* $Id: PDMBlkCache.cpp 34219 2010-11-21 18:10:39Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
26#include "PDMInternal.h"
27#include <iprt/asm.h>
28#include <iprt/mem.h>
29#include <iprt/path.h>
30#include <iprt/string.h>
31#include <VBox/log.h>
32#include <VBox/stam.h>
33#include <VBox/uvm.h>
34#include <VBox/vm.h>
35
36#include "PDMBlkCacheInternal.h"
37
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given global cache. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while(0)

/** Asserts that the calling thread is the write owner of the per endpoint RW semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while(0)

/** Asserts that the calling thread is a read owner of the per endpoint RW semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while(0)

#else
/* Without VBOX_STRICT the ownership checks expand to empty statements. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while(0)
#endif
65
66/*******************************************************************************
67* Internal Functions *
68*******************************************************************************/
69
70/**
71 * Decrement the reference counter of the given cache entry.
72 *
73 * @returns nothing.
74 * @param pEntry The entry to release.
75 */
76DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
77{
78 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
79 ASMAtomicDecU32(&pEntry->cRefs);
80}
81
82/**
83 * Increment the reference counter of the given cache entry.
84 *
85 * @returns nothing.
86 * @param pEntry The entry to reference.
87 */
88DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
89{
90 ASMAtomicIncU32(&pEntry->cRefs);
91}
92
#ifdef DEBUG
/**
 * Sanity checks the byte accounting of the global cache (DEBUG builds only).
 *
 * @returns nothing.
 * @param   pCache    The global cache instance to check.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    /* The cache must never hold more data than it is allowed to. */
    AssertMsg(pCache->cbCached <= pCache->cbMax,
              ("Current amount of cached data exceeds maximum\n"));

    /* The recently used and frequently used lists together account for cbCached. */
    AssertMsg(   pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached
              == pCache->cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The paged out list has a limit of its own. */
    AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
              ("Paged out list exceeds maximum\n"));
}
#endif
108
109DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
110{
111 RTCritSectEnter(&pCache->CritSect);
112#ifdef DEBUG
113 pdmBlkCacheValidate(pCache);
114#endif
115}
116
117DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
118{
119#ifdef DEBUG
120 pdmBlkCacheValidate(pCache);
121#endif
122 RTCritSectLeave(&pCache->CritSect);
123}
124
125DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
126{
127 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
128 pCache->cbCached -= cbAmount;
129}
130
131DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
132{
133 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
134 pCache->cbCached += cbAmount;
135}
136
137DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
138{
139 pList->cbCached += cbAmount;
140}
141
142DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
143{
144 pList->cbCached -= cbAmount;
145}
146
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Walks a LRU list and asserts that it is well formed.
 *
 * Checked per element: it does not show up again further down the list, it is
 * not the forbidden element and, if it has no successor, it is the list tail.
 *
 * @returns nothing
 * @param   pList         The LRU list to check.
 * @param   pNotInList    Element which is not allowed to occur in the list.
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pIt = pList->pHead; pIt; pIt = pIt->pNext)
    {
        /* Compare against every element following the current one. */
        for (PPDMBLKCACHEENTRY pLater = pIt->pNext; pLater; pLater = pLater->pNext)
        {
            AssertMsg(pIt != pLater,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pIt, pList));
        }

        AssertMsg(pIt != pNotInList, ("Not allowed entry %#p is in list\n", pIt));

        /* An element without a successor has to be the tail. */
        AssertMsg(pIt->pNext || pIt == pList->pTail,
                  ("End of list reached but last element is not list tail\n"));
    }
}
#endif
181
182/**
183 * Unlinks a cache entry from the LRU list it is assigned to.
184 *
185 * @returns nothing.
186 * @param pEntry The entry to unlink.
187 */
188static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
189{
190 PPDMBLKLRULIST pList = pEntry->pList;
191 PPDMBLKCACHEENTRY pPrev, pNext;
192
193 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
194
195 AssertPtr(pList);
196
197#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
198 pdmBlkCacheCheckList(pList, NULL);
199#endif
200
201 pPrev = pEntry->pPrev;
202 pNext = pEntry->pNext;
203
204 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
205 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
206
207 if (pPrev)
208 pPrev->pNext = pNext;
209 else
210 {
211 pList->pHead = pNext;
212
213 if (pNext)
214 pNext->pPrev = NULL;
215 }
216
217 if (pNext)
218 pNext->pPrev = pPrev;
219 else
220 {
221 pList->pTail = pPrev;
222
223 if (pPrev)
224 pPrev->pNext = NULL;
225 }
226
227 pEntry->pList = NULL;
228 pEntry->pPrev = NULL;
229 pEntry->pNext = NULL;
230 pdmBlkCacheListSub(pList, pEntry->cbData);
231#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
232 pdmBlkCacheCheckList(pList, pEntry);
233#endif
234}
235
236/**
237 * Adds a cache entry to the given LRU list unlinking it from the currently
238 * assigned list if needed.
239 *
240 * @returns nothing.
241 * @param pList List to the add entry to.
242 * @param pEntry Entry to add.
243 */
244static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
245{
246 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
247#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
248 pdmBlkCacheCheckList(pList, NULL);
249#endif
250
251 /* Remove from old list if needed */
252 if (pEntry->pList)
253 pdmBlkCacheEntryRemoveFromList(pEntry);
254
255 pEntry->pNext = pList->pHead;
256 if (pList->pHead)
257 pList->pHead->pPrev = pEntry;
258 else
259 {
260 Assert(!pList->pTail);
261 pList->pTail = pEntry;
262 }
263
264 pEntry->pPrev = NULL;
265 pList->pHead = pEntry;
266 pdmBlkCacheListAdd(pList, pEntry->cbData);
267 pEntry->pList = pList;
268#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
269 pdmBlkCacheCheckList(pList, NULL);
270#endif
271}
272
273/**
274 * Destroys a LRU list freeing all entries.
275 *
276 * @returns nothing
277 * @param pList Pointer to the LRU list to destroy.
278 *
279 * @note The caller must own the critical section of the cache.
280 */
281static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
282{
283 while (pList->pHead)
284 {
285 PPDMBLKCACHEENTRY pEntry = pList->pHead;
286
287 pList->pHead = pEntry->pNext;
288
289 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
290 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
291
292 RTMemPageFree(pEntry->pbData, pEntry->cbData);
293 RTMemFree(pEntry);
294 }
295}
296
297/**
298 * Tries to remove the given amount of bytes from a given list in the cache
299 * moving the entries to one of the given ghosts lists
300 *
301 * @returns Amount of data which could be freed.
302 * @param pCache Pointer to the global cache data.
303 * @param cbData The amount of the data to free.
304 * @param pListSrc The source list to evict data from.
305 * @param pGhostListSrc The ghost list removed entries should be moved to
306 * NULL if the entry should be freed.
307 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size
308 * @param ppbBuf Where to store the address of the buffer if an entry with the
309 * same size was found and fReuseBuffer is true.
310 *
311 * @note This function may return fewer bytes than requested because entries
312 * may be marked as non evictable if they are used for I/O at the
313 * moment.
314 */
315static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
316 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
317 bool fReuseBuffer, uint8_t **ppbBuffer)
318{
319 size_t cbEvicted = 0;
320
321 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
322
323 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
324 AssertMsg( !pGhostListDst
325 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
326 ("Destination list must be NULL or the recently used but paged out list\n"));
327
328 if (fReuseBuffer)
329 {
330 AssertPtr(ppbBuffer);
331 *ppbBuffer = NULL;
332 }
333
334 /* Start deleting from the tail. */
335 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
336
337 while ((cbEvicted < cbData) && pEntry)
338 {
339 PPDMBLKCACHEENTRY pCurr = pEntry;
340
341 pEntry = pEntry->pPrev;
342
343 /* We can't evict pages which are currently in progress or dirty but not in progress */
344 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
345 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
346 {
347 /* Ok eviction candidate. Grab the endpoint semaphore and check again
348 * because somebody else might have raced us. */
349 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
350 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
351
352 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
353 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
354 {
355 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
356
357 if (fReuseBuffer && (pCurr->cbData == cbData))
358 {
359 STAM_COUNTER_INC(&pCache->StatBuffersReused);
360 *ppbBuffer = pCurr->pbData;
361 }
362 else if (pCurr->pbData)
363 RTMemPageFree(pCurr->pbData, pCurr->cbData);
364
365 pCurr->pbData = NULL;
366 cbEvicted += pCurr->cbData;
367
368 pdmBlkCacheEntryRemoveFromList(pCurr);
369 pdmBlkCacheSub(pCache, pCurr->cbData);
370
371 if (pGhostListDst)
372 {
373 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
374
375 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
376
377 /* We have to remove the last entries from the paged out list. */
378 while ( ((pGhostListDst->cbCached + pCurr->cbData) > pCache->cbRecentlyUsedOutMax)
379 && pGhostEntFree)
380 {
381 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
382 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
383
384 pGhostEntFree = pGhostEntFree->pPrev;
385
386 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
387
388 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
389 {
390 pdmBlkCacheEntryRemoveFromList(pFree);
391
392 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
393 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
394 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
395
396 RTMemFree(pFree);
397 }
398
399 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
400 }
401
402 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
403 {
404 /* Couldn't remove enough entries. Delete */
405 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
406 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
407 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
408
409 RTMemFree(pCurr);
410 }
411 else
412 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
413 }
414 else
415 {
416 /* Delete the entry from the AVL tree it is assigned to. */
417 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
418 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
419 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
420
421 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
422 RTMemFree(pCurr);
423 }
424 }
425
426 }
427 else
428 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
429 }
430
431 return cbEvicted;
432}
433
434static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
435{
436 size_t cbRemoved = 0;
437
438 if ((pCache->cbCached + cbData) < pCache->cbMax)
439 return true;
440 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
441 {
442 /* Try to evict as many bytes as possible from A1in */
443 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
444 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
445
446 /*
447 * If it was not possible to remove enough entries
448 * try the frequently accessed cache.
449 */
450 if (cbRemoved < cbData)
451 {
452 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
453
454 /*
455 * If we removed something we can't pass the reuse buffer flag anymore because
456 * we don't need to evict that much data
457 */
458 if (!cbRemoved)
459 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
460 NULL, fReuseBuffer, ppbBuffer);
461 else
462 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
463 NULL, false, NULL);
464 }
465 }
466 else
467 {
468 /* We have to remove entries from frequently access list. */
469 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
470 NULL, fReuseBuffer, ppbBuffer);
471 }
472
473 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
474 return (cbRemoved >= cbData);
475}
476
477DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEIOXFER pIoXfer)
478{
479 int rc = VINF_SUCCESS;
480
481 switch (pBlkCache->enmType)
482 {
483 case PDMBLKCACHETYPE_DEV:
484 {
485 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
486 pIoXfer->enmXferDir,
487 off, pIoXfer->cbXfer,
488 &pIoXfer->SgBuf, pIoXfer);
489 break;
490 }
491 case PDMBLKCACHETYPE_DRV:
492 {
493 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
494 pIoXfer->enmXferDir,
495 off, pIoXfer->cbXfer,
496 &pIoXfer->SgBuf, pIoXfer);
497 break;
498 }
499 case PDMBLKCACHETYPE_USB:
500 {
501 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
502 pIoXfer->enmXferDir,
503 off, pIoXfer->cbXfer,
504 &pIoXfer->SgBuf, pIoXfer);
505 break;
506 }
507 case PDMBLKCACHETYPE_INTERNAL:
508 {
509 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
510 pIoXfer->enmXferDir,
511 off, pIoXfer->cbXfer,
512 &pIoXfer->SgBuf, pIoXfer);
513 break;
514 }
515 default:
516 AssertMsgFailed(("Unknown block cache type!\n"));
517 }
518
519 return rc;
520}
521
522/**
523 * Initiates a read I/O task for the given entry.
524 *
525 * @returns VBox status code.
526 * @param pEntry The entry to fetch the data to.
527 */
528static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
529{
530 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
531 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
532
533 /* Make sure no one evicts the entry while it is accessed. */
534 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
535
536 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
537 if (RT_UNLIKELY(!pIoXfer))
538 return VERR_NO_MEMORY;
539
540 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
541
542 pIoXfer->fIoCache = true;
543 pIoXfer->pEntry = pEntry;
544 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
545 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
546 pIoXfer->cbXfer = pEntry->cbData;
547 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
548 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
549
550 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
551}
552
553/**
554 * Initiates a write I/O task for the given entry.
555 *
556 * @returns nothing.
557 * @param pEntry The entry to read the data from.
558 */
559static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
560{
561 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
562 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
563
564 /* Make sure no one evicts the entry while it is accessed. */
565 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
566
567 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
568 if (RT_UNLIKELY(!pIoXfer))
569 return VERR_NO_MEMORY;
570
571 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
572
573 pIoXfer->fIoCache = true;
574 pIoXfer->pEntry = pEntry;
575 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
576 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
577 pIoXfer->cbXfer = pEntry->cbData;
578 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
579 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
580
581 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
582}
583
584/**
585 * Passthrough a part of a request directly to the I/O manager
586 * handling the endpoint.
587 *
588 * @returns VBox status code.
589 * @param pEndpoint The endpoint.
590 * @param pTask The task.
591 * @param pIoMemCtx The I/O memory context to use.
592 * @param offStart Offset to start transfer from.
593 * @param cbData Amount of data to transfer.
594 * @param enmTransferType The transfer type (read/write)
595 */
596static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
597 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
598 PDMBLKCACHEXFERDIR enmXferDir)
599{
600
601 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
602 if (RT_UNLIKELY(!pIoXfer))
603 return VERR_NO_MEMORY;
604
605 pIoXfer->fIoCache = false;
606 pIoXfer->pReq = pReq;
607 pIoXfer->cbXfer = cbData;
608 pIoXfer->enmXferDir = enmXferDir;
609 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
610
611 return pdmBlkCacheEnqueue(pBlkCache, offStart, pIoXfer);
612}
613
614/**
615 * Commit a single dirty entry to the endpoint
616 *
617 * @returns nothing
618 * @param pEntry The entry to commit.
619 */
620static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
621{
622 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
623 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
624 ("Invalid flags set for entry %#p\n", pEntry));
625
626 pdmBlkCacheEntryWriteToMedium(pEntry);
627}
628
629/**
630 * Commit all dirty entries for a single endpoint.
631 *
632 * @returns nothing.
633 * @param pBlkCache The endpoint cache to commit.
634 */
635static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
636{
637 uint32_t cbCommitted = 0;
638 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
639
640 /* The list is moved to a new header to reduce locking overhead. */
641 RTLISTNODE ListDirtyNotCommitted;
642 RTSPINLOCKTMP Tmp;
643
644 RTListInit(&ListDirtyNotCommitted);
645 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
646 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
647 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
648
649 if (!RTListIsEmpty(&ListDirtyNotCommitted))
650 {
651 PPDMBLKCACHEENTRY pEntry = RTListNodeGetFirst(&ListDirtyNotCommitted,
652 PDMBLKCACHEENTRY,
653 NodeNotCommitted);
654
655 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
656 {
657 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
658 NodeNotCommitted);
659 pdmBlkCacheEntryCommit(pEntry);
660 cbCommitted += pEntry->cbData;
661 RTListNodeRemove(&pEntry->NodeNotCommitted);
662 pEntry = pNext;
663 }
664
665 /* Commit the last endpoint */
666 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
667 pdmBlkCacheEntryCommit(pEntry);
668 RTListNodeRemove(&pEntry->NodeNotCommitted);
669 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
670 ("Committed all entries but list is not empty\n"));
671 }
672
673 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
674 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
675 ("Number of committed bytes exceeds number of dirty bytes\n"));
676 ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
677}
678
679/**
680 * Commit all dirty entries in the cache.
681 *
682 * @returns nothing.
683 * @param pCache The global cache instance.
684 */
685static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
686{
687 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
688
689 if (!fCommitInProgress)
690 {
691 pdmBlkCacheLockEnter(pCache);
692 Assert(!RTListIsEmpty(&pCache->ListUsers));
693
694 PPDMBLKCACHE pBlkCache = RTListNodeGetFirst(&pCache->ListUsers,
695 PDMBLKCACHE,
696 NodeCacheUser);
697 AssertPtr(pBlkCache);
698
699 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
700 {
701 pdmBlkCacheCommit(pBlkCache);
702
703 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
704 NodeCacheUser);
705 }
706
707 /* Commit the last endpoint */
708 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
709 pdmBlkCacheCommit(pBlkCache);
710
711 pdmBlkCacheLockLeave(pCache);
712 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
713 }
714}
715
716/**
717 * Adds the given entry as a dirty to the cache.
718 *
719 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
720 * @param pBlkCache The endpoint cache the entry belongs to.
721 * @param pEntry The entry to add.
722 */
723static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
724{
725 bool fDirtyBytesExceeded = false;
726 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
727
728 /* If the commit timer is disabled we commit right away. */
729 if (pCache->u32CommitTimeoutMs == 0)
730 {
731 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
732 pdmBlkCacheEntryCommit(pEntry);
733 }
734 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
735 {
736 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
737
738 RTSPINLOCKTMP Tmp;
739 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
740 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
741 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
742
743 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
744
745 fDirtyBytesExceeded = (cbDirty >= pCache->cbCommitDirtyThreshold);
746 }
747
748 return fDirtyBytesExceeded;
749}
750
751/**
752 * Commit timer callback.
753 */
754static void pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
755{
756 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
757
758 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
759
760 if (ASMAtomicReadU32(&pCache->cbDirty) > 0)
761 pdmBlkCacheCommitDirtyEntries(pCache);
762
763 TMTimerSetMillies(pTimer, pCache->u32CommitTimeoutMs);
764 LogFlowFunc(("Entries committed, going to sleep\n"));
765}
766
767int pdmR3BlkCacheInit(PVM pVM)
768{
769 int rc = VINF_SUCCESS;
770 PUVM pUVM = pVM->pUVM;
771 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
772
773 LogFlowFunc((": pVM=%p\n", pVM));
774
775 VM_ASSERT_EMT(pVM);
776
777 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
778 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
779
780 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
781 if (!pBlkCacheGlobal)
782 return VERR_NO_MEMORY;
783
784 RTListInit(&pBlkCacheGlobal->ListUsers);
785 pBlkCacheGlobal->pVM = pVM;
786 pBlkCacheGlobal->cRefs = 0;
787 pBlkCacheGlobal->cbCached = 0;
788 pBlkCacheGlobal->fCommitInProgress = false;
789
790 /* Initialize members */
791 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
792 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
793 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
794
795 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
796 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
797 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
798
799 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
800 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
801 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
802
803 do
804 {
805 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
806 AssertLogRelRCBreak(rc);
807 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
808
809 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
810 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
811 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
812 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
813
814 /** @todo r=aeichner: Experiment to find optimal default values */
815 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
816 AssertLogRelRCBreak(rc);
817 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
818 AssertLogRelRCBreak(rc);
819 } while (0);
820
821 if (RT_SUCCESS(rc))
822 {
823 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
824 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
825 "/PDM/BlkCache/cbMax",
826 STAMUNIT_BYTES,
827 "Maximum cache size");
828 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
829 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
830 "/PDM/BlkCache/cbCached",
831 STAMUNIT_BYTES,
832 "Currently used cache");
833 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
834 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
835 "/PDM/BlkCache/cbCachedMruIn",
836 STAMUNIT_BYTES,
837 "Number of bytes cached in MRU list");
838 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
839 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
840 "/PDM/BlkCache/cbCachedMruOut",
841 STAMUNIT_BYTES,
842 "Number of bytes cached in FRU list");
843 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
844 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
845 "/PDM/BlkCache/cbCachedFru",
846 STAMUNIT_BYTES,
847 "Number of bytes cached in FRU ghost list");
848
849#ifdef VBOX_WITH_STATISTICS
850 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
851 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
852 "/PDM/BlkCache/CacheHits",
853 STAMUNIT_COUNT, "Number of hits in the cache");
854 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
855 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
856 "/PDM/BlkCache/CachePartialHits",
857 STAMUNIT_COUNT, "Number of partial hits in the cache");
858 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
859 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
860 "/PDM/BlkCache/CacheMisses",
861 STAMUNIT_COUNT, "Number of misses when accessing the cache");
862 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
863 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
864 "/PDM/BlkCache/CacheRead",
865 STAMUNIT_BYTES, "Number of bytes read from the cache");
866 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
867 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
868 "/PDM/BlkCache/CacheWritten",
869 STAMUNIT_BYTES, "Number of bytes written to the cache");
870 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
871 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
872 "/PDM/BlkCache/CacheTreeGet",
873 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
874 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
875 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
876 "/PDM/BlkCache/CacheTreeInsert",
877 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
878 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
879 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
880 "/PDM/BlkCache/CacheTreeRemove",
881 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
882 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
883 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
884 "/PDM/BlkCache/CacheBuffersReused",
885 STAMUNIT_COUNT, "Number of times a buffer could be reused");
886#endif
887
888 /* Initialize the critical section */
889 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
890 }
891
892 if (RT_SUCCESS(rc))
893 {
894 /* Create the commit timer */
895 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
896 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
897 pdmBlkCacheCommitTimerCallback,
898 pBlkCacheGlobal,
899 "BlkCache-Commit",
900 &pBlkCacheGlobal->pTimerCommit);
901
902 if (RT_SUCCESS(rc))
903 {
904 LogRel(("BlkCache: Cache successfully initialised. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
905 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
906 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
907 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
908 return VINF_SUCCESS;
909 }
910
911 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
912 }
913
914 if (pBlkCacheGlobal)
915 RTMemFree(pBlkCacheGlobal);
916
917 LogFlowFunc((": returns rc=%Rrc\n", pVM, rc));
918 return rc;
919}
920
921void pdmR3BlkCacheTerm(PVM pVM)
922{
923 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
924
925 if (pBlkCacheGlobal)
926 {
927 /* Make sure no one else uses the cache now */
928 pdmBlkCacheLockEnter(pBlkCacheGlobal);
929
930 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
931 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
932 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
933 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
934
935 pdmBlkCacheLockLeave(pBlkCacheGlobal);
936
937 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
938 RTMemFree(pBlkCacheGlobal);
939 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
940 }
941}
942
943static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
944{
945 int rc = VINF_SUCCESS;
946 PPDMBLKCACHE pBlkCache = NULL;
947 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
948
949 if (!pBlkCacheGlobal)
950 return VERR_NOT_SUPPORTED;
951
952 /*
953 * Check that no other user cache has the same id first,
954 * Unique id's are necessary in case the state is saved.
955 */
956 pdmBlkCacheLockEnter(pBlkCacheGlobal);
957
958 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
959 {
960 if (!RTStrCmp(pBlkCache->pszId, pcszId))
961 break;
962 }
963
964 if (!pBlkCache)
965 {
966 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
967
968 if (pBlkCache)
969 pBlkCache->pszId = RTStrDup(pcszId);
970
971 if ( pBlkCache
972 && pBlkCache->pszId)
973 {
974 pBlkCache->pCache = pBlkCacheGlobal;
975 RTListInit(&pBlkCache->ListDirtyNotCommitted);
976
977 rc = RTSpinlockCreate(&pBlkCache->LockList);
978 if (RT_SUCCESS(rc))
979 {
980 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
981 if (RT_SUCCESS(rc))
982 {
983 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
984 if (pBlkCache->pTree)
985 {
986 /* Arm the timer if this is the first endpoint. */
987 if ( pBlkCacheGlobal->cRefs == 1
988 && pBlkCacheGlobal->u32CommitTimeoutMs > 0)
989 rc = TMTimerSetMillies(pBlkCacheGlobal->pTimerCommit, pBlkCacheGlobal->u32CommitTimeoutMs);
990
991 if (RT_SUCCESS(rc))
992 {
993#ifdef VBOX_WITH_STATISTICS
994 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
995 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
996 STAMUNIT_COUNT, "Number of deferred writes",
997 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
998#endif
999
1000 /* Add to the list of users. */
1001 pBlkCacheGlobal->cRefs++;
1002 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1003 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1004
1005 *ppBlkCache = pBlkCache;
1006 LogFlowFunc(("returns success\n"));
1007 return VINF_SUCCESS;
1008 }
1009 }
1010 else
1011 rc = VERR_NO_MEMORY;
1012
1013 RTSemRWDestroy(pBlkCache->SemRWEntries);
1014 }
1015
1016 RTSpinlockDestroy(pBlkCache->LockList);
1017 }
1018
1019 RTStrFree(pBlkCache->pszId);
1020 }
1021 else
1022 rc = VERR_NO_MEMORY;
1023
1024 if (pBlkCache)
1025 RTMemFree(pBlkCache);
1026 }
1027 else
1028 rc = VERR_ALREADY_EXISTS;
1029
1030 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1031
1032 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1033 return rc;
1034}
1035
1036VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1037 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1038 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1039 const char *pcszId)
1040{
1041 int rc = VINF_SUCCESS;
1042 PPDMBLKCACHE pBlkCache;
1043
1044 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1045 if (RT_SUCCESS(rc))
1046 {
1047 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1048 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1049 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1050 }
1051
1052 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1053 return rc;
1054}
1055
1056VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1057 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1058 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1059 const char *pcszId)
1060{
1061 int rc = VINF_SUCCESS;
1062 PPDMBLKCACHE pBlkCache;
1063
1064 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1065 if (RT_SUCCESS(rc))
1066 {
1067 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1068 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1069 pBlkCache->u.Dev.pDevIns = pDevIns;
1070 }
1071
1072 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1073 return rc;
1074
1075}
1076
1077VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1078 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1079 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1080 const char *pcszId)
1081{
1082 int rc = VINF_SUCCESS;
1083 PPDMBLKCACHE pBlkCache;
1084
1085 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1086 if (RT_SUCCESS(rc))
1087 {
1088 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1089 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1090 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1091 }
1092
1093 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1094 return rc;
1095
1096}
1097
1098VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1099 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1100 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1101 const char *pcszId)
1102{
1103 int rc = VINF_SUCCESS;
1104 PPDMBLKCACHE pBlkCache;
1105
1106 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1107 if (RT_SUCCESS(rc))
1108 {
1109 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1110 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1111 pBlkCache->u.Int.pvUser = pvUser;
1112 }
1113
1114 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1115 return rc;
1116
1117}
1118
1119/**
1120 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1121 *
1122 * @returns IPRT status code.
1123 * @param pNode The node to destroy.
1124 * @param pvUser Opaque user data.
1125 */
1126static int pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1127{
1128 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1129 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1130 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1131
1132 while (ASMAtomicReadU32(&pEntry->fFlags) & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY))
1133 {
1134 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1135 pdmBlkCacheEntryRef(pEntry);
1136 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1137 pdmBlkCacheLockLeave(pCache);
1138
1139 RTThreadSleep(250);
1140
1141 /* Re-enter all locks */
1142 pdmBlkCacheLockEnter(pCache);
1143 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1144 pdmBlkCacheEntryRelease(pEntry);
1145 }
1146
1147 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
1148 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1149
1150 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1151 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1152
1153 pdmBlkCacheEntryRemoveFromList(pEntry);
1154
1155 if (fUpdateCache)
1156 pdmBlkCacheSub(pCache, pEntry->cbData);
1157
1158 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1159 RTMemFree(pEntry);
1160
1161 return VINF_SUCCESS;
1162}
1163
1164/**
1165 * Destroys all cache resources used by the given endpoint.
1166 *
1167 * @returns nothing.
1168 * @param pEndpoint The endpoint to the destroy.
1169 */
1170VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1171{
1172 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1173
1174 /* Make sure nobody is accessing the cache while we delete the tree. */
1175 pdmBlkCacheLockEnter(pCache);
1176 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1177 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1178 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1179
1180 RTSpinlockDestroy(pBlkCache->LockList);
1181
1182 pCache->cRefs--;
1183 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1184
1185 if ( !pCache->cRefs
1186 && pCache->u32CommitTimeoutMs > 0)
1187 TMTimerStop(pCache->pTimerCommit);
1188
1189 pdmBlkCacheLockLeave(pCache);
1190
1191 RTSemRWDestroy(pBlkCache->SemRWEntries);
1192
1193#ifdef VBOX_WITH_STATISTICS
1194 STAMR3Deregister(pCache->pVM, &pBlkCache->StatWriteDeferred);
1195#endif
1196
1197 RTStrFree(pBlkCache->pszId);
1198 RTMemFree(pBlkCache);
1199}
1200
1201VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1202{
1203 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1204
1205 /*
1206 * Validate input.
1207 */
1208 if (!pDevIns)
1209 return;
1210 VM_ASSERT_EMT(pVM);
1211
1212 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1213 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1214
1215 /* Return silently if not supported. */
1216 if (!pBlkCacheGlobal)
1217 return;
1218
1219 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1220
1221 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1222 {
1223 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1224 && pBlkCache->u.Dev.pDevIns == pDevIns)
1225 PDMR3BlkCacheRelease(pBlkCache);
1226 }
1227
1228 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1229}
1230
1231VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1232{
1233 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1234
1235 /*
1236 * Validate input.
1237 */
1238 if (!pDrvIns)
1239 return;
1240 VM_ASSERT_EMT(pVM);
1241
1242 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1243 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1244
1245 /* Return silently if not supported. */
1246 if (!pBlkCacheGlobal)
1247 return;
1248
1249 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1250
1251 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1252 {
1253 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1254 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1255 PDMR3BlkCacheRelease(pBlkCache);
1256 }
1257
1258 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1259}
1260
1261VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1262{
1263 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1264
1265 /*
1266 * Validate input.
1267 */
1268 if (!pUsbIns)
1269 return;
1270 VM_ASSERT_EMT(pVM);
1271
1272 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1273 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1274
1275 /* Return silently if not supported. */
1276 if (!pBlkCacheGlobal)
1277 return;
1278
1279 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1280
1281 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1282 {
1283 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1284 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1285 PDMR3BlkCacheRelease(pBlkCache);
1286 }
1287
1288 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1289}
1290
1291static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1292{
1293 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1294 PPDMBLKCACHEENTRY pEntry = NULL;
1295
1296 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1297
1298 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1299 pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1300 if (pEntry)
1301 pdmBlkCacheEntryRef(pEntry);
1302 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1303
1304 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1305
1306 return pEntry;
1307}
1308
1309/**
1310 * Return the best fit cache entries for the given offset.
1311 *
1312 * @returns nothing.
1313 * @param pBlkCache The endpoint cache.
1314 * @param off The offset.
1315 * @param pEntryAbove Where to store the pointer to the best fit entry above the
1316 * the given offset. NULL if not required.
1317 * @param pEntryBelow Where to store the pointer to the best fit entry below the
1318 * the given offset. NULL if not required.
1319 */
1320static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off,
1321 PPDMBLKCACHEENTRY *ppEntryAbove,
1322 PPDMBLKCACHEENTRY *ppEntryBelow)
1323{
1324 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1325
1326 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1327
1328 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1329 if (ppEntryAbove)
1330 {
1331 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1332 if (*ppEntryAbove)
1333 pdmBlkCacheEntryRef(*ppEntryAbove);
1334 }
1335
1336 if (ppEntryBelow)
1337 {
1338 *ppEntryBelow = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, false /*fAbove*/);
1339 if (*ppEntryBelow)
1340 pdmBlkCacheEntryRef(*ppEntryBelow);
1341 }
1342 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1343
1344 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1345}
1346
1347static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1348{
1349 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1350
1351 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1352 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1353 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1354 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1355 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1356 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1357}
1358
1359/**
1360 * Allocates and initializes a new entry for the cache.
1361 * The entry has a reference count of 1.
1362 *
1363 * @returns Pointer to the new cache entry or NULL if out of memory.
1364 * @param pBlkCache The cache the entry belongs to.
1365 * @param off Start offset.
1366 * @param cbData Size of the cache entry.
1367 * @param pbBuffer Pointer to the buffer to use.
1368 * NULL if a new buffer should be allocated.
1369 * The buffer needs to have the same size of the entry.
1370 */
1371static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
1372 uint64_t off, size_t cbData, uint8_t *pbBuffer)
1373{
1374 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1375
1376 if (RT_UNLIKELY(!pEntryNew))
1377 return NULL;
1378
1379 pEntryNew->Core.Key = off;
1380 pEntryNew->Core.KeyLast = off + cbData - 1;
1381 pEntryNew->pBlkCache = pBlkCache;
1382 pEntryNew->fFlags = 0;
1383 pEntryNew->cRefs = 1; /* We are using it now. */
1384 pEntryNew->pList = NULL;
1385 pEntryNew->cbData = cbData;
1386 pEntryNew->pWaitingHead = NULL;
1387 pEntryNew->pWaitingTail = NULL;
1388 if (pbBuffer)
1389 pEntryNew->pbData = pbBuffer;
1390 else
1391 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1392
1393 if (RT_UNLIKELY(!pEntryNew->pbData))
1394 {
1395 RTMemFree(pEntryNew);
1396 return NULL;
1397 }
1398
1399 return pEntryNew;
1400}
1401
1402/**
1403 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1404 * in exclusive mode.
1405 *
1406 * @returns true if the flag in fSet is set and the one in fClear is clear.
1407 * false otherwise.
1408 * The R/W semaphore is only held if true is returned.
1409 *
1410 * @param pBlkCache The endpoint cache instance data.
1411 * @param pEntry The entry to check the flags for.
1412 * @param fSet The flag which is tested to be set.
1413 * @param fClear The flag which is tested to be clear.
1414 */
1415DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1416 PPDMBLKCACHEENTRY pEntry,
1417 uint32_t fSet, uint32_t fClear)
1418{
1419 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1420 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1421
1422 if (fPassed)
1423 {
1424 /* Acquire the lock and check again because the completion callback might have raced us. */
1425 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1426
1427 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1428 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1429
1430 /* Drop the lock if we didn't passed the test. */
1431 if (!fPassed)
1432 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1433 }
1434
1435 return fPassed;
1436}
1437
1438/**
1439 * Adds a segment to the waiting list for a cache entry
1440 * which is currently in progress.
1441 *
1442 * @returns nothing.
1443 * @param pEntry The cache entry to add the segment to.
1444 * @param pSeg The segment to add.
1445 */
1446DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1447 PPDMBLKCACHEWAITER pWaiter)
1448{
1449 pWaiter->pNext = NULL;
1450
1451 if (pEntry->pWaitingHead)
1452 {
1453 AssertPtr(pEntry->pWaitingTail);
1454
1455 pEntry->pWaitingTail->pNext = pWaiter;
1456 pEntry->pWaitingTail = pWaiter;
1457 }
1458 else
1459 {
1460 Assert(!pEntry->pWaitingTail);
1461
1462 pEntry->pWaitingHead = pWaiter;
1463 pEntry->pWaitingTail = pWaiter;
1464 }
1465}
1466
1467/**
1468 * Add a buffer described by the I/O memory context
1469 * to the entry waiting for completion.
1470 *
1471 * @returns VBox status code.
1472 * @param pEntry The entry to add the buffer to.
1473 * @param pTask Task associated with the buffer.
1474 * @param pIoMemCtx The memory context to use.
1475 * @param offDiff Offset from the start of the buffer
1476 * in the entry.
1477 * @param cbData Amount of data to wait for onthis entry.
1478 * @param fWrite Flag whether the task waits because it wants to write
1479 * to the cache entry.
1480 */
1481static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry,
1482 PPDMBLKCACHEREQ pReq,
1483 PCRTSGBUF pSgBuf, uint64_t offDiff,
1484 size_t cbData, bool fWrite)
1485{
1486 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1487 if (!pWaiter)
1488 return VERR_NO_MEMORY;
1489
1490 pWaiter->pReq = pReq;
1491 pWaiter->offCacheEntry = offDiff;
1492 pWaiter->cbTransfer = cbData;
1493 pWaiter->fWrite = fWrite;
1494 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1495
1496 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1497
1498 return VINF_SUCCESS;
1499}
1500
1501/**
1502 * Calculate aligned offset and size for a new cache entry
1503 * which do not intersect with an already existing entry and the
1504 * file end.
1505 *
1506 * @returns The number of bytes the entry can hold of the requested amount
1507 * of byte.
1508 * @param pEndpoint The endpoint.
1509 * @param pBlkCache The endpoint cache.
1510 * @param off The start offset.
1511 * @param cb The number of bytes the entry needs to hold at least.
1512 * @param uAlignment Alignment of the boundary sizes.
1513 * @param poffAligned Where to store the aligned offset.
1514 * @param pcbAligned Where to store the aligned size of the entry.
1515 */
1516static size_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1517 uint64_t off, size_t cb,
1518 unsigned uAlignment,
1519 uint64_t *poffAligned, size_t *pcbAligned)
1520{
1521 size_t cbAligned;
1522 size_t cbInEntry = 0;
1523 uint64_t offAligned;
1524 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1525 PPDMBLKCACHEENTRY pEntryBelow = NULL;
1526
1527 /* Get the best fit entries around the offset */
1528 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove, &pEntryBelow);
1529
1530 /* Log the info */
1531 LogFlow(("%sest fit entry below off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1532 pEntryBelow ? "B" : "No b",
1533 off,
1534 pEntryBelow ? pEntryBelow->Core.Key : 0,
1535 pEntryBelow ? pEntryBelow->Core.KeyLast : 0,
1536 pEntryBelow ? pEntryBelow->cbData : 0));
1537
1538 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1539 pEntryAbove ? "B" : "No b",
1540 off,
1541 pEntryAbove ? pEntryAbove->Core.Key : 0,
1542 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1543 pEntryAbove ? pEntryAbove->cbData : 0));
1544
1545 /* Align the offset first. */
1546 offAligned = off & ~(uint64_t)(512-1);
1547 if ( pEntryBelow
1548 && offAligned <= pEntryBelow->Core.KeyLast)
1549 offAligned = pEntryBelow->Core.KeyLast;
1550
1551 if ( pEntryAbove
1552 && off + cb > pEntryAbove->Core.Key)
1553 {
1554 cbInEntry = pEntryAbove->Core.Key - off;
1555 cbAligned = pEntryAbove->Core.Key - offAligned;
1556 }
1557 else
1558 {
1559 cbAligned = cb;
1560 cbInEntry = cb;
1561#if 0
1562 /*
1563 * Align the size to a 4KB boundary.
1564 * Memory size is aligned to a page boundary
1565 * and memory is wasted if the size is rather small.
1566 * (For example reads with a size of 512 bytes).
1567 */
1568 cbInEntry = cb;
1569 cbAligned = RT_ALIGN_Z(cb + (off - offAligned), uAlignment);
1570
1571 /*
1572 * Clip to file size if the original request doesn't
1573 * exceed the file (not an appending write)
1574 */
1575 uint64_t cbReq = off + cb;
1576 if (cbReq >= pEndpoint->cbFile)
1577 cbAligned = cbReq - offAligned;
1578 else
1579 cbAligned = RT_MIN(pEndpoint->cbFile - offAligned, cbAligned);
1580 if (pEntryAbove)
1581 {
1582 Assert(pEntryAbove->Core.Key >= off);
1583 cbAligned = RT_MIN(cbAligned, pEntryAbove->Core.Key - offAligned);
1584 }
1585#endif
1586 }
1587
1588 /* A few sanity checks */
1589 AssertMsg(!pEntryBelow || pEntryBelow->Core.KeyLast < offAligned,
1590 ("Aligned start offset intersects with another cache entry\n"));
1591 AssertMsg(!pEntryAbove || (offAligned + cbAligned) <= pEntryAbove->Core.Key,
1592 ("Aligned size intersects with another cache entry\n"));
1593 Assert(cbInEntry <= cbAligned);
1594
1595 if (pEntryBelow)
1596 pdmBlkCacheEntryRelease(pEntryBelow);
1597 if (pEntryAbove)
1598 pdmBlkCacheEntryRelease(pEntryAbove);
1599
1600 LogFlow(("offAligned=%llu cbAligned=%u\n", offAligned, cbAligned));
1601
1602 *poffAligned = offAligned;
1603 *pcbAligned = cbAligned;
1604
1605 return cbInEntry;
1606}
1607
1608/**
1609 * Create a new cache entry evicting data from the cache if required.
1610 *
1611 * @returns Pointer to the new cache entry or NULL
1612 * if not enough bytes could be evicted from the cache.
1613 * @param pEndpoint The endpoint.
1614 * @param pBlkCache The endpoint cache.
1615 * @param off The offset.
1616 * @param cb Number of bytes the cache entry should have.
1617 * @param uAlignment Alignment the size of the entry should have.
1618 * @param pcbData Where to store the number of bytes the new
1619 * entry can hold. May be lower than actually requested
1620 * due to another entry intersecting the access range.
1621 */
1622static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache,
1623 uint64_t off, size_t cb,
1624 unsigned uAlignment,
1625 size_t *pcbData)
1626{
1627 uint64_t offStart = 0;
1628 size_t cbEntry = 0;
1629 PPDMBLKCACHEENTRY pEntryNew = NULL;
1630 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1631 uint8_t *pbBuffer = NULL;
1632
1633 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, cb, uAlignment,
1634 &offStart, &cbEntry);
1635
1636 pdmBlkCacheLockEnter(pCache);
1637 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1638
1639 if (fEnough)
1640 {
1641 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1642
1643 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, offStart, cbEntry, pbBuffer);
1644 if (RT_LIKELY(pEntryNew))
1645 {
1646 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1647 pdmBlkCacheAdd(pCache, cbEntry);
1648 pdmBlkCacheLockLeave(pCache);
1649
1650 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1651
1652 AssertMsg( (off >= pEntryNew->Core.Key)
1653 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1654 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1655 off, pEntryNew->Core.Key));
1656 }
1657 else
1658 pdmBlkCacheLockLeave(pCache);
1659 }
1660 else
1661 pdmBlkCacheLockLeave(pCache);
1662
1663 return pEntryNew;
1664}
1665
1666static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(size_t cbXfer, void *pvUser)
1667{
1668 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1669
1670 if (RT_LIKELY(pReq))
1671 {
1672 pReq->pvUser = pvUser;
1673 pReq->cbXfer = cbXfer;
1674 pReq->rcReq = VINF_SUCCESS;
1675 pReq->cXfersPending = 0;
1676 }
1677
1678 return pReq;
1679}
1680
1681static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1682{
1683 switch (pBlkCache->enmType)
1684 {
1685 case PDMBLKCACHETYPE_DEV:
1686 {
1687 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1688 pReq->pvUser, pReq->rcReq);
1689 break;
1690 }
1691 case PDMBLKCACHETYPE_DRV:
1692 {
1693 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1694 pReq->pvUser, pReq->rcReq);
1695 break;
1696 }
1697 case PDMBLKCACHETYPE_USB:
1698 {
1699 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1700 pReq->pvUser, pReq->rcReq);
1701 break;
1702 }
1703 case PDMBLKCACHETYPE_INTERNAL:
1704 {
1705 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1706 pReq->pvUser, pReq->rcReq);
1707 break;
1708 }
1709 default:
1710 AssertMsgFailed(("Unknown block cache type!\n"));
1711 }
1712
1713 RTMemFree(pReq);
1714}
1715
1716static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1717 size_t cbComplete, int rcReq, bool fCallHandler)
1718{
1719 if (RT_FAILURE(rcReq))
1720 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1721
1722 AssertMsg(pReq->cbXfer >= cbComplete, ("Completed more than left\n"));
1723 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1724 uint32_t cbOld = ASMAtomicSubU32(&pReq->cbXfer, cbComplete);
1725 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1726
1727 if ( !(cbOld - cbComplete)
1728 && !cXfersPending)
1729 {
1730 if (fCallHandler)
1731 pdmBlkCacheReqComplete(pBlkCache, pReq);
1732 return true;
1733 }
1734
1735 return false;
1736}
1737
1738VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1739 PCRTSGBUF pcSgBuf, size_t cbRead, void *pvUser)
1740{
1741 int rc = VINF_SUCCESS;
1742 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1743 PPDMBLKCACHEENTRY pEntry;
1744 PPDMBLKCACHEREQ pReq;
1745
1746 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbRead=%u pvUser=%#p\n",
1747 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbRead, pvUser));
1748
1749 RTSGBUF SgBuf;
1750 RTSgBufClone(&SgBuf, pcSgBuf);
1751
1752 /* Allocate new request structure. */
1753 pReq = pdmBlkCacheReqAlloc(cbRead, pvUser);
1754 if (RT_UNLIKELY(pReq))
1755 return VERR_NO_MEMORY;
1756
1757 /* Increment data transfer counter to keep the request valid while we access it. */
1758 ASMAtomicIncU32(&pReq->cXfersPending);
1759
1760 while (cbRead)
1761 {
1762 size_t cbToRead;
1763
1764 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1765
1766 /*
1767 * If there is no entry we try to create a new one eviciting unused pages
1768 * if the cache is full. If this is not possible we will pass the request through
1769 * and skip the caching (all entries may be still in progress so they can't
1770 * be evicted)
1771 * If we have an entry it can be in one of the LRU lists where the entry
1772 * contains data (recently used or frequently used LRU) so we can just read
1773 * the data we need and put the entry at the head of the frequently used LRU list.
1774 * In case the entry is in one of the ghost lists it doesn't contain any data.
1775 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1776 */
1777 if (pEntry)
1778 {
1779 uint64_t offDiff = off - pEntry->Core.Key;
1780
1781 AssertMsg(off >= pEntry->Core.Key,
1782 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1783 off, pEntry->Core.Key));
1784
1785 AssertPtr(pEntry->pList);
1786
1787 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
1788
1789 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1790 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
1791 off, cbToRead));
1792
1793 cbRead -= cbToRead;
1794
1795 if (!cbRead)
1796 STAM_COUNTER_INC(&pCache->cHits);
1797 else
1798 STAM_COUNTER_INC(&pCache->cPartialHits);
1799
1800 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1801
1802 /* Ghost lists contain no data. */
1803 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1804 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1805 {
1806 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
1807 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
1808 PDMBLKCACHE_ENTRY_IS_DIRTY))
1809 {
1810 /* Entry didn't completed yet. Append to the list */
1811 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
1812 &SgBuf, offDiff, cbToRead,
1813 false /* fWrite */);
1814 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1815 }
1816 else
1817 {
1818 /* Read as much as we can from the entry. */
1819 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
1820 }
1821
1822 /* Move this entry to the top position */
1823 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1824 {
1825 pdmBlkCacheLockEnter(pCache);
1826 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1827 pdmBlkCacheLockLeave(pCache);
1828 }
1829 /* Release the entry */
1830 pdmBlkCacheEntryRelease(pEntry);
1831 }
1832 else
1833 {
1834 uint8_t *pbBuffer = NULL;
1835
1836 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
1837
1838 pdmBlkCacheLockEnter(pCache);
1839 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1840 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1841
1842 /* Move the entry to Am and fetch it to the cache. */
1843 if (fEnough)
1844 {
1845 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1846 pdmBlkCacheAdd(pCache, pEntry->cbData);
1847 pdmBlkCacheLockLeave(pCache);
1848
1849 if (pbBuffer)
1850 pEntry->pbData = pbBuffer;
1851 else
1852 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1853 AssertPtr(pEntry->pbData);
1854
1855 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
1856 &SgBuf, offDiff, cbToRead,
1857 false /* fWrite */);
1858 pdmBlkCacheEntryReadFromMedium(pEntry);
1859 /* Release the entry */
1860 pdmBlkCacheEntryRelease(pEntry);
1861 }
1862 else
1863 {
1864 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1865 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
1866 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
1867 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
1868 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1869
1870 pdmBlkCacheLockLeave(pCache);
1871
1872 RTMemFree(pEntry);
1873
1874 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1875 &SgBuf, off, cbToRead,
1876 PDMBLKCACHEXFERDIR_READ);
1877 }
1878 }
1879 }
1880 else
1881 {
1882#ifdef VBOX_WITH_IO_READ_CACHE
1883 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
1884 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
1885 off, cbRead,
1886 PAGE_SIZE,
1887 &cbToRead);
1888
1889 cbRead -= cbToRead;
1890
1891 if (pEntryNew)
1892 {
1893 if (!cbRead)
1894 STAM_COUNTER_INC(&pCache->cMisses);
1895 else
1896 STAM_COUNTER_INC(&pCache->cPartialHits);
1897
1898 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
1899 &SgBuf,
1900 off - pEntryNew->Core.Key,
1901 cbToRead,
1902 false /* fWrite */);
1903 pdmBlkCacheEntryReadFromMedium(pEntryNew);
1904 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1905 }
1906 else
1907 {
1908 /*
1909 * There is not enough free space in the cache.
1910 * Pass the request directly to the I/O manager.
1911 */
1912 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
1913
1914 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1915 &SgBuf, off, cbToRead,
1916 PDMBLKCACHEXFERDIR_READ);
1917 }
1918#else
1919 /* Clip read size if necessary. */
1920 PPDMBLKCACHEENTRY pEntryAbove;
1921 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off,
1922 &pEntryAbove, NULL);
1923
1924 if (pEntryAbove)
1925 {
1926 if (off + cbRead > pEntryAbove->Core.Key)
1927 cbToRead = pEntryAbove->Core.Key - off;
1928 else
1929 cbToRead = cbRead;
1930
1931 pdmBlkCacheEntryRelease(pEntryAbove);
1932 }
1933 else
1934 cbToRead = cbRead;
1935
1936 cbRead -= cbToRead;
1937 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1938 &SgBuf, off, cbToRead,
1939 PDMBLKCACHEXFERDIR_READ);
1940#endif
1941 }
1942 off += cbToRead;
1943 }
1944
1945 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
1946 rc = VINF_AIO_TASK_PENDING;
1947
1948 LogFlowFunc((": Leave rc=%Rrc\n", rc));
1949
1950 return rc;
1951}
1952
1953VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off,
1954 PCRTSGBUF pcSgBuf, size_t cbWrite, void *pvUser)
1955{
1956 int rc = VINF_SUCCESS;
1957 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1958 PPDMBLKCACHEENTRY pEntry;
1959 PPDMBLKCACHEREQ pReq;
1960
1961 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbWrite=%u pvUser=%#p\n",
1962 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbWrite, pvUser));
1963
1964 RTSGBUF SgBuf;
1965 RTSgBufClone(&SgBuf, pcSgBuf);
1966
1967 /* Allocate new request structure. */
1968 pReq = pdmBlkCacheReqAlloc(cbWrite, pvUser);
1969 if (RT_UNLIKELY(pReq))
1970 return VERR_NO_MEMORY;
1971
1972 /* Increment data transfer counter to keep the request valid while we access it. */
1973 ASMAtomicIncU32(&pReq->cXfersPending);
1974
1975 while (cbWrite)
1976 {
1977 size_t cbToWrite;
1978
1979 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1980
1981 if (pEntry)
1982 {
1983 /* Write the data into the entry and mark it as dirty */
1984 AssertPtr(pEntry->pList);
1985
1986 uint64_t offDiff = off - pEntry->Core.Key;
1987
1988 AssertMsg(off >= pEntry->Core.Key,
1989 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1990 off, pEntry->Core.Key));
1991
1992 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
1993 cbWrite -= cbToWrite;
1994
1995 if (!cbWrite)
1996 STAM_COUNTER_INC(&pCache->cHits);
1997 else
1998 STAM_COUNTER_INC(&pCache->cPartialHits);
1999
2000 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2001
2002 /* Ghost lists contain no data. */
2003 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2004 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2005 {
2006 /* Check if the entry is dirty. */
2007 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2008 PDMBLKCACHE_ENTRY_IS_DIRTY,
2009 0))
2010 {
2011 /* If it is already dirty but not in progress just update the data. */
2012 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2013 {
2014 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff,
2015 cbToWrite);
2016 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2017 }
2018 else
2019 {
2020 /* The data isn't written to the file yet */
2021 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2022 &SgBuf, offDiff, cbToWrite,
2023 true /* fWrite */);
2024 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2025 }
2026
2027 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2028 }
2029 else /* Dirty bit not set */
2030 {
2031 /*
2032 * Check if a read is in progress for this entry.
2033 * We have to defer processing in that case.
2034 */
2035 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2036 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2037 0))
2038 {
2039 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2040 &SgBuf, offDiff, cbToWrite,
2041 true /* fWrite */);
2042 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2043 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2044 }
2045 else /* I/O in progress flag not set */
2046 {
2047 /* Write as much as we can into the entry and update the file. */
2048 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2049 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2050
2051 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2052 if (fCommit)
2053 pdmBlkCacheCommitDirtyEntries(pCache);
2054 }
2055 } /* Dirty bit not set */
2056
2057 /* Move this entry to the top position */
2058 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2059 {
2060 pdmBlkCacheLockEnter(pCache);
2061 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2062 pdmBlkCacheLockLeave(pCache);
2063 }
2064
2065 pdmBlkCacheEntryRelease(pEntry);
2066 }
2067 else /* Entry is on the ghost list */
2068 {
2069 uint8_t *pbBuffer = NULL;
2070
2071 pdmBlkCacheLockEnter(pCache);
2072 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2073 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2074
2075 if (fEnough)
2076 {
2077 /* Move the entry to Am and fetch it to the cache. */
2078 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2079 pdmBlkCacheAdd(pCache, pEntry->cbData);
2080 pdmBlkCacheLockLeave(pCache);
2081
2082 if (pbBuffer)
2083 pEntry->pbData = pbBuffer;
2084 else
2085 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2086 AssertPtr(pEntry->pbData);
2087
2088 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2089 &SgBuf, offDiff, cbToWrite,
2090 true /* fWrite */);
2091 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2092 pdmBlkCacheEntryReadFromMedium(pEntry);
2093
2094 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2095 pdmBlkCacheEntryRelease(pEntry);
2096 }
2097 else
2098 {
2099 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2100 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2101 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2102 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2103 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2104
2105 pdmBlkCacheLockLeave(pCache);
2106
2107 RTMemFree(pEntry);
2108 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2109 &SgBuf, off, cbToWrite,
2110 PDMBLKCACHEXFERDIR_WRITE);
2111 }
2112 }
2113 }
2114 else /* No entry found */
2115 {
2116 /*
2117 * No entry found. Try to create a new cache entry to store the data in and if that fails
2118 * write directly to the file.
2119 */
2120 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2121 off, cbWrite,
2122 512, &cbToWrite);
2123
2124 cbWrite -= cbToWrite;
2125
2126 if (pEntryNew)
2127 {
2128 uint64_t offDiff = off - pEntryNew->Core.Key;
2129
2130 STAM_COUNTER_INC(&pCache->cHits);
2131
2132 /*
2133 * Check if it is possible to just write the data without waiting
2134 * for it to get fetched first.
2135 */
2136 if (!offDiff && pEntryNew->cbData == cbToWrite)
2137 {
2138 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2139 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2140
2141 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2142 if (fCommit)
2143 pdmBlkCacheCommitDirtyEntries(pCache);
2144 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2145 }
2146 else
2147 {
2148 /* Defer the write and fetch the data from the endpoint. */
2149 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2150 &SgBuf, offDiff, cbToWrite,
2151 true /* fWrite */);
2152 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2153 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2154 }
2155
2156 pdmBlkCacheEntryRelease(pEntryNew);
2157 }
2158 else
2159 {
2160 /*
2161 * There is not enough free space in the cache.
2162 * Pass the request directly to the I/O manager.
2163 */
2164 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2165
2166 STAM_COUNTER_INC(&pCache->cMisses);
2167
2168 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2169 &SgBuf, off, cbToWrite,
2170 PDMBLKCACHEXFERDIR_WRITE);
2171 }
2172 }
2173
2174 off += cbToWrite;
2175 }
2176
2177 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
2178 rc = VINF_AIO_TASK_PENDING;
2179
2180 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2181
2182 return rc;
2183}
2184
2185VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2186{
2187 int rc = VINF_SUCCESS;
2188
2189 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2190
2191 /* Commit dirty entries in the cache. */
2192 pdmBlkCacheCommit(pBlkCache);
2193
2194 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2195 return rc;
2196}
2197
2198/**
2199 * Completes a task segment freeing all resources and completes the task handle
2200 * if everything was transferred.
2201 *
2202 * @returns Next task segment handle.
2203 * @param pTaskSeg Task segment to complete.
2204 * @param rc Status code to set.
2205 */
2206static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache,
2207 PPDMBLKCACHEWAITER pWaiter,
2208 int rc)
2209{
2210 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2211 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2212
2213 pdmBlkCacheReqUpdate(pBlkCache, pWaiter->pReq, pWaiter->cbTransfer, rc, true);
2214
2215 RTMemFree(pWaiter);
2216
2217 return pNext;
2218}
2219
2220static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2221{
2222 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2223 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2224
2225 /* Reference the entry now as we are clearing the I/O in progress flag
2226 * which protected the entry till now. */
2227 pdmBlkCacheEntryRef(pEntry);
2228
2229 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2230 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2231
2232 /* Process waiting segment list. The data in entry might have changed in-between. */
2233 bool fDirty = false;
2234 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2235 PPDMBLKCACHEWAITER pCurr = pComplete;
2236
2237 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2238 ("The list tail was not updated correctly\n"));
2239 pEntry->pWaitingTail = NULL;
2240 pEntry->pWaitingHead = NULL;
2241
2242 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2243 {
2244 /*
2245 * An error here is difficult to handle as the original request completed already.
2246 * The error is logged for now and the VM is paused.
2247 * If the user continues the entry is written again in the hope
2248 * the user fixed the problem and the next write succeeds.
2249 */
2250 /** @todo r=aeichner: This solution doesn't work
2251 * The user will get the message but the VM will hang afterwards
2252 * VMR3Suspend() returns when the VM is suspended but suspending
2253 * the VM will reopen the images readonly in DrvVD. They are closed first
2254 * which will close the endpoints. This will block EMT while the
2255 * I/O manager processes the close request but the IO manager is stuck
2256 * in the VMR3Suspend call and can't process the request.
2257 * Another problem is that closing the VM means flushing the cache
2258 * but the entry failed and will probably fail again.
2259 * No idea so far how to solve this problem... but the user gets informed
2260 * at least.
2261 */
2262 if (RT_FAILURE(rcIoXfer))
2263 {
2264 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\"\n",
2265 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId));
2266
2267 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2268 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc)."
2269 "Make sure there is enough free space on the disk and that the disk is working properly."
2270 "Operation can be resumed afterwards."),
2271 pBlkCache->pszId, rcIoXfer);
2272 AssertRC(rc);
2273 }
2274 else
2275 {
2276 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2277
2278 while (pCurr)
2279 {
2280 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2281
2282 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2283 fDirty = true;
2284
2285 pCurr = pCurr->pNext;
2286 }
2287 }
2288 }
2289 else
2290 {
2291 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2292 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2293 ("Invalid flags set\n"));
2294
2295 while (pCurr)
2296 {
2297 if (pCurr->fWrite)
2298 {
2299 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2300 fDirty = true;
2301 }
2302 else
2303 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2304
2305 pCurr = pCurr->pNext;
2306 }
2307 }
2308
2309 bool fCommit = false;
2310 if (fDirty)
2311 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2312
2313 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2314
2315 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2316 pdmBlkCacheEntryRelease(pEntry);
2317
2318 if (fCommit)
2319 pdmBlkCacheCommitDirtyEntries(pCache);
2320
2321 /* Complete waiters now. */
2322 while (pComplete)
2323 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2324}
2325
2326VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2327{
2328 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2329
2330 if (hIoXfer->fIoCache)
2331 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2332 else
2333 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, hIoXfer->cbXfer, rcIoXfer, true);
2334}
2335
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette