VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMBlkCache.cpp@ 65299

Last change on this file since 65299 was 64766, checked in by vboxsync, 8 years ago

src/VBox: Make the use of the iterator for RTListForEach()/RTListForEachSafe() more obvious. There is no need to initialize the iterator and we also must not depend on the iterator being NULL if the list was empty.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 98.4 KB
Line 
1/* $Id: PDMBlkCache.cpp 64766 2016-11-30 10:59:48Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
22
23/*********************************************************************************************************************************
24* Header Files *
25*********************************************************************************************************************************/
26#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
27#include "PDMInternal.h"
28#include <iprt/asm.h>
29#include <iprt/mem.h>
30#include <iprt/path.h>
31#include <iprt/string.h>
32#include <VBox/log.h>
33#include <VBox/vmm/stam.h>
34#include <VBox/vmm/uvm.h>
35#include <VBox/vmm/vm.h>
36
37#include "PDMBlkCacheInternal.h"
38
/*
 * Lock ownership assertions. They expand to real checks in strict builds only
 * and to empty statements otherwise. The macro arguments are parenthesized so
 * that arbitrary pointer expressions can be passed safely.
 */
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given cache. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while (0)

/** Asserts that the calling thread is the write owner of the per endpoint RW semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while (0)

/** Asserts that the calling thread is a read owner of the per endpoint RW semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while (0)

#else
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while (0)
#endif
66
/** Saved state unit version; the load callback rejects any other version. */
#define PDM_BLK_CACHE_SAVED_STATE_VERSION   1
68
69
70/*********************************************************************************************************************************
71* Internal Functions *
72*********************************************************************************************************************************/
73
74static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
75 uint64_t off, size_t cbData, uint8_t *pbBuffer);
76static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
77
78/**
79 * Decrement the reference counter of the given cache entry.
80 *
81 * @returns nothing.
82 * @param pEntry The entry to release.
83 */
84DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
85{
86 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
87 ASMAtomicDecU32(&pEntry->cRefs);
88}
89
90/**
91 * Increment the reference counter of the given cache entry.
92 *
93 * @returns nothing.
94 * @param pEntry The entry to reference.
95 */
96DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
97{
98 ASMAtomicIncU32(&pEntry->cRefs);
99}
100
#ifdef VBOX_STRICT
/**
 * Asserts the byte accounting invariants of the global cache (strict builds only).
 *
 * @returns nothing.
 * @param   pCache    The global cache instance to check.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    /* Invariant 1: never more cached than the configured maximum. */
    AssertMsg(pCache->cbCached <= pCache->cbMax,
              ("Current amount of cached data exceeds maximum\n"));

    /* Invariant 2: the recently-used-in and frequently-used lists account for all cached bytes. */
    AssertMsg(   pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached
              == pCache->cbCached,
              ("Amount of cached data doesn't match\n"));

    /* Invariant 3: the paged out list stays within its own limit. */
    AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
              ("Paged out list exceeds maximum\n"));
}
#endif
116
117DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
118{
119 RTCritSectEnter(&pCache->CritSect);
120#ifdef VBOX_STRICT
121 pdmBlkCacheValidate(pCache);
122#endif
123}
124
125DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
126{
127#ifdef VBOX_STRICT
128 pdmBlkCacheValidate(pCache);
129#endif
130 RTCritSectLeave(&pCache->CritSect);
131}
132
133DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
134{
135 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
136 pCache->cbCached -= cbAmount;
137}
138
139DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
140{
141 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
142 pCache->cbCached += cbAmount;
143}
144
145DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
146{
147 pList->cbCached += cbAmount;
148}
149
150DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
151{
152 pList->cbCached -= cbAmount;
153}
154
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Debug helper asserting the structural consistency of a LRU list.
 *
 * For every element it asserts that the element does not show up again
 * further down the list, that it is not the forbidden element and that the
 * element without a successor is the recorded list tail.
 *
 * @returns nothing
 * @param   pList         The LRU list to check.
 * @param   pNotInList    Element which is not allowed to occur in the list.
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pIt = pList->pHead; pIt; pIt = pIt->pNext)
    {
        /* Compare the element against everything following it (quadratic, debugging aid only). */
        for (PPDMBLKCACHEENTRY pOther = pIt->pNext; pOther; pOther = pOther->pNext)
            AssertMsg(pIt != pOther,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pIt, pList));

        AssertMsg(pIt != pNotInList, ("Not allowed entry %#p is in list\n", pIt));

        /* The element without a successor has to be the tail. */
        AssertMsg(pIt->pNext || pIt == pList->pTail,
                  ("End of list reached but last element is not list tail\n"));
    }
}
#endif
189
190/**
191 * Unlinks a cache entry from the LRU list it is assigned to.
192 *
193 * @returns nothing.
194 * @param pEntry The entry to unlink.
195 */
196static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
197{
198 PPDMBLKLRULIST pList = pEntry->pList;
199 PPDMBLKCACHEENTRY pPrev, pNext;
200
201 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
202
203 AssertPtr(pList);
204
205#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
206 pdmBlkCacheCheckList(pList, NULL);
207#endif
208
209 pPrev = pEntry->pPrev;
210 pNext = pEntry->pNext;
211
212 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
213 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
214
215 if (pPrev)
216 pPrev->pNext = pNext;
217 else
218 {
219 pList->pHead = pNext;
220
221 if (pNext)
222 pNext->pPrev = NULL;
223 }
224
225 if (pNext)
226 pNext->pPrev = pPrev;
227 else
228 {
229 pList->pTail = pPrev;
230
231 if (pPrev)
232 pPrev->pNext = NULL;
233 }
234
235 pEntry->pList = NULL;
236 pEntry->pPrev = NULL;
237 pEntry->pNext = NULL;
238 pdmBlkCacheListSub(pList, pEntry->cbData);
239#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
240 pdmBlkCacheCheckList(pList, pEntry);
241#endif
242}
243
244/**
245 * Adds a cache entry to the given LRU list unlinking it from the currently
246 * assigned list if needed.
247 *
248 * @returns nothing.
249 * @param pList List to the add entry to.
250 * @param pEntry Entry to add.
251 */
252static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
253{
254 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
255#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
256 pdmBlkCacheCheckList(pList, NULL);
257#endif
258
259 /* Remove from old list if needed */
260 if (pEntry->pList)
261 pdmBlkCacheEntryRemoveFromList(pEntry);
262
263 pEntry->pNext = pList->pHead;
264 if (pList->pHead)
265 pList->pHead->pPrev = pEntry;
266 else
267 {
268 Assert(!pList->pTail);
269 pList->pTail = pEntry;
270 }
271
272 pEntry->pPrev = NULL;
273 pList->pHead = pEntry;
274 pdmBlkCacheListAdd(pList, pEntry->cbData);
275 pEntry->pList = pList;
276#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
277 pdmBlkCacheCheckList(pList, NULL);
278#endif
279}
280
281/**
282 * Destroys a LRU list freeing all entries.
283 *
284 * @returns nothing
285 * @param pList Pointer to the LRU list to destroy.
286 *
287 * @note The caller must own the critical section of the cache.
288 */
289static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
290{
291 while (pList->pHead)
292 {
293 PPDMBLKCACHEENTRY pEntry = pList->pHead;
294
295 pList->pHead = pEntry->pNext;
296
297 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
298 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
299
300 RTMemPageFree(pEntry->pbData, pEntry->cbData);
301 RTMemFree(pEntry);
302 }
303}
304
305/**
306 * Tries to remove the given amount of bytes from a given list in the cache
307 * moving the entries to one of the given ghosts lists
308 *
309 * @returns Amount of data which could be freed.
310 * @param pCache Pointer to the global cache data.
311 * @param cbData The amount of the data to free.
312 * @param pListSrc The source list to evict data from.
313 * @param pGhostListDst Where the ghost list removed entries should be
314 * moved to, NULL if the entry should be freed.
315 * @param fReuseBuffer Flag whether a buffer should be reused if it has
316 * the same size
317 * @param ppbBuffer Where to store the address of the buffer if an
318 * entry with the same size was found and
319 * fReuseBuffer is true.
320 *
321 * @note This function may return fewer bytes than requested because entries
322 * may be marked as non evictable if they are used for I/O at the
323 * moment.
324 */
325static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
326 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
327 bool fReuseBuffer, uint8_t **ppbBuffer)
328{
329 size_t cbEvicted = 0;
330
331 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
332
333 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
334 AssertMsg( !pGhostListDst
335 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
336 ("Destination list must be NULL or the recently used but paged out list\n"));
337
338 if (fReuseBuffer)
339 {
340 AssertPtr(ppbBuffer);
341 *ppbBuffer = NULL;
342 }
343
344 /* Start deleting from the tail. */
345 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
346
347 while ((cbEvicted < cbData) && pEntry)
348 {
349 PPDMBLKCACHEENTRY pCurr = pEntry;
350
351 pEntry = pEntry->pPrev;
352
353 /* We can't evict pages which are currently in progress or dirty but not in progress */
354 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
355 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
356 {
357 /* Ok eviction candidate. Grab the endpoint semaphore and check again
358 * because somebody else might have raced us. */
359 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
360 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
361
362 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
363 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
364 {
365 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
366
367 if (fReuseBuffer && pCurr->cbData == cbData)
368 {
369 STAM_COUNTER_INC(&pCache->StatBuffersReused);
370 *ppbBuffer = pCurr->pbData;
371 }
372 else if (pCurr->pbData)
373 RTMemPageFree(pCurr->pbData, pCurr->cbData);
374
375 pCurr->pbData = NULL;
376 cbEvicted += pCurr->cbData;
377
378 pdmBlkCacheEntryRemoveFromList(pCurr);
379 pdmBlkCacheSub(pCache, pCurr->cbData);
380
381 if (pGhostListDst)
382 {
383 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
384
385 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
386
387 /* We have to remove the last entries from the paged out list. */
388 while ( pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax
389 && pGhostEntFree)
390 {
391 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
392 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
393
394 pGhostEntFree = pGhostEntFree->pPrev;
395
396 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
397
398 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
399 {
400 pdmBlkCacheEntryRemoveFromList(pFree);
401
402 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
403 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
404 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
405
406 RTMemFree(pFree);
407 }
408
409 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
410 }
411
412 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
413 {
414 /* Couldn't remove enough entries. Delete */
415 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
416 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
417 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
418
419 RTMemFree(pCurr);
420 }
421 else
422 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
423 }
424 else
425 {
426 /* Delete the entry from the AVL tree it is assigned to. */
427 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
428 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
429 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
430
431 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
432 RTMemFree(pCurr);
433 }
434 }
435
436 }
437 else
438 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
439 }
440
441 return cbEvicted;
442}
443
444static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
445{
446 size_t cbRemoved = 0;
447
448 if ((pCache->cbCached + cbData) < pCache->cbMax)
449 return true;
450 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
451 {
452 /* Try to evict as many bytes as possible from A1in */
453 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
454 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
455
456 /*
457 * If it was not possible to remove enough entries
458 * try the frequently accessed cache.
459 */
460 if (cbRemoved < cbData)
461 {
462 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
463
464 /*
465 * If we removed something we can't pass the reuse buffer flag anymore because
466 * we don't need to evict that much data
467 */
468 if (!cbRemoved)
469 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
470 NULL, fReuseBuffer, ppbBuffer);
471 else
472 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
473 NULL, false, NULL);
474 }
475 }
476 else
477 {
478 /* We have to remove entries from frequently access list. */
479 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
480 NULL, fReuseBuffer, ppbBuffer);
481 }
482
483 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
484 return (cbRemoved >= cbData);
485}
486
487DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbXfer, PPDMBLKCACHEIOXFER pIoXfer)
488{
489 int rc = VINF_SUCCESS;
490
491 LogFlowFunc(("%s: Enqueuing hIoXfer=%#p enmXferDir=%d\n",
492 __FUNCTION__, pIoXfer, pIoXfer->enmXferDir));
493
494 switch (pBlkCache->enmType)
495 {
496 case PDMBLKCACHETYPE_DEV:
497 {
498 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
499 pIoXfer->enmXferDir,
500 off, cbXfer,
501 &pIoXfer->SgBuf, pIoXfer);
502 break;
503 }
504 case PDMBLKCACHETYPE_DRV:
505 {
506 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
507 pIoXfer->enmXferDir,
508 off, cbXfer,
509 &pIoXfer->SgBuf, pIoXfer);
510 break;
511 }
512 case PDMBLKCACHETYPE_USB:
513 {
514 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
515 pIoXfer->enmXferDir,
516 off, cbXfer,
517 &pIoXfer->SgBuf, pIoXfer);
518 break;
519 }
520 case PDMBLKCACHETYPE_INTERNAL:
521 {
522 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
523 pIoXfer->enmXferDir,
524 off, cbXfer,
525 &pIoXfer->SgBuf, pIoXfer);
526 break;
527 }
528 default:
529 AssertMsgFailed(("Unknown block cache type!\n"));
530 }
531
532 LogFlowFunc(("%s: returns rc=%Rrc\n", __FUNCTION__, rc));
533 return rc;
534}
535
536/**
537 * Initiates a read I/O task for the given entry.
538 *
539 * @returns VBox status code.
540 * @param pEntry The entry to fetch the data to.
541 */
542static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
543{
544 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
545 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
546
547 /* Make sure no one evicts the entry while it is accessed. */
548 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
549
550 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
551 if (RT_UNLIKELY(!pIoXfer))
552 return VERR_NO_MEMORY;
553
554 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
555
556 pIoXfer->fIoCache = true;
557 pIoXfer->pEntry = pEntry;
558 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
559 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
560 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
561 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
562
563 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
564}
565
566/**
567 * Initiates a write I/O task for the given entry.
568 *
569 * @returns nothing.
570 * @param pEntry The entry to read the data from.
571 */
572static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
573{
574 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
575 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
576
577 /* Make sure no one evicts the entry while it is accessed. */
578 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
579
580 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
581 if (RT_UNLIKELY(!pIoXfer))
582 return VERR_NO_MEMORY;
583
584 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
585
586 pIoXfer->fIoCache = true;
587 pIoXfer->pEntry = pEntry;
588 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
589 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
590 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
591 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
592
593 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
594}
595
596/**
597 * Passthrough a part of a request directly to the I/O manager handling the
598 * endpoint.
599 *
600 * @returns VBox status code.
601 * @param pBlkCache The endpoint cache.
602 * @param pReq The request.
603 * @param pSgBuf The scatter/gather buffer.
604 * @param offStart Offset to start transfer from.
605 * @param cbData Amount of data to transfer.
606 * @param enmXferDir The transfer type (read/write)
607 */
608static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
609 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
610 PDMBLKCACHEXFERDIR enmXferDir)
611{
612
613 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
614 if (RT_UNLIKELY(!pIoXfer))
615 return VERR_NO_MEMORY;
616
617 ASMAtomicIncU32(&pReq->cXfersPending);
618 pIoXfer->fIoCache = false;
619 pIoXfer->pReq = pReq;
620 pIoXfer->enmXferDir = enmXferDir;
621 if (pSgBuf)
622 {
623 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
624 RTSgBufAdvance(pSgBuf, cbData);
625 }
626
627 return pdmBlkCacheEnqueue(pBlkCache, offStart, cbData, pIoXfer);
628}
629
630/**
631 * Commit a single dirty entry to the endpoint
632 *
633 * @returns nothing
634 * @param pEntry The entry to commit.
635 */
636static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
637{
638 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
639 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
640 ("Invalid flags set for entry %#p\n", pEntry));
641
642 pdmBlkCacheEntryWriteToMedium(pEntry);
643}
644
645/**
646 * Commit all dirty entries for a single endpoint.
647 *
648 * @returns nothing.
649 * @param pBlkCache The endpoint cache to commit.
650 */
651static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
652{
653 uint32_t cbCommitted = 0;
654
655 /* Return if the cache was suspended. */
656 if (pBlkCache->fSuspended)
657 return;
658
659 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
660
661 /* The list is moved to a new header to reduce locking overhead. */
662 RTLISTANCHOR ListDirtyNotCommitted;
663
664 RTListInit(&ListDirtyNotCommitted);
665 RTSpinlockAcquire(pBlkCache->LockList);
666 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
667 RTSpinlockRelease(pBlkCache->LockList);
668
669 if (!RTListIsEmpty(&ListDirtyNotCommitted))
670 {
671 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
672
673 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
674 {
675 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
676 NodeNotCommitted);
677 pdmBlkCacheEntryCommit(pEntry);
678 cbCommitted += pEntry->cbData;
679 RTListNodeRemove(&pEntry->NodeNotCommitted);
680 pEntry = pNext;
681 }
682
683 /* Commit the last endpoint */
684 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
685 pdmBlkCacheEntryCommit(pEntry);
686 cbCommitted += pEntry->cbData;
687 RTListNodeRemove(&pEntry->NodeNotCommitted);
688 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
689 ("Committed all entries but list is not empty\n"));
690 }
691
692 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
693 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
694 ("Number of committed bytes exceeds number of dirty bytes\n"));
695 uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
696
697 /* Reset the commit timer if we don't have any dirty bits. */
698 if ( !(cbDirtyOld - cbCommitted)
699 && pBlkCache->pCache->u32CommitTimeoutMs != 0)
700 TMTimerStop(pBlkCache->pCache->pTimerCommit);
701}
702
703/**
704 * Commit all dirty entries in the cache.
705 *
706 * @returns nothing.
707 * @param pCache The global cache instance.
708 */
709static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
710{
711 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
712
713 if (!fCommitInProgress)
714 {
715 pdmBlkCacheLockEnter(pCache);
716 Assert(!RTListIsEmpty(&pCache->ListUsers));
717
718 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
719 AssertPtr(pBlkCache);
720
721 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
722 {
723 pdmBlkCacheCommit(pBlkCache);
724
725 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
726 NodeCacheUser);
727 }
728
729 /* Commit the last endpoint */
730 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
731 pdmBlkCacheCommit(pBlkCache);
732
733 pdmBlkCacheLockLeave(pCache);
734 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
735 }
736}
737
738/**
739 * Adds the given entry as a dirty to the cache.
740 *
741 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
742 * @param pBlkCache The endpoint cache the entry belongs to.
743 * @param pEntry The entry to add.
744 */
745static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
746{
747 bool fDirtyBytesExceeded = false;
748 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
749
750 /* If the commit timer is disabled we commit right away. */
751 if (pCache->u32CommitTimeoutMs == 0)
752 {
753 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
754 pdmBlkCacheEntryCommit(pEntry);
755 }
756 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
757 {
758 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
759
760 RTSpinlockAcquire(pBlkCache->LockList);
761 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
762 RTSpinlockRelease(pBlkCache->LockList);
763
764 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
765
766 /* Prevent committing if the VM was suspended. */
767 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
768 fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold);
769 else if (!cbDirty && pCache->u32CommitTimeoutMs > 0)
770 {
771 /* Arm the commit timer. */
772 TMTimerSetMillies(pCache->pTimerCommit, pCache->u32CommitTimeoutMs);
773 }
774 }
775
776 return fDirtyBytesExceeded;
777}
778
779static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
780{
781 bool fFound = false;
782
783 PPDMBLKCACHE pBlkCache;
784 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
785 {
786 if (!RTStrCmp(pBlkCache->pszId, pcszId))
787 {
788 fFound = true;
789 break;
790 }
791 }
792
793 return fFound ? pBlkCache : NULL;
794}
795
796/**
797 * Commit timer callback.
798 */
799static DECLCALLBACK(void) pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
800{
801 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
802 NOREF(pVM); NOREF(pTimer);
803
804 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
805
806 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
807 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
808 pdmBlkCacheCommitDirtyEntries(pCache);
809
810 LogFlowFunc(("Entries committed, going to sleep\n"));
811}
812
813static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
814{
815 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
816
817 AssertPtr(pBlkCacheGlobal);
818
819 pdmBlkCacheLockEnter(pBlkCacheGlobal);
820
821 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
822
823 /* Go through the list and save all dirty entries. */
824 PPDMBLKCACHE pBlkCache;
825 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
826 {
827 uint32_t cEntries = 0;
828 PPDMBLKCACHEENTRY pEntry;
829
830 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
831 SSMR3PutU32(pSSM, (uint32_t)strlen(pBlkCache->pszId));
832 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
833
834 /* Count the number of entries to safe. */
835 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
836 {
837 cEntries++;
838 }
839
840 SSMR3PutU32(pSSM, cEntries);
841
842 /* Walk the list of all dirty entries and save them. */
843 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
844 {
845 /* A few sanity checks. */
846 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
847 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
848 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
849 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
850 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
851 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
852 ("Invalid list\n"));
853 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
854 ("Size and range do not match\n"));
855
856 /* Save */
857 SSMR3PutU64(pSSM, pEntry->Core.Key);
858 SSMR3PutU32(pSSM, pEntry->cbData);
859 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
860 }
861
862 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
863 }
864
865 pdmBlkCacheLockLeave(pBlkCacheGlobal);
866
867 /* Terminator */
868 return SSMR3PutU32(pSSM, UINT32_MAX);
869}
870
871static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
872{
873 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
874 uint32_t cRefs;
875
876 NOREF(uPass);
877 AssertPtr(pBlkCacheGlobal);
878
879 pdmBlkCacheLockEnter(pBlkCacheGlobal);
880
881 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
882 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
883
884 SSMR3GetU32(pSSM, &cRefs);
885
886 /*
887 * Fewer users in the saved state than in the current VM are allowed
888 * because that means that there are only new ones which don't have any saved state
889 * which can get lost.
890 * More saved state entries than registered cache users are only allowed if the
891 * missing users don't have any data saved in the cache.
892 */
893 int rc = VINF_SUCCESS;
894 char *pszId = NULL;
895
896 while ( cRefs > 0
897 && RT_SUCCESS(rc))
898 {
899 PPDMBLKCACHE pBlkCache = NULL;
900 uint32_t cbId = 0;
901
902 SSMR3GetU32(pSSM, &cbId);
903 Assert(cbId > 0);
904
905 cbId++; /* Include terminator */
906 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
907 if (!pszId)
908 {
909 rc = VERR_NO_MEMORY;
910 break;
911 }
912
913 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
914 AssertRC(rc);
915
916 /* Search for the block cache with the provided id. */
917 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
918
919 /* Get the entries */
920 uint32_t cEntries;
921 SSMR3GetU32(pSSM, &cEntries);
922
923 if (!pBlkCache && (cEntries > 0))
924 {
925 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
926 N_("The VM is missing a block device and there is data in the cache. Please make sure the source and target VMs have compatible storage configurations"));
927 break;
928 }
929
930 RTMemFree(pszId);
931 pszId = NULL;
932
933 while (cEntries > 0)
934 {
935 PPDMBLKCACHEENTRY pEntry;
936 uint64_t off;
937 uint32_t cbEntry;
938
939 SSMR3GetU64(pSSM, &off);
940 SSMR3GetU32(pSSM, &cbEntry);
941
942 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
943 if (!pEntry)
944 {
945 rc = VERR_NO_MEMORY;
946 break;
947 }
948
949 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
950 if (RT_FAILURE(rc))
951 {
952 RTMemFree(pEntry->pbData);
953 RTMemFree(pEntry);
954 break;
955 }
956
957 /* Insert into the tree. */
958 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
959 Assert(fInserted); NOREF(fInserted);
960
961 /* Add to the dirty list. */
962 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
963 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
964 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
965 pdmBlkCacheEntryRelease(pEntry);
966 cEntries--;
967 }
968
969 cRefs--;
970 }
971
972 if (pszId)
973 RTMemFree(pszId);
974
975 if (cRefs && RT_SUCCESS(rc))
976 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
977 N_("Unexpected error while restoring state. Please make sure the source and target VMs have compatible storage configurations"));
978
979 pdmBlkCacheLockLeave(pBlkCacheGlobal);
980
981 if (RT_SUCCESS(rc))
982 {
983 uint32_t u32 = 0;
984 rc = SSMR3GetU32(pSSM, &u32);
985 if (RT_SUCCESS(rc))
986 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
987 }
988
989 return rc;
990}
991
992int pdmR3BlkCacheInit(PVM pVM)
993{
994 int rc = VINF_SUCCESS;
995 PUVM pUVM = pVM->pUVM;
996 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
997
998 LogFlowFunc((": pVM=%p\n", pVM));
999
1000 VM_ASSERT_EMT(pVM);
1001
1002 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
1003 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
1004
1005 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
1006 if (!pBlkCacheGlobal)
1007 return VERR_NO_MEMORY;
1008
1009 RTListInit(&pBlkCacheGlobal->ListUsers);
1010 pBlkCacheGlobal->pVM = pVM;
1011 pBlkCacheGlobal->cRefs = 0;
1012 pBlkCacheGlobal->cbCached = 0;
1013 pBlkCacheGlobal->fCommitInProgress = false;
1014
1015 /* Initialize members */
1016 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
1017 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
1018 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
1019
1020 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
1021 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
1022 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
1023
1024 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
1025 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1026 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1027
1028 do
1029 {
1030 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1031 AssertLogRelRCBreak(rc);
1032 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1033
1034 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1035 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1036 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1037 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1038
1039 /** @todo r=aeichner: Experiment to find optimal default values */
1040 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1041 AssertLogRelRCBreak(rc);
1042 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1043 AssertLogRelRCBreak(rc);
1044 } while (0);
1045
1046 if (RT_SUCCESS(rc))
1047 {
1048 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1049 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1050 "/PDM/BlkCache/cbMax",
1051 STAMUNIT_BYTES,
1052 "Maximum cache size");
1053 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1054 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1055 "/PDM/BlkCache/cbCached",
1056 STAMUNIT_BYTES,
1057 "Currently used cache");
1058 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1059 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1060 "/PDM/BlkCache/cbCachedMruIn",
1061 STAMUNIT_BYTES,
1062 "Number of bytes cached in MRU list");
1063 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1064 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1065 "/PDM/BlkCache/cbCachedMruOut",
1066 STAMUNIT_BYTES,
1067 "Number of bytes cached in FRU list");
1068 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1069 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1070 "/PDM/BlkCache/cbCachedFru",
1071 STAMUNIT_BYTES,
1072 "Number of bytes cached in FRU ghost list");
1073
1074#ifdef VBOX_WITH_STATISTICS
1075 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1076 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1077 "/PDM/BlkCache/CacheHits",
1078 STAMUNIT_COUNT, "Number of hits in the cache");
1079 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1080 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1081 "/PDM/BlkCache/CachePartialHits",
1082 STAMUNIT_COUNT, "Number of partial hits in the cache");
1083 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1084 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1085 "/PDM/BlkCache/CacheMisses",
1086 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1087 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1088 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1089 "/PDM/BlkCache/CacheRead",
1090 STAMUNIT_BYTES, "Number of bytes read from the cache");
1091 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1092 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1093 "/PDM/BlkCache/CacheWritten",
1094 STAMUNIT_BYTES, "Number of bytes written to the cache");
1095 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1096 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1097 "/PDM/BlkCache/CacheTreeGet",
1098 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1099 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1100 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1101 "/PDM/BlkCache/CacheTreeInsert",
1102 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1103 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1104 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1105 "/PDM/BlkCache/CacheTreeRemove",
1106 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1107 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1108 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1109 "/PDM/BlkCache/CacheBuffersReused",
1110 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1111#endif
1112
1113 /* Initialize the critical section */
1114 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1115 }
1116
1117 if (RT_SUCCESS(rc))
1118 {
1119 /* Create the commit timer */
1120 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1121 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
1122 pdmBlkCacheCommitTimerCallback,
1123 pBlkCacheGlobal,
1124 "BlkCache-Commit",
1125 &pBlkCacheGlobal->pTimerCommit);
1126
1127 if (RT_SUCCESS(rc))
1128 {
1129 /* Register saved state handler. */
1130 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1131 NULL, NULL, NULL,
1132 NULL, pdmR3BlkCacheSaveExec, NULL,
1133 NULL, pdmR3BlkCacheLoadExec, NULL);
1134 if (RT_SUCCESS(rc))
1135 {
1136 LogRel(("BlkCache: Cache successfully initialized. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1137 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1138 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1139 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1140 return VINF_SUCCESS;
1141 }
1142 }
1143
1144 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1145 }
1146
1147 if (pBlkCacheGlobal)
1148 RTMemFree(pBlkCacheGlobal);
1149
1150 LogFlowFunc((": returns rc=%Rrc\n", rc));
1151 return rc;
1152}
1153
1154void pdmR3BlkCacheTerm(PVM pVM)
1155{
1156 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1157
1158 if (pBlkCacheGlobal)
1159 {
1160 /* Make sure no one else uses the cache now */
1161 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1162
1163 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1164 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1165 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1166 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1167
1168 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1169
1170 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1171 RTMemFree(pBlkCacheGlobal);
1172 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1173 }
1174}
1175
1176int pdmR3BlkCacheResume(PVM pVM)
1177{
1178 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1179
1180 LogFlowFunc(("pVM=%#p\n", pVM));
1181
1182 if ( pBlkCacheGlobal
1183 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1184 {
1185 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1186 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1187 }
1188
1189 return VINF_SUCCESS;
1190}
1191
1192static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1193{
1194 int rc = VINF_SUCCESS;
1195 PPDMBLKCACHE pBlkCache = NULL;
1196 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1197
1198 if (!pBlkCacheGlobal)
1199 return VERR_NOT_SUPPORTED;
1200
1201 /*
1202 * Check that no other user cache has the same id first,
1203 * Unique id's are necessary in case the state is saved.
1204 */
1205 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1206
1207 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1208
1209 if (!pBlkCache)
1210 {
1211 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1212
1213 if (pBlkCache)
1214 pBlkCache->pszId = RTStrDup(pcszId);
1215
1216 if ( pBlkCache
1217 && pBlkCache->pszId)
1218 {
1219 pBlkCache->fSuspended = false;
1220 pBlkCache->pCache = pBlkCacheGlobal;
1221 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1222
1223 rc = RTSpinlockCreate(&pBlkCache->LockList, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "pdmR3BlkCacheRetain");
1224 if (RT_SUCCESS(rc))
1225 {
1226 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1227 if (RT_SUCCESS(rc))
1228 {
1229 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1230 if (pBlkCache->pTree)
1231 {
1232#ifdef VBOX_WITH_STATISTICS
1233 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1234 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1235 STAMUNIT_COUNT, "Number of deferred writes",
1236 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1237#endif
1238
1239 /* Add to the list of users. */
1240 pBlkCacheGlobal->cRefs++;
1241 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1242 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1243
1244 *ppBlkCache = pBlkCache;
1245 LogFlowFunc(("returns success\n"));
1246 return VINF_SUCCESS;
1247 }
1248
1249 rc = VERR_NO_MEMORY;
1250 RTSemRWDestroy(pBlkCache->SemRWEntries);
1251 }
1252
1253 RTSpinlockDestroy(pBlkCache->LockList);
1254 }
1255
1256 RTStrFree(pBlkCache->pszId);
1257 }
1258 else
1259 rc = VERR_NO_MEMORY;
1260
1261 if (pBlkCache)
1262 RTMemFree(pBlkCache);
1263 }
1264 else
1265 rc = VERR_ALREADY_EXISTS;
1266
1267 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1268
1269 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1270 return rc;
1271}
1272
1273VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1274 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1275 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1276 PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard,
1277 const char *pcszId)
1278{
1279 int rc = VINF_SUCCESS;
1280 PPDMBLKCACHE pBlkCache;
1281
1282 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1283 if (RT_SUCCESS(rc))
1284 {
1285 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1286 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1287 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1288 pBlkCache->u.Drv.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1289 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1290 *ppBlkCache = pBlkCache;
1291 }
1292
1293 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1294 return rc;
1295}
1296
1297VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1298 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1299 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1300 PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV pfnXferEnqueueDiscard,
1301 const char *pcszId)
1302{
1303 int rc = VINF_SUCCESS;
1304 PPDMBLKCACHE pBlkCache;
1305
1306 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1307 if (RT_SUCCESS(rc))
1308 {
1309 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1310 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1311 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1312 pBlkCache->u.Dev.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1313 pBlkCache->u.Dev.pDevIns = pDevIns;
1314 *ppBlkCache = pBlkCache;
1315 }
1316
1317 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1318 return rc;
1319
1320}
1321
1322VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1323 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1324 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1325 PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB pfnXferEnqueueDiscard,
1326 const char *pcszId)
1327{
1328 int rc = VINF_SUCCESS;
1329 PPDMBLKCACHE pBlkCache;
1330
1331 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1332 if (RT_SUCCESS(rc))
1333 {
1334 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1335 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1336 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1337 pBlkCache->u.Usb.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1338 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1339 *ppBlkCache = pBlkCache;
1340 }
1341
1342 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1343 return rc;
1344
1345}
1346
1347VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1348 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1349 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1350 PFNPDMBLKCACHEXFERENQUEUEDISCARDINT pfnXferEnqueueDiscard,
1351 const char *pcszId)
1352{
1353 int rc = VINF_SUCCESS;
1354 PPDMBLKCACHE pBlkCache;
1355
1356 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1357 if (RT_SUCCESS(rc))
1358 {
1359 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1360 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1361 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1362 pBlkCache->u.Int.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1363 pBlkCache->u.Int.pvUser = pvUser;
1364 *ppBlkCache = pBlkCache;
1365 }
1366
1367 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1368 return rc;
1369
1370}
1371
1372/**
1373 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1374 *
1375 * @returns IPRT status code.
1376 * @param pNode The node to destroy.
1377 * @param pvUser Opaque user data.
1378 */
1379static DECLCALLBACK(int) pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1380{
1381 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1382 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1383 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1384
1385 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1386 {
1387 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1388 pdmBlkCacheEntryRef(pEntry);
1389 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1390 pdmBlkCacheLockLeave(pCache);
1391
1392 RTThreadSleep(250);
1393
1394 /* Re-enter all locks */
1395 pdmBlkCacheLockEnter(pCache);
1396 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1397 pdmBlkCacheEntryRelease(pEntry);
1398 }
1399
1400 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1401 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1402
1403 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1404 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1405
1406 pdmBlkCacheEntryRemoveFromList(pEntry);
1407
1408 if (fUpdateCache)
1409 pdmBlkCacheSub(pCache, pEntry->cbData);
1410
1411 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1412 RTMemFree(pEntry);
1413
1414 return VINF_SUCCESS;
1415}
1416
1417/**
1418 * Destroys all cache resources used by the given endpoint.
1419 *
1420 * @returns nothing.
1421 * @param pBlkCache Block cache handle.
1422 */
1423VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1424{
1425 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1426
1427 /*
1428 * Commit all dirty entries now (they are waited on for completion during the
1429 * destruction of the AVL tree below).
1430 * The exception is if the VM was paused because of an I/O error before.
1431 */
1432 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1433 pdmBlkCacheCommit(pBlkCache);
1434
1435 /* Make sure nobody is accessing the cache while we delete the tree. */
1436 pdmBlkCacheLockEnter(pCache);
1437 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1438 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1439 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1440
1441 RTSpinlockDestroy(pBlkCache->LockList);
1442
1443 pCache->cRefs--;
1444 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1445
1446 pdmBlkCacheLockLeave(pCache);
1447
1448 RTSemRWDestroy(pBlkCache->SemRWEntries);
1449
1450#ifdef VBOX_WITH_STATISTICS
1451 STAMR3DeregisterF(pCache->pVM->pUVM, "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1452#endif
1453
1454 RTStrFree(pBlkCache->pszId);
1455 RTMemFree(pBlkCache);
1456}
1457
1458VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1459{
1460 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1461
1462 /*
1463 * Validate input.
1464 */
1465 if (!pDevIns)
1466 return;
1467 VM_ASSERT_EMT(pVM);
1468
1469 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1470 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1471
1472 /* Return silently if not supported. */
1473 if (!pBlkCacheGlobal)
1474 return;
1475
1476 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1477
1478 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1479 {
1480 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1481 && pBlkCache->u.Dev.pDevIns == pDevIns)
1482 PDMR3BlkCacheRelease(pBlkCache);
1483 }
1484
1485 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1486}
1487
1488VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1489{
1490 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1491
1492 /*
1493 * Validate input.
1494 */
1495 if (!pDrvIns)
1496 return;
1497 VM_ASSERT_EMT(pVM);
1498
1499 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1500 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1501
1502 /* Return silently if not supported. */
1503 if (!pBlkCacheGlobal)
1504 return;
1505
1506 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1507
1508 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1509 {
1510 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1511 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1512 PDMR3BlkCacheRelease(pBlkCache);
1513 }
1514
1515 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1516}
1517
1518VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1519{
1520 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1521
1522 /*
1523 * Validate input.
1524 */
1525 if (!pUsbIns)
1526 return;
1527 VM_ASSERT_EMT(pVM);
1528
1529 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1530 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1531
1532 /* Return silently if not supported. */
1533 if (!pBlkCacheGlobal)
1534 return;
1535
1536 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1537
1538 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1539 {
1540 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1541 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1542 PDMR3BlkCacheRelease(pBlkCache);
1543 }
1544
1545 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1546}
1547
1548static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1549{
1550 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1551
1552 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1553 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1554 if (pEntry)
1555 pdmBlkCacheEntryRef(pEntry);
1556 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1557
1558 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1559
1560 return pEntry;
1561}
1562
1563/**
1564 * Return the best fit cache entries for the given offset.
1565 *
1566 * @returns nothing.
1567 * @param pBlkCache The endpoint cache.
1568 * @param off The offset.
1569 * @param ppEntryAbove Where to store the pointer to the best fit entry above
1570 * the given offset. NULL if not required.
1571 */
1572static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEENTRY *ppEntryAbove)
1573{
1574 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1575
1576 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1577 if (ppEntryAbove)
1578 {
1579 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1580 if (*ppEntryAbove)
1581 pdmBlkCacheEntryRef(*ppEntryAbove);
1582 }
1583
1584 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1585
1586 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1587}
1588
1589static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1590{
1591 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeInsert, Cache);
1592 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1593 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1594 AssertMsg(fInserted, ("Node was not inserted into tree\n")); NOREF(fInserted);
1595 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeInsert, Cache);
1596 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1597}
1598
1599/**
1600 * Allocates and initializes a new entry for the cache.
1601 * The entry has a reference count of 1.
1602 *
1603 * @returns Pointer to the new cache entry or NULL if out of memory.
1604 * @param pBlkCache The cache the entry belongs to.
1605 * @param off Start offset.
1606 * @param cbData Size of the cache entry.
1607 * @param pbBuffer Pointer to the buffer to use.
1608 * NULL if a new buffer should be allocated.
1609 * The buffer needs to have the same size of the entry.
1610 */
1611static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbData, uint8_t *pbBuffer)
1612{
1613 AssertReturn(cbData <= UINT32_MAX, NULL);
1614 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1615
1616 if (RT_UNLIKELY(!pEntryNew))
1617 return NULL;
1618
1619 pEntryNew->Core.Key = off;
1620 pEntryNew->Core.KeyLast = off + cbData - 1;
1621 pEntryNew->pBlkCache = pBlkCache;
1622 pEntryNew->fFlags = 0;
1623 pEntryNew->cRefs = 1; /* We are using it now. */
1624 pEntryNew->pList = NULL;
1625 pEntryNew->cbData = (uint32_t)cbData;
1626 pEntryNew->pWaitingHead = NULL;
1627 pEntryNew->pWaitingTail = NULL;
1628 if (pbBuffer)
1629 pEntryNew->pbData = pbBuffer;
1630 else
1631 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1632
1633 if (RT_UNLIKELY(!pEntryNew->pbData))
1634 {
1635 RTMemFree(pEntryNew);
1636 return NULL;
1637 }
1638
1639 return pEntryNew;
1640}
1641
1642/**
1643 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1644 * in exclusive mode.
1645 *
1646 * @returns true if the flag in fSet is set and the one in fClear is clear.
1647 * false otherwise.
1648 * The R/W semaphore is only held if true is returned.
1649 *
1650 * @param pBlkCache The endpoint cache instance data.
1651 * @param pEntry The entry to check the flags for.
1652 * @param fSet The flag which is tested to be set.
1653 * @param fClear The flag which is tested to be clear.
1654 */
1655DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1656 PPDMBLKCACHEENTRY pEntry,
1657 uint32_t fSet, uint32_t fClear)
1658{
1659 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1660 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1661
1662 if (fPassed)
1663 {
1664 /* Acquire the lock and check again because the completion callback might have raced us. */
1665 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1666
1667 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1668 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1669
1670 /* Drop the lock if we didn't passed the test. */
1671 if (!fPassed)
1672 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1673 }
1674
1675 return fPassed;
1676}
1677
1678/**
1679 * Adds a segment to the waiting list for a cache entry
1680 * which is currently in progress.
1681 *
1682 * @returns nothing.
1683 * @param pEntry The cache entry to add the segment to.
1684 * @param pWaiter The waiter entry to add.
1685 */
1686DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1687 PPDMBLKCACHEWAITER pWaiter)
1688{
1689 pWaiter->pNext = NULL;
1690
1691 if (pEntry->pWaitingHead)
1692 {
1693 AssertPtr(pEntry->pWaitingTail);
1694
1695 pEntry->pWaitingTail->pNext = pWaiter;
1696 pEntry->pWaitingTail = pWaiter;
1697 }
1698 else
1699 {
1700 Assert(!pEntry->pWaitingTail);
1701
1702 pEntry->pWaitingHead = pWaiter;
1703 pEntry->pWaitingTail = pWaiter;
1704 }
1705}
1706
1707/**
1708 * Add a buffer described by the I/O memory context
1709 * to the entry waiting for completion.
1710 *
1711 * @returns VBox status code.
1712 * @param pEntry The entry to add the buffer to.
1713 * @param pReq The request.
1714 * @param pSgBuf The scatter/gather buffer. Will be advanced by cbData.
1715 * @param offDiff Offset from the start of the buffer in the entry.
1716 * @param cbData Amount of data to wait for onthis entry.
1717 * @param fWrite Flag whether the task waits because it wants to write to
1718 * the cache entry.
1719 */
1720static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEREQ pReq,
1721 PRTSGBUF pSgBuf, uint64_t offDiff, size_t cbData, bool fWrite)
1722{
1723 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1724 if (!pWaiter)
1725 return VERR_NO_MEMORY;
1726
1727 ASMAtomicIncU32(&pReq->cXfersPending);
1728 pWaiter->pReq = pReq;
1729 pWaiter->offCacheEntry = offDiff;
1730 pWaiter->cbTransfer = cbData;
1731 pWaiter->fWrite = fWrite;
1732 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1733 RTSgBufAdvance(pSgBuf, cbData);
1734
1735 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1736
1737 return VINF_SUCCESS;
1738}
1739
1740/**
1741 * Calculate aligned offset and size for a new cache entry which do not
1742 * intersect with an already existing entry and the file end.
1743 *
1744 * @returns The number of bytes the entry can hold of the requested amount
1745 * of bytes.
1746 * @param pBlkCache The endpoint cache.
1747 * @param off The start offset.
1748 * @param cb The number of bytes the entry needs to hold at
1749 * least.
1750 * @param pcbEntry Where to store the number of bytes the entry can hold.
1751 * Can be less than given because of other entries.
1752 */
1753static uint32_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1754 uint64_t off, uint32_t cb,
1755 uint32_t *pcbEntry)
1756{
1757 /* Get the best fit entries around the offset */
1758 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1759 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1760
1761 /* Log the info */
1762 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1763 pEntryAbove ? "B" : "No b",
1764 off,
1765 pEntryAbove ? pEntryAbove->Core.Key : 0,
1766 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1767 pEntryAbove ? pEntryAbove->cbData : 0));
1768
1769 uint32_t cbNext;
1770 uint32_t cbInEntry;
1771 if ( pEntryAbove
1772 && off + cb > pEntryAbove->Core.Key)
1773 {
1774 cbInEntry = (uint32_t)(pEntryAbove->Core.Key - off);
1775 cbNext = (uint32_t)(pEntryAbove->Core.Key - off);
1776 }
1777 else
1778 {
1779 cbInEntry = cb;
1780 cbNext = cb;
1781 }
1782
1783 /* A few sanity checks */
1784 AssertMsg(!pEntryAbove || off + cbNext <= pEntryAbove->Core.Key,
1785 ("Aligned size intersects with another cache entry\n"));
1786 Assert(cbInEntry <= cbNext);
1787
1788 if (pEntryAbove)
1789 pdmBlkCacheEntryRelease(pEntryAbove);
1790
1791 LogFlow(("off=%llu cbNext=%u\n", off, cbNext));
1792
1793 *pcbEntry = cbNext;
1794
1795 return cbInEntry;
1796}
1797
1798/**
1799 * Create a new cache entry evicting data from the cache if required.
1800 *
1801 * @returns Pointer to the new cache entry or NULL
1802 * if not enough bytes could be evicted from the cache.
1803 * @param pBlkCache The endpoint cache.
1804 * @param off The offset.
1805 * @param cb Number of bytes the cache entry should have.
1806 * @param pcbData Where to store the number of bytes the new
1807 * entry can hold. May be lower than actually
1808 * requested due to another entry intersecting the
1809 * access range.
1810 */
1811static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cb, size_t *pcbData)
1812{
1813 uint32_t cbEntry = 0;
1814
1815 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, (uint32_t)cb, &cbEntry);
1816 AssertReturn(cb <= UINT32_MAX, NULL);
1817
1818 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1819 pdmBlkCacheLockEnter(pCache);
1820
1821 PPDMBLKCACHEENTRY pEntryNew = NULL;
1822 uint8_t *pbBuffer = NULL;
1823 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1824 if (fEnough)
1825 {
1826 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1827
1828 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, pbBuffer);
1829 if (RT_LIKELY(pEntryNew))
1830 {
1831 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1832 pdmBlkCacheAdd(pCache, cbEntry);
1833 pdmBlkCacheLockLeave(pCache);
1834
1835 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1836
1837 AssertMsg( (off >= pEntryNew->Core.Key)
1838 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1839 ("Overflow in calculation off=%llu\n", off));
1840 }
1841 else
1842 pdmBlkCacheLockLeave(pCache);
1843 }
1844 else
1845 pdmBlkCacheLockLeave(pCache);
1846
1847 return pEntryNew;
1848}
1849
1850static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(void *pvUser)
1851{
1852 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1853
1854 if (RT_LIKELY(pReq))
1855 {
1856 pReq->pvUser = pvUser;
1857 pReq->rcReq = VINF_SUCCESS;
1858 pReq->cXfersPending = 0;
1859 }
1860
1861 return pReq;
1862}
1863
1864static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1865{
1866 switch (pBlkCache->enmType)
1867 {
1868 case PDMBLKCACHETYPE_DEV:
1869 {
1870 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1871 pReq->pvUser, pReq->rcReq);
1872 break;
1873 }
1874 case PDMBLKCACHETYPE_DRV:
1875 {
1876 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1877 pReq->pvUser, pReq->rcReq);
1878 break;
1879 }
1880 case PDMBLKCACHETYPE_USB:
1881 {
1882 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1883 pReq->pvUser, pReq->rcReq);
1884 break;
1885 }
1886 case PDMBLKCACHETYPE_INTERNAL:
1887 {
1888 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1889 pReq->pvUser, pReq->rcReq);
1890 break;
1891 }
1892 default:
1893 AssertMsgFailed(("Unknown block cache type!\n"));
1894 }
1895
1896 RTMemFree(pReq);
1897}
1898
1899static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1900 int rcReq, bool fCallHandler)
1901{
1902 if (RT_FAILURE(rcReq))
1903 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1904
1905 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1906 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1907
1908 if (!cXfersPending)
1909 {
1910 if (fCallHandler)
1911 pdmBlkCacheReqComplete(pBlkCache, pReq);
1912 return true;
1913 }
1914
1915 LogFlowFunc(("pReq=%#p cXfersPending=%u\n", pReq, cXfersPending));
1916 return false;
1917}
1918
1919VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1920 PCRTSGBUF pSgBuf, size_t cbRead, void *pvUser)
1921{
1922 int rc = VINF_SUCCESS;
1923 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1924 PPDMBLKCACHEENTRY pEntry;
1925 PPDMBLKCACHEREQ pReq;
1926
1927 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbRead=%u pvUser=%#p\n",
1928 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbRead, pvUser));
1929
1930 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
1931 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
1932
1933 RTSGBUF SgBuf;
1934 RTSgBufClone(&SgBuf, pSgBuf);
1935
1936 /* Allocate new request structure. */
1937 pReq = pdmBlkCacheReqAlloc(pvUser);
1938 if (RT_UNLIKELY(!pReq))
1939 return VERR_NO_MEMORY;
1940
1941 /* Increment data transfer counter to keep the request valid while we access it. */
1942 ASMAtomicIncU32(&pReq->cXfersPending);
1943
1944 while (cbRead)
1945 {
1946 size_t cbToRead;
1947
1948 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1949
1950 /*
1951 * If there is no entry we try to create a new one eviciting unused pages
1952 * if the cache is full. If this is not possible we will pass the request through
1953 * and skip the caching (all entries may be still in progress so they can't
1954 * be evicted)
1955 * If we have an entry it can be in one of the LRU lists where the entry
1956 * contains data (recently used or frequently used LRU) so we can just read
1957 * the data we need and put the entry at the head of the frequently used LRU list.
1958 * In case the entry is in one of the ghost lists it doesn't contain any data.
1959 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1960 */
1961 if (pEntry)
1962 {
1963 uint64_t offDiff = off - pEntry->Core.Key;
1964
1965 AssertMsg(off >= pEntry->Core.Key,
1966 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1967 off, pEntry->Core.Key));
1968
1969 AssertPtr(pEntry->pList);
1970
1971 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
1972
1973 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1974 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
1975 off, cbToRead));
1976
1977 cbRead -= cbToRead;
1978
1979 if (!cbRead)
1980 STAM_COUNTER_INC(&pCache->cHits);
1981 else
1982 STAM_COUNTER_INC(&pCache->cPartialHits);
1983
1984 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1985
1986 /* Ghost lists contain no data. */
1987 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1988 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1989 {
1990 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
1991 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
1992 PDMBLKCACHE_ENTRY_IS_DIRTY))
1993 {
1994 /* Entry didn't completed yet. Append to the list */
1995 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
1996 &SgBuf, offDiff, cbToRead,
1997 false /* fWrite */);
1998 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1999 }
2000 else
2001 {
2002 /* Read as much as we can from the entry. */
2003 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2004 }
2005
2006 /* Move this entry to the top position */
2007 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2008 {
2009 pdmBlkCacheLockEnter(pCache);
2010 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2011 pdmBlkCacheLockLeave(pCache);
2012 }
2013 /* Release the entry */
2014 pdmBlkCacheEntryRelease(pEntry);
2015 }
2016 else
2017 {
2018 uint8_t *pbBuffer = NULL;
2019
2020 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2021
2022 pdmBlkCacheLockEnter(pCache);
2023 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2024 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2025
2026 /* Move the entry to Am and fetch it to the cache. */
2027 if (fEnough)
2028 {
2029 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2030 pdmBlkCacheAdd(pCache, pEntry->cbData);
2031 pdmBlkCacheLockLeave(pCache);
2032
2033 if (pbBuffer)
2034 pEntry->pbData = pbBuffer;
2035 else
2036 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2037 AssertPtr(pEntry->pbData);
2038
2039 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2040 &SgBuf, offDiff, cbToRead,
2041 false /* fWrite */);
2042 pdmBlkCacheEntryReadFromMedium(pEntry);
2043 /* Release the entry */
2044 pdmBlkCacheEntryRelease(pEntry);
2045 }
2046 else
2047 {
2048 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2049 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2050 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2051 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2052 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2053
2054 pdmBlkCacheLockLeave(pCache);
2055
2056 RTMemFree(pEntry);
2057
2058 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2059 &SgBuf, off, cbToRead,
2060 PDMBLKCACHEXFERDIR_READ);
2061 }
2062 }
2063 }
2064 else
2065 {
2066#ifdef VBOX_WITH_IO_READ_CACHE
2067 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2068 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2069 off, cbRead,
2070 &cbToRead);
2071
2072 cbRead -= cbToRead;
2073
2074 if (pEntryNew)
2075 {
2076 if (!cbRead)
2077 STAM_COUNTER_INC(&pCache->cMisses);
2078 else
2079 STAM_COUNTER_INC(&pCache->cPartialHits);
2080
2081 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2082 &SgBuf,
2083 off - pEntryNew->Core.Key,
2084 cbToRead,
2085 false /* fWrite */);
2086 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2087 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2088 }
2089 else
2090 {
2091 /*
2092 * There is not enough free space in the cache.
2093 * Pass the request directly to the I/O manager.
2094 */
2095 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2096
2097 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2098 &SgBuf, off, cbToRead,
2099 PDMBLKCACHEXFERDIR_READ);
2100 }
2101#else
2102 /* Clip read size if necessary. */
2103 PPDMBLKCACHEENTRY pEntryAbove;
2104 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2105
2106 if (pEntryAbove)
2107 {
2108 if (off + cbRead > pEntryAbove->Core.Key)
2109 cbToRead = pEntryAbove->Core.Key - off;
2110 else
2111 cbToRead = cbRead;
2112
2113 pdmBlkCacheEntryRelease(pEntryAbove);
2114 }
2115 else
2116 cbToRead = cbRead;
2117
2118 cbRead -= cbToRead;
2119 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2120 &SgBuf, off, cbToRead,
2121 PDMBLKCACHEXFERDIR_READ);
2122#endif
2123 }
2124 off += cbToRead;
2125 }
2126
2127 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2128 rc = VINF_AIO_TASK_PENDING;
2129 else
2130 {
2131 rc = pReq->rcReq;
2132 RTMemFree(pReq);
2133 }
2134
2135 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2136
2137 return rc;
2138}
2139
2140VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off, PCRTSGBUF pSgBuf, size_t cbWrite, void *pvUser)
2141{
2142 int rc = VINF_SUCCESS;
2143 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2144 PPDMBLKCACHEENTRY pEntry;
2145 PPDMBLKCACHEREQ pReq;
2146
2147 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2148 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbWrite, pvUser));
2149
2150 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2151 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2152
2153 RTSGBUF SgBuf;
2154 RTSgBufClone(&SgBuf, pSgBuf);
2155
2156 /* Allocate new request structure. */
2157 pReq = pdmBlkCacheReqAlloc(pvUser);
2158 if (RT_UNLIKELY(!pReq))
2159 return VERR_NO_MEMORY;
2160
2161 /* Increment data transfer counter to keep the request valid while we access it. */
2162 ASMAtomicIncU32(&pReq->cXfersPending);
2163
2164 while (cbWrite)
2165 {
2166 size_t cbToWrite;
2167
2168 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2169 if (pEntry)
2170 {
2171 /* Write the data into the entry and mark it as dirty */
2172 AssertPtr(pEntry->pList);
2173
2174 uint64_t offDiff = off - pEntry->Core.Key;
2175 AssertMsg(off >= pEntry->Core.Key, ("Overflow in calculation off=%llu OffsetAligned=%llu\n", off, pEntry->Core.Key));
2176
2177 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2178 cbWrite -= cbToWrite;
2179
2180 if (!cbWrite)
2181 STAM_COUNTER_INC(&pCache->cHits);
2182 else
2183 STAM_COUNTER_INC(&pCache->cPartialHits);
2184
2185 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2186
2187 /* Ghost lists contain no data. */
2188 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2189 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2190 {
2191 /* Check if the entry is dirty. */
2192 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2193 PDMBLKCACHE_ENTRY_IS_DIRTY,
2194 0))
2195 {
2196 /* If it is already dirty but not in progress just update the data. */
2197 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2198 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2199 else
2200 {
2201 /* The data isn't written to the file yet */
2202 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2203 &SgBuf, offDiff, cbToWrite,
2204 true /* fWrite */);
2205 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2206 }
2207
2208 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2209 }
2210 else /* Dirty bit not set */
2211 {
2212 /*
2213 * Check if a read is in progress for this entry.
2214 * We have to defer processing in that case.
2215 */
2216 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2217 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2218 0))
2219 {
2220 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2221 &SgBuf, offDiff, cbToWrite,
2222 true /* fWrite */);
2223 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2224 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2225 }
2226 else /* I/O in progress flag not set */
2227 {
2228 /* Write as much as we can into the entry and update the file. */
2229 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2230
2231 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2232 if (fCommit)
2233 pdmBlkCacheCommitDirtyEntries(pCache);
2234 }
2235 } /* Dirty bit not set */
2236
2237 /* Move this entry to the top position */
2238 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2239 {
2240 pdmBlkCacheLockEnter(pCache);
2241 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2242 pdmBlkCacheLockLeave(pCache);
2243 }
2244
2245 pdmBlkCacheEntryRelease(pEntry);
2246 }
2247 else /* Entry is on the ghost list */
2248 {
2249 uint8_t *pbBuffer = NULL;
2250
2251 pdmBlkCacheLockEnter(pCache);
2252 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2253 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2254
2255 if (fEnough)
2256 {
2257 /* Move the entry to Am and fetch it to the cache. */
2258 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2259 pdmBlkCacheAdd(pCache, pEntry->cbData);
2260 pdmBlkCacheLockLeave(pCache);
2261
2262 if (pbBuffer)
2263 pEntry->pbData = pbBuffer;
2264 else
2265 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2266 AssertPtr(pEntry->pbData);
2267
2268 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2269 &SgBuf, offDiff, cbToWrite,
2270 true /* fWrite */);
2271 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2272 pdmBlkCacheEntryReadFromMedium(pEntry);
2273
2274 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2275 pdmBlkCacheEntryRelease(pEntry);
2276 }
2277 else
2278 {
2279 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2280 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2281 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2282 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2283 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2284
2285 pdmBlkCacheLockLeave(pCache);
2286
2287 RTMemFree(pEntry);
2288 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2289 &SgBuf, off, cbToWrite,
2290 PDMBLKCACHEXFERDIR_WRITE);
2291 }
2292 }
2293 }
2294 else /* No entry found */
2295 {
2296 /*
2297 * No entry found. Try to create a new cache entry to store the data in and if that fails
2298 * write directly to the file.
2299 */
2300 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2301 off, cbWrite,
2302 &cbToWrite);
2303
2304 cbWrite -= cbToWrite;
2305
2306 if (pEntryNew)
2307 {
2308 uint64_t offDiff = off - pEntryNew->Core.Key;
2309
2310 STAM_COUNTER_INC(&pCache->cHits);
2311
2312 /*
2313 * Check if it is possible to just write the data without waiting
2314 * for it to get fetched first.
2315 */
2316 if (!offDiff && pEntryNew->cbData == cbToWrite)
2317 {
2318 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2319
2320 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2321 if (fCommit)
2322 pdmBlkCacheCommitDirtyEntries(pCache);
2323 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2324 }
2325 else
2326 {
2327 /* Defer the write and fetch the data from the endpoint. */
2328 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2329 &SgBuf, offDiff, cbToWrite,
2330 true /* fWrite */);
2331 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2332 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2333 }
2334
2335 pdmBlkCacheEntryRelease(pEntryNew);
2336 }
2337 else
2338 {
2339 /*
2340 * There is not enough free space in the cache.
2341 * Pass the request directly to the I/O manager.
2342 */
2343 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2344
2345 STAM_COUNTER_INC(&pCache->cMisses);
2346
2347 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2348 &SgBuf, off, cbToWrite,
2349 PDMBLKCACHEXFERDIR_WRITE);
2350 }
2351 }
2352
2353 off += cbToWrite;
2354 }
2355
2356 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2357 rc = VINF_AIO_TASK_PENDING;
2358 else
2359 {
2360 rc = pReq->rcReq;
2361 RTMemFree(pReq);
2362 }
2363
2364 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2365
2366 return rc;
2367}
2368
2369VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2370{
2371 int rc = VINF_SUCCESS;
2372 PPDMBLKCACHEREQ pReq;
2373
2374 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2375
2376 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2377 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2378
2379 /* Commit dirty entries in the cache. */
2380 pdmBlkCacheCommit(pBlkCache);
2381
2382 /* Allocate new request structure. */
2383 pReq = pdmBlkCacheReqAlloc(pvUser);
2384 if (RT_UNLIKELY(!pReq))
2385 return VERR_NO_MEMORY;
2386
2387 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2388 PDMBLKCACHEXFERDIR_FLUSH);
2389 AssertRC(rc);
2390
2391 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2392 return VINF_AIO_TASK_PENDING;
2393}
2394
2395VMMR3DECL(int) PDMR3BlkCacheDiscard(PPDMBLKCACHE pBlkCache, PCRTRANGE paRanges,
2396 unsigned cRanges, void *pvUser)
2397{
2398 int rc = VINF_SUCCESS;
2399 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2400 PPDMBLKCACHEENTRY pEntry;
2401 PPDMBLKCACHEREQ pReq;
2402
2403 LogFlowFunc((": pBlkCache=%#p{%s} paRanges=%#p cRanges=%u pvUser=%#p\n",
2404 pBlkCache, pBlkCache->pszId, paRanges, cRanges, pvUser));
2405
2406 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2407 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2408
2409 /* Allocate new request structure. */
2410 pReq = pdmBlkCacheReqAlloc(pvUser);
2411 if (RT_UNLIKELY(!pReq))
2412 return VERR_NO_MEMORY;
2413
2414 /* Increment data transfer counter to keep the request valid while we access it. */
2415 ASMAtomicIncU32(&pReq->cXfersPending);
2416
2417 for (unsigned i = 0; i < cRanges; i++)
2418 {
2419 uint64_t offCur = paRanges[i].offStart;
2420 size_t cbLeft = paRanges[i].cbRange;
2421
2422 while (cbLeft)
2423 {
2424 size_t cbThisDiscard = 0;
2425
2426 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, offCur);
2427
2428 if (pEntry)
2429 {
2430 /* Write the data into the entry and mark it as dirty */
2431 AssertPtr(pEntry->pList);
2432
2433 uint64_t offDiff = offCur - pEntry->Core.Key;
2434
2435 AssertMsg(offCur >= pEntry->Core.Key,
2436 ("Overflow in calculation offCur=%llu OffsetAligned=%llu\n",
2437 offCur, pEntry->Core.Key));
2438
2439 cbThisDiscard = RT_MIN(pEntry->cbData - offDiff, cbLeft);
2440
2441 /* Ghost lists contain no data. */
2442 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2443 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2444 {
2445 /* Check if the entry is dirty. */
2446 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2447 PDMBLKCACHE_ENTRY_IS_DIRTY,
2448 0))
2449 {
2450 /* If it is dirty but not yet in progress remove it. */
2451 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2452 {
2453 pdmBlkCacheLockEnter(pCache);
2454 pdmBlkCacheEntryRemoveFromList(pEntry);
2455
2456 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2457 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2458 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2459
2460 pdmBlkCacheLockLeave(pCache);
2461
2462 RTMemFree(pEntry);
2463 }
2464 else
2465 {
2466#if 0
2467 /* The data isn't written to the file yet */
2468 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2469 &SgBuf, offDiff, cbToWrite,
2470 true /* fWrite */);
2471 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2472#endif
2473 }
2474
2475 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2476 pdmBlkCacheEntryRelease(pEntry);
2477 }
2478 else /* Dirty bit not set */
2479 {
2480 /*
2481 * Check if a read is in progress for this entry.
2482 * We have to defer processing in that case.
2483 */
2484 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2485 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2486 0))
2487 {
2488#if 0
2489 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2490 &SgBuf, offDiff, cbToWrite,
2491 true /* fWrite */);
2492#endif
2493 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2494 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2495 pdmBlkCacheEntryRelease(pEntry);
2496 }
2497 else /* I/O in progress flag not set */
2498 {
2499 pdmBlkCacheLockEnter(pCache);
2500 pdmBlkCacheEntryRemoveFromList(pEntry);
2501
2502 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2503 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2504 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2505 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2506 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2507
2508 pdmBlkCacheLockLeave(pCache);
2509
2510 RTMemFree(pEntry);
2511 }
2512 } /* Dirty bit not set */
2513 }
2514 else /* Entry is on the ghost list just remove cache entry. */
2515 {
2516 pdmBlkCacheLockEnter(pCache);
2517 pdmBlkCacheEntryRemoveFromList(pEntry);
2518
2519 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2520 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2521 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2522 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2523 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2524
2525 pdmBlkCacheLockLeave(pCache);
2526
2527 RTMemFree(pEntry);
2528 }
2529 }
2530 /* else: no entry found. */
2531
2532 offCur += cbThisDiscard;
2533 cbLeft -= cbThisDiscard;
2534 }
2535 }
2536
2537 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2538 rc = VINF_AIO_TASK_PENDING;
2539 else
2540 {
2541 rc = pReq->rcReq;
2542 RTMemFree(pReq);
2543 }
2544
2545 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2546
2547 return rc;
2548}
2549
2550/**
2551 * Completes a task segment freeing all resources and completes the task handle
2552 * if everything was transferred.
2553 *
2554 * @returns Next task segment handle.
2555 * @param pBlkCache The endpoint block cache.
2556 * @param pWaiter Task segment to complete.
2557 * @param rc Status code to set.
2558 */
2559static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEWAITER pWaiter, int rc)
2560{
2561 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2562 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2563
2564 pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, true);
2565
2566 RTMemFree(pWaiter);
2567
2568 return pNext;
2569}
2570
2571static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2572{
2573 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2574 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2575
2576 /* Reference the entry now as we are clearing the I/O in progress flag
2577 * which protected the entry till now. */
2578 pdmBlkCacheEntryRef(pEntry);
2579
2580 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2581 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2582
2583 /* Process waiting segment list. The data in entry might have changed in-between. */
2584 bool fDirty = false;
2585 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2586 PPDMBLKCACHEWAITER pCurr = pComplete;
2587
2588 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2589 ("The list tail was not updated correctly\n"));
2590 pEntry->pWaitingTail = NULL;
2591 pEntry->pWaitingHead = NULL;
2592
2593 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2594 {
2595 /*
2596 * An error here is difficult to handle as the original request completed already.
2597 * The error is logged for now and the VM is paused.
2598 * If the user continues the entry is written again in the hope
2599 * the user fixed the problem and the next write succeeds.
2600 */
2601 if (RT_FAILURE(rcIoXfer))
2602 {
2603 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2604 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2605
2606 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2607 {
2608 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2609 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2610 "Make sure there is enough free space on the disk and that the disk is working properly. "
2611 "Operation can be resumed afterwards"),
2612 pBlkCache->pszId, rcIoXfer);
2613 AssertRC(rc);
2614 }
2615
2616 /* Mark the entry as dirty again to get it added to the list later on. */
2617 fDirty = true;
2618 }
2619
2620 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2621
2622 while (pCurr)
2623 {
2624 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2625
2626 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2627 fDirty = true;
2628 pCurr = pCurr->pNext;
2629 }
2630 }
2631 else
2632 {
2633 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2634 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2635 ("Invalid flags set\n"));
2636
2637 while (pCurr)
2638 {
2639 if (pCurr->fWrite)
2640 {
2641 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2642 fDirty = true;
2643 }
2644 else
2645 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2646
2647 pCurr = pCurr->pNext;
2648 }
2649 }
2650
2651 bool fCommit = false;
2652 if (fDirty)
2653 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2654
2655 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2656
2657 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2658 pdmBlkCacheEntryRelease(pEntry);
2659
2660 if (fCommit)
2661 pdmBlkCacheCommitDirtyEntries(pCache);
2662
2663 /* Complete waiters now. */
2664 while (pComplete)
2665 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2666}
2667
2668VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2669{
2670 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2671
2672 if (hIoXfer->fIoCache)
2673 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2674 else
2675 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, rcIoXfer, true);
2676 RTMemFree(hIoXfer);
2677}
2678
2679/**
2680 * Callback for the AVL do with all routine. Waits for a cachen entry to finish any pending I/O.
2681 *
2682 * @returns IPRT status code.
2683 * @param pNode The node to destroy.
2684 * @param pvUser Opaque user data.
2685 */
2686static DECLCALLBACK(int) pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser)
2687{
2688 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
2689 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
2690 NOREF(pvUser);
2691
2692 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
2693 {
2694 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
2695 pdmBlkCacheEntryRef(pEntry);
2696 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2697
2698 RTThreadSleep(1);
2699
2700 /* Re-enter all locks and drop the reference. */
2701 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2702 pdmBlkCacheEntryRelease(pEntry);
2703 }
2704
2705 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
2706 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
2707
2708 return VINF_SUCCESS;
2709}
2710
2711VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache)
2712{
2713 int rc = VINF_SUCCESS;
2714 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2715
2716 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2717
2718 if (!ASMAtomicReadBool(&pBlkCache->pCache->fIoErrorVmSuspended))
2719 pdmBlkCacheCommit(pBlkCache); /* Can issue new I/O requests. */
2720 ASMAtomicXchgBool(&pBlkCache->fSuspended, true);
2721
2722 /* Wait for all I/O to complete. */
2723 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2724 rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL);
2725 AssertRC(rc);
2726 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2727
2728 return rc;
2729}
2730
2731VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache)
2732{
2733 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2734
2735 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2736
2737 ASMAtomicXchgBool(&pBlkCache->fSuspended, false);
2738
2739 return VINF_SUCCESS;
2740}
2741
2742VMMR3DECL(int) PDMR3BlkCacheClear(PPDMBLKCACHE pBlkCache)
2743{
2744 int rc = VINF_SUCCESS;
2745 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2746
2747 /*
2748 * Commit all dirty entries now (they are waited on for completion during the
2749 * destruction of the AVL tree below).
2750 * The exception is if the VM was paused because of an I/O error before.
2751 */
2752 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
2753 pdmBlkCacheCommit(pBlkCache);
2754
2755 /* Make sure nobody is accessing the cache while we delete the tree. */
2756 pdmBlkCacheLockEnter(pCache);
2757 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2758 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
2759 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2760
2761 pdmBlkCacheLockLeave(pCache);
2762 return rc;
2763}
2764
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette