VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMBlkCache.cpp@ 86087

Last change on this file since 86087 was 82968, checked in by vboxsync, 5 years ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 100.2 KB
Line 
1/* $Id: PDMBlkCache.cpp 82968 2020-02-04 10:35:17Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
22
23/*********************************************************************************************************************************
24* Header Files *
25*********************************************************************************************************************************/
26#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
27#include "PDMInternal.h"
28#include <iprt/asm.h>
29#include <iprt/mem.h>
30#include <iprt/path.h>
31#include <iprt/string.h>
32#include <iprt/trace.h>
33#include <VBox/log.h>
34#include <VBox/vmm/stam.h>
35#include <VBox/vmm/uvm.h>
36#include <VBox/vmm/vm.h>
37
38#include "PDMBlkCacheInternal.h"
39
40
41/*********************************************************************************************************************************
42* Defined Constants And Macros *
43*********************************************************************************************************************************/
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given
 *  global cache.  Expands to nothing in non-strict builds. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n")); \
    } while (0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for
 *  writing.  Expands to nothing in non-strict builds. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while (0)

/** Asserts that the calling thread holds the per endpoint RW semaphore for
 *  reading.  Expands to nothing in non-strict builds. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while (0)

#else
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache)            do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache)   do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache)    do { } while (0)
#endif
71
72#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1
73
/* Define this to enable tracing in the block cache code when investigating issues. */
75/*#define VBOX_BLKCACHE_TRACING 1*/
76
77
78/*********************************************************************************************************************************
79* Internal Functions *
80*********************************************************************************************************************************/
81
82static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
83 uint64_t off, size_t cbData, uint8_t *pbBuffer);
84static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
85
86
87/**
88 * Add message to the VM trace buffer.
89 *
90 * @returns nothing.
91 * @param pBlkCache The block cache.
92 * @param pszFmt The format string.
93 * @param ... Additional parameters for the string formatter.
94 */
95DECLINLINE(void) pdmBlkCacheR3TraceMsgF(PPDMBLKCACHE pBlkCache, const char *pszFmt, ...)
96{
97#if defined(VBOX_BLKCACHE_TRACING)
98 va_list va;
99 va_start(va, pszFmt);
100 RTTraceBufAddMsgV(pBlkCache->pCache->pVM->CTX_SUFF(hTraceBuf), pszFmt, va);
101 va_end(va);
102#else
103 RT_NOREF2(pBlkCache, pszFmt);
104#endif
105}
106
107/**
108 * Decrement the reference counter of the given cache entry.
109 *
110 * @returns nothing.
111 * @param pEntry The entry to release.
112 */
113DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
114{
115 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
116 ASMAtomicDecU32(&pEntry->cRefs);
117}
118
119/**
120 * Increment the reference counter of the given cache entry.
121 *
122 * @returns nothing.
123 * @param pEntry The entry to reference.
124 */
125DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
126{
127 ASMAtomicIncU32(&pEntry->cRefs);
128}
129
#if defined(VBOX_STRICT)
/**
 * Sanity checks the byte accounting of the global cache (strict builds only).
 *
 * @returns nothing.
 * @param   pCache  The global cache instance to check.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    /* The configured maximum is an upper bound for the cached data. */
    AssertMsg(pCache->cbCached <= pCache->cbMax,
              ("Current amount of cached data exceeds maximum\n"));

    /* The recently-used-in and frequently-used lists together account for
       all cached bytes. */
    AssertMsg(pCache->cbCached == pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The paged out (ghost) list has a limit of its own. */
    AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
              ("Paged out list exceeds maximum\n"));
}
#endif
145
146DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
147{
148 RTCritSectEnter(&pCache->CritSect);
149#ifdef VBOX_STRICT
150 pdmBlkCacheValidate(pCache);
151#endif
152}
153
154DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
155{
156#ifdef VBOX_STRICT
157 pdmBlkCacheValidate(pCache);
158#endif
159 RTCritSectLeave(&pCache->CritSect);
160}
161
162DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
163{
164 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
165 pCache->cbCached -= cbAmount;
166}
167
168DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
169{
170 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
171 pCache->cbCached += cbAmount;
172}
173
174DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
175{
176 pList->cbCached += cbAmount;
177}
178
179DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
180{
181 pList->cbCached -= cbAmount;
182}
183
#if defined(PDMACFILECACHE_WITH_LRULIST_CHECKS)
/**
 * Walks an LRU list and asserts that it is well formed.
 *
 * Checked for every element: it does not show up again further down the list
 * (which would also indicate a cycle), it is not the forbidden element, and
 * if it has no successor it is the tail recorded in the list.
 *
 * @returns nothing
 * @param   pList       The LRU list to check.
 * @param   pNotInList  Element which must not occur in the list, may be NULL.
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pIt = pList->pHead; pIt; pIt = pIt->pNext)
    {
        /* Compare against everything following this element. */
        for (PPDMBLKCACHEENTRY pOther = pIt->pNext; pOther; pOther = pOther->pNext)
        {
            AssertMsg(pIt != pOther,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pIt, pList));
        }

        AssertMsg(pIt != pNotInList, ("Not allowed entry %#p is in list\n", pIt));

        /* The element without a successor has to be the recorded tail. */
        AssertMsg(pIt->pNext || pIt == pList->pTail,
                  ("End of list reached but last element is not list tail\n"));
    }
}
#endif
218
219/**
220 * Unlinks a cache entry from the LRU list it is assigned to.
221 *
222 * @returns nothing.
223 * @param pEntry The entry to unlink.
224 */
225static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
226{
227 PPDMBLKLRULIST pList = pEntry->pList;
228 PPDMBLKCACHEENTRY pPrev, pNext;
229
230 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
231
232 AssertPtr(pList);
233
234#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
235 pdmBlkCacheCheckList(pList, NULL);
236#endif
237
238 pPrev = pEntry->pPrev;
239 pNext = pEntry->pNext;
240
241 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
242 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
243
244 if (pPrev)
245 pPrev->pNext = pNext;
246 else
247 {
248 pList->pHead = pNext;
249
250 if (pNext)
251 pNext->pPrev = NULL;
252 }
253
254 if (pNext)
255 pNext->pPrev = pPrev;
256 else
257 {
258 pList->pTail = pPrev;
259
260 if (pPrev)
261 pPrev->pNext = NULL;
262 }
263
264 pEntry->pList = NULL;
265 pEntry->pPrev = NULL;
266 pEntry->pNext = NULL;
267 pdmBlkCacheListSub(pList, pEntry->cbData);
268#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
269 pdmBlkCacheCheckList(pList, pEntry);
270#endif
271}
272
273/**
274 * Adds a cache entry to the given LRU list unlinking it from the currently
275 * assigned list if needed.
276 *
277 * @returns nothing.
278 * @param pList List to the add entry to.
279 * @param pEntry Entry to add.
280 */
281static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
282{
283 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
284#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
285 pdmBlkCacheCheckList(pList, NULL);
286#endif
287
288 /* Remove from old list if needed */
289 if (pEntry->pList)
290 pdmBlkCacheEntryRemoveFromList(pEntry);
291
292 pEntry->pNext = pList->pHead;
293 if (pList->pHead)
294 pList->pHead->pPrev = pEntry;
295 else
296 {
297 Assert(!pList->pTail);
298 pList->pTail = pEntry;
299 }
300
301 pEntry->pPrev = NULL;
302 pList->pHead = pEntry;
303 pdmBlkCacheListAdd(pList, pEntry->cbData);
304 pEntry->pList = pList;
305#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
306 pdmBlkCacheCheckList(pList, NULL);
307#endif
308}
309
310/**
311 * Destroys a LRU list freeing all entries.
312 *
313 * @returns nothing
314 * @param pList Pointer to the LRU list to destroy.
315 *
316 * @note The caller must own the critical section of the cache.
317 */
318static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
319{
320 while (pList->pHead)
321 {
322 PPDMBLKCACHEENTRY pEntry = pList->pHead;
323
324 pList->pHead = pEntry->pNext;
325
326 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
327 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
328
329 RTMemPageFree(pEntry->pbData, pEntry->cbData);
330 RTMemFree(pEntry);
331 }
332}
333
334/**
335 * Tries to remove the given amount of bytes from a given list in the cache
336 * moving the entries to one of the given ghosts lists
337 *
338 * @returns Amount of data which could be freed.
339 * @param pCache Pointer to the global cache data.
340 * @param cbData The amount of the data to free.
341 * @param pListSrc The source list to evict data from.
342 * @param pGhostListDst Where the ghost list removed entries should be
343 * moved to, NULL if the entry should be freed.
344 * @param fReuseBuffer Flag whether a buffer should be reused if it has
345 * the same size
346 * @param ppbBuffer Where to store the address of the buffer if an
347 * entry with the same size was found and
348 * fReuseBuffer is true.
349 *
350 * @note This function may return fewer bytes than requested because entries
351 * may be marked as non evictable if they are used for I/O at the
352 * moment.
353 */
354static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
355 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
356 bool fReuseBuffer, uint8_t **ppbBuffer)
357{
358 size_t cbEvicted = 0;
359
360 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
361
362 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
363 AssertMsg( !pGhostListDst
364 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
365 ("Destination list must be NULL or the recently used but paged out list\n"));
366
367 if (fReuseBuffer)
368 {
369 AssertPtr(ppbBuffer);
370 *ppbBuffer = NULL;
371 }
372
373 /* Start deleting from the tail. */
374 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
375
376 while ((cbEvicted < cbData) && pEntry)
377 {
378 PPDMBLKCACHEENTRY pCurr = pEntry;
379
380 pEntry = pEntry->pPrev;
381
382 /* We can't evict pages which are currently in progress or dirty but not in progress */
383 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
384 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
385 {
386 /* Ok eviction candidate. Grab the endpoint semaphore and check again
387 * because somebody else might have raced us. */
388 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
389 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
390
391 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
392 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
393 {
394 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
395
396 if (fReuseBuffer && pCurr->cbData == cbData)
397 {
398 STAM_COUNTER_INC(&pCache->StatBuffersReused);
399 *ppbBuffer = pCurr->pbData;
400 }
401 else if (pCurr->pbData)
402 RTMemPageFree(pCurr->pbData, pCurr->cbData);
403
404 pCurr->pbData = NULL;
405 cbEvicted += pCurr->cbData;
406
407 pdmBlkCacheEntryRemoveFromList(pCurr);
408 pdmBlkCacheSub(pCache, pCurr->cbData);
409
410 if (pGhostListDst)
411 {
412 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
413
414 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
415
416 /* We have to remove the last entries from the paged out list. */
417 while ( pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax
418 && pGhostEntFree)
419 {
420 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
421 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
422
423 pGhostEntFree = pGhostEntFree->pPrev;
424
425 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
426
427 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
428 {
429 pdmBlkCacheEntryRemoveFromList(pFree);
430
431 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
432 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
433 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
434
435 RTMemFree(pFree);
436 }
437
438 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
439 }
440
441 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
442 {
443 /* Couldn't remove enough entries. Delete */
444 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
445 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
446 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
447
448 RTMemFree(pCurr);
449 }
450 else
451 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
452 }
453 else
454 {
455 /* Delete the entry from the AVL tree it is assigned to. */
456 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
457 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
458 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
459
460 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
461 RTMemFree(pCurr);
462 }
463 }
464
465 }
466 else
467 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
468 }
469
470 return cbEvicted;
471}
472
473static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
474{
475 size_t cbRemoved = 0;
476
477 if ((pCache->cbCached + cbData) < pCache->cbMax)
478 return true;
479 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
480 {
481 /* Try to evict as many bytes as possible from A1in */
482 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
483 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
484
485 /*
486 * If it was not possible to remove enough entries
487 * try the frequently accessed cache.
488 */
489 if (cbRemoved < cbData)
490 {
491 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
492
493 /*
494 * If we removed something we can't pass the reuse buffer flag anymore because
495 * we don't need to evict that much data
496 */
497 if (!cbRemoved)
498 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
499 NULL, fReuseBuffer, ppbBuffer);
500 else
501 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
502 NULL, false, NULL);
503 }
504 }
505 else
506 {
507 /* We have to remove entries from frequently access list. */
508 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
509 NULL, fReuseBuffer, ppbBuffer);
510 }
511
512 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
513 return (cbRemoved >= cbData);
514}
515
516DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbXfer, PPDMBLKCACHEIOXFER pIoXfer)
517{
518 int rc = VINF_SUCCESS;
519
520 LogFlowFunc(("%s: Enqueuing hIoXfer=%#p enmXferDir=%d\n",
521 __FUNCTION__, pIoXfer, pIoXfer->enmXferDir));
522
523 ASMAtomicIncU32(&pBlkCache->cIoXfersActive);
524 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool , %d) queued (%u now active)",
525 pIoXfer, pIoXfer->fIoCache, pIoXfer->enmXferDir, pBlkCache->cIoXfersActive);
526
527 switch (pBlkCache->enmType)
528 {
529 case PDMBLKCACHETYPE_DEV:
530 {
531 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
532 pIoXfer->enmXferDir,
533 off, cbXfer,
534 &pIoXfer->SgBuf, pIoXfer);
535 break;
536 }
537 case PDMBLKCACHETYPE_DRV:
538 {
539 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
540 pIoXfer->enmXferDir,
541 off, cbXfer,
542 &pIoXfer->SgBuf, pIoXfer);
543 break;
544 }
545 case PDMBLKCACHETYPE_USB:
546 {
547 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
548 pIoXfer->enmXferDir,
549 off, cbXfer,
550 &pIoXfer->SgBuf, pIoXfer);
551 break;
552 }
553 case PDMBLKCACHETYPE_INTERNAL:
554 {
555 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
556 pIoXfer->enmXferDir,
557 off, cbXfer,
558 &pIoXfer->SgBuf, pIoXfer);
559 break;
560 }
561 default:
562 AssertMsgFailed(("Unknown block cache type!\n"));
563 }
564
565 if (RT_FAILURE(rc))
566 {
567 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: Queueing I/O req %#p failed %Rrc", pIoXfer, rc);
568 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
569 }
570
571 LogFlowFunc(("%s: returns rc=%Rrc\n", __FUNCTION__, rc));
572 return rc;
573}
574
575/**
576 * Initiates a read I/O task for the given entry.
577 *
578 * @returns VBox status code.
579 * @param pEntry The entry to fetch the data to.
580 */
581static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
582{
583 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
584 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
585
586 /* Make sure no one evicts the entry while it is accessed. */
587 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
588
589 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
590 if (RT_UNLIKELY(!pIoXfer))
591 return VERR_NO_MEMORY;
592
593 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
594
595 pIoXfer->fIoCache = true;
596 pIoXfer->pEntry = pEntry;
597 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
598 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
599 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
600 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
601
602 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
603}
604
605/**
606 * Initiates a write I/O task for the given entry.
607 *
608 * @returns nothing.
609 * @param pEntry The entry to read the data from.
610 */
611static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
612{
613 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
614 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
615
616 /* Make sure no one evicts the entry while it is accessed. */
617 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
618
619 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
620 if (RT_UNLIKELY(!pIoXfer))
621 return VERR_NO_MEMORY;
622
623 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
624
625 pIoXfer->fIoCache = true;
626 pIoXfer->pEntry = pEntry;
627 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
628 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
629 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
630 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
631
632 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
633}
634
635/**
636 * Passthrough a part of a request directly to the I/O manager handling the
637 * endpoint.
638 *
639 * @returns VBox status code.
640 * @param pBlkCache The endpoint cache.
641 * @param pReq The request.
642 * @param pSgBuf The scatter/gather buffer.
643 * @param offStart Offset to start transfer from.
644 * @param cbData Amount of data to transfer.
645 * @param enmXferDir The transfer type (read/write)
646 */
647static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
648 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
649 PDMBLKCACHEXFERDIR enmXferDir)
650{
651
652 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
653 if (RT_UNLIKELY(!pIoXfer))
654 return VERR_NO_MEMORY;
655
656 ASMAtomicIncU32(&pReq->cXfersPending);
657 pIoXfer->fIoCache = false;
658 pIoXfer->pReq = pReq;
659 pIoXfer->enmXferDir = enmXferDir;
660 if (pSgBuf)
661 {
662 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
663 RTSgBufAdvance(pSgBuf, cbData);
664 }
665
666 return pdmBlkCacheEnqueue(pBlkCache, offStart, cbData, pIoXfer);
667}
668
669/**
670 * Commit a single dirty entry to the endpoint
671 *
672 * @returns nothing
673 * @param pEntry The entry to commit.
674 */
675static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
676{
677 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
678 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
679 ("Invalid flags set for entry %#p\n", pEntry));
680
681 pdmBlkCacheEntryWriteToMedium(pEntry);
682}
683
684/**
685 * Commit all dirty entries for a single endpoint.
686 *
687 * @returns nothing.
688 * @param pBlkCache The endpoint cache to commit.
689 */
690static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
691{
692 uint32_t cbCommitted = 0;
693
694 /* Return if the cache was suspended. */
695 if (pBlkCache->fSuspended)
696 return;
697
698 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
699
700 /* The list is moved to a new header to reduce locking overhead. */
701 RTLISTANCHOR ListDirtyNotCommitted;
702
703 RTSpinlockAcquire(pBlkCache->LockList);
704 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
705 RTSpinlockRelease(pBlkCache->LockList);
706
707 if (!RTListIsEmpty(&ListDirtyNotCommitted))
708 {
709 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
710
711 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
712 {
713 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
714 NodeNotCommitted);
715 pdmBlkCacheEntryCommit(pEntry);
716 cbCommitted += pEntry->cbData;
717 RTListNodeRemove(&pEntry->NodeNotCommitted);
718 pEntry = pNext;
719 }
720
721 /* Commit the last endpoint */
722 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
723 pdmBlkCacheEntryCommit(pEntry);
724 cbCommitted += pEntry->cbData;
725 RTListNodeRemove(&pEntry->NodeNotCommitted);
726 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
727 ("Committed all entries but list is not empty\n"));
728 }
729
730 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
731 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
732 ("Number of committed bytes exceeds number of dirty bytes\n"));
733 uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
734
735 /* Reset the commit timer if we don't have any dirty bits. */
736 if ( !(cbDirtyOld - cbCommitted)
737 && pBlkCache->pCache->u32CommitTimeoutMs != 0)
738 TMTimerStop(pBlkCache->pCache->pTimerCommit);
739}
740
741/**
742 * Commit all dirty entries in the cache.
743 *
744 * @returns nothing.
745 * @param pCache The global cache instance.
746 */
747static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
748{
749 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
750
751 if (!fCommitInProgress)
752 {
753 pdmBlkCacheLockEnter(pCache);
754 Assert(!RTListIsEmpty(&pCache->ListUsers));
755
756 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
757 AssertPtr(pBlkCache);
758
759 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
760 {
761 pdmBlkCacheCommit(pBlkCache);
762
763 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
764 NodeCacheUser);
765 }
766
767 /* Commit the last endpoint */
768 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
769 pdmBlkCacheCommit(pBlkCache);
770
771 pdmBlkCacheLockLeave(pCache);
772 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
773 }
774}
775
776/**
777 * Adds the given entry as a dirty to the cache.
778 *
779 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
780 * @param pBlkCache The endpoint cache the entry belongs to.
781 * @param pEntry The entry to add.
782 */
783static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
784{
785 bool fDirtyBytesExceeded = false;
786 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
787
788 /* If the commit timer is disabled we commit right away. */
789 if (pCache->u32CommitTimeoutMs == 0)
790 {
791 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
792 pdmBlkCacheEntryCommit(pEntry);
793 }
794 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
795 {
796 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
797
798 RTSpinlockAcquire(pBlkCache->LockList);
799 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
800 RTSpinlockRelease(pBlkCache->LockList);
801
802 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
803
804 /* Prevent committing if the VM was suspended. */
805 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
806 fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold);
807 else if (!cbDirty && pCache->u32CommitTimeoutMs > 0)
808 {
809 /* Arm the commit timer. */
810 TMTimerSetMillies(pCache->pTimerCommit, pCache->u32CommitTimeoutMs);
811 }
812 }
813
814 return fDirtyBytesExceeded;
815}
816
817static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
818{
819 bool fFound = false;
820
821 PPDMBLKCACHE pBlkCache;
822 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
823 {
824 if (!RTStrCmp(pBlkCache->pszId, pcszId))
825 {
826 fFound = true;
827 break;
828 }
829 }
830
831 return fFound ? pBlkCache : NULL;
832}
833
834/**
835 * Commit timer callback.
836 */
837static DECLCALLBACK(void) pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
838{
839 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
840 NOREF(pVM); NOREF(pTimer);
841
842 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
843
844 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
845 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
846 pdmBlkCacheCommitDirtyEntries(pCache);
847
848 LogFlowFunc(("Entries committed, going to sleep\n"));
849}
850
851static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
852{
853 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
854
855 AssertPtr(pBlkCacheGlobal);
856
857 pdmBlkCacheLockEnter(pBlkCacheGlobal);
858
859 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
860
861 /* Go through the list and save all dirty entries. */
862 PPDMBLKCACHE pBlkCache;
863 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
864 {
865 uint32_t cEntries = 0;
866 PPDMBLKCACHEENTRY pEntry;
867
868 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
869 SSMR3PutU32(pSSM, (uint32_t)strlen(pBlkCache->pszId));
870 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
871
872 /* Count the number of entries to safe. */
873 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
874 {
875 cEntries++;
876 }
877
878 SSMR3PutU32(pSSM, cEntries);
879
880 /* Walk the list of all dirty entries and save them. */
881 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
882 {
883 /* A few sanity checks. */
884 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
885 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
886 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
887 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
888 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
889 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
890 ("Invalid list\n"));
891 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
892 ("Size and range do not match\n"));
893
894 /* Save */
895 SSMR3PutU64(pSSM, pEntry->Core.Key);
896 SSMR3PutU32(pSSM, pEntry->cbData);
897 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
898 }
899
900 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
901 }
902
903 pdmBlkCacheLockLeave(pBlkCacheGlobal);
904
905 /* Terminator */
906 return SSMR3PutU32(pSSM, UINT32_MAX);
907}
908
909static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
910{
911 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
912 uint32_t cRefs;
913
914 NOREF(uPass);
915 AssertPtr(pBlkCacheGlobal);
916
917 pdmBlkCacheLockEnter(pBlkCacheGlobal);
918
919 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
920 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
921
922 SSMR3GetU32(pSSM, &cRefs);
923
924 /*
925 * Fewer users in the saved state than in the current VM are allowed
926 * because that means that there are only new ones which don't have any saved state
927 * which can get lost.
928 * More saved state entries than registered cache users are only allowed if the
929 * missing users don't have any data saved in the cache.
930 */
931 int rc = VINF_SUCCESS;
932 char *pszId = NULL;
933
934 while ( cRefs > 0
935 && RT_SUCCESS(rc))
936 {
937 PPDMBLKCACHE pBlkCache = NULL;
938 uint32_t cbId = 0;
939
940 SSMR3GetU32(pSSM, &cbId);
941 Assert(cbId > 0);
942
943 cbId++; /* Include terminator */
944 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
945 if (!pszId)
946 {
947 rc = VERR_NO_MEMORY;
948 break;
949 }
950
951 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
952 AssertRC(rc);
953
954 /* Search for the block cache with the provided id. */
955 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
956
957 /* Get the entries */
958 uint32_t cEntries;
959 SSMR3GetU32(pSSM, &cEntries);
960
961 if (!pBlkCache && (cEntries > 0))
962 {
963 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
964 N_("The VM is missing a block device and there is data in the cache. Please make sure the source and target VMs have compatible storage configurations"));
965 break;
966 }
967
968 RTMemFree(pszId);
969 pszId = NULL;
970
971 while (cEntries > 0)
972 {
973 PPDMBLKCACHEENTRY pEntry;
974 uint64_t off;
975 uint32_t cbEntry;
976
977 SSMR3GetU64(pSSM, &off);
978 SSMR3GetU32(pSSM, &cbEntry);
979
980 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
981 if (!pEntry)
982 {
983 rc = VERR_NO_MEMORY;
984 break;
985 }
986
987 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
988 if (RT_FAILURE(rc))
989 {
990 RTMemFree(pEntry->pbData);
991 RTMemFree(pEntry);
992 break;
993 }
994
995 /* Insert into the tree. */
996 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
997 Assert(fInserted); NOREF(fInserted);
998
999 /* Add to the dirty list. */
1000 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
1001 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
1002 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
1003 pdmBlkCacheEntryRelease(pEntry);
1004 cEntries--;
1005 }
1006
1007 cRefs--;
1008 }
1009
1010 if (pszId)
1011 RTMemFree(pszId);
1012
1013 if (cRefs && RT_SUCCESS(rc))
1014 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
1015 N_("Unexpected error while restoring state. Please make sure the source and target VMs have compatible storage configurations"));
1016
1017 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1018
1019 if (RT_SUCCESS(rc))
1020 {
1021 uint32_t u32 = 0;
1022 rc = SSMR3GetU32(pSSM, &u32);
1023 if (RT_SUCCESS(rc))
1024 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
1025 }
1026
1027 return rc;
1028}
1029
1030int pdmR3BlkCacheInit(PVM pVM)
1031{
1032 int rc = VINF_SUCCESS;
1033 PUVM pUVM = pVM->pUVM;
1034 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
1035
1036 LogFlowFunc((": pVM=%p\n", pVM));
1037
1038 VM_ASSERT_EMT(pVM);
1039
1040 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
1041 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
1042
1043 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
1044 if (!pBlkCacheGlobal)
1045 return VERR_NO_MEMORY;
1046
1047 RTListInit(&pBlkCacheGlobal->ListUsers);
1048 pBlkCacheGlobal->pVM = pVM;
1049 pBlkCacheGlobal->cRefs = 0;
1050 pBlkCacheGlobal->cbCached = 0;
1051 pBlkCacheGlobal->fCommitInProgress = false;
1052
1053 /* Initialize members */
1054 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
1055 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
1056 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
1057
1058 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
1059 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
1060 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
1061
1062 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
1063 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1064 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1065
1066 do
1067 {
1068 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1069 AssertLogRelRCBreak(rc);
1070 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1071
1072 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1073 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1074 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1075 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1076
1077 /** @todo r=aeichner: Experiment to find optimal default values */
1078 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1079 AssertLogRelRCBreak(rc);
1080 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1081 AssertLogRelRCBreak(rc);
1082 } while (0);
1083
1084 if (RT_SUCCESS(rc))
1085 {
1086 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1087 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1088 "/PDM/BlkCache/cbMax",
1089 STAMUNIT_BYTES,
1090 "Maximum cache size");
1091 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1092 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1093 "/PDM/BlkCache/cbCached",
1094 STAMUNIT_BYTES,
1095 "Currently used cache");
1096 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1097 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1098 "/PDM/BlkCache/cbCachedMruIn",
1099 STAMUNIT_BYTES,
1100 "Number of bytes cached in MRU list");
1101 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1102 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1103 "/PDM/BlkCache/cbCachedMruOut",
1104 STAMUNIT_BYTES,
1105 "Number of bytes cached in FRU list");
1106 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1107 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1108 "/PDM/BlkCache/cbCachedFru",
1109 STAMUNIT_BYTES,
1110 "Number of bytes cached in FRU ghost list");
1111
1112#ifdef VBOX_WITH_STATISTICS
1113 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1114 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1115 "/PDM/BlkCache/CacheHits",
1116 STAMUNIT_COUNT, "Number of hits in the cache");
1117 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1118 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1119 "/PDM/BlkCache/CachePartialHits",
1120 STAMUNIT_COUNT, "Number of partial hits in the cache");
1121 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1122 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1123 "/PDM/BlkCache/CacheMisses",
1124 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1125 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1126 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1127 "/PDM/BlkCache/CacheRead",
1128 STAMUNIT_BYTES, "Number of bytes read from the cache");
1129 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1130 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1131 "/PDM/BlkCache/CacheWritten",
1132 STAMUNIT_BYTES, "Number of bytes written to the cache");
1133 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1134 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1135 "/PDM/BlkCache/CacheTreeGet",
1136 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1137 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1138 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1139 "/PDM/BlkCache/CacheTreeInsert",
1140 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1141 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1142 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1143 "/PDM/BlkCache/CacheTreeRemove",
1144 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1145 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1146 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1147 "/PDM/BlkCache/CacheBuffersReused",
1148 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1149#endif
1150
1151 /* Initialize the critical section */
1152 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1153 }
1154
1155 if (RT_SUCCESS(rc))
1156 {
1157 /* Create the commit timer */
1158 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1159 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
1160 pdmBlkCacheCommitTimerCallback,
1161 pBlkCacheGlobal,
1162 "BlkCache-Commit",
1163 &pBlkCacheGlobal->pTimerCommit);
1164
1165 if (RT_SUCCESS(rc))
1166 {
1167 /* Register saved state handler. */
1168 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1169 NULL, NULL, NULL,
1170 NULL, pdmR3BlkCacheSaveExec, NULL,
1171 NULL, pdmR3BlkCacheLoadExec, NULL);
1172 if (RT_SUCCESS(rc))
1173 {
1174 LogRel(("BlkCache: Cache successfully initialized. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1175 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1176 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1177 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1178 return VINF_SUCCESS;
1179 }
1180 }
1181
1182 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1183 }
1184
1185 if (pBlkCacheGlobal)
1186 RTMemFree(pBlkCacheGlobal);
1187
1188 LogFlowFunc((": returns rc=%Rrc\n", rc));
1189 return rc;
1190}
1191
1192void pdmR3BlkCacheTerm(PVM pVM)
1193{
1194 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1195
1196 if (pBlkCacheGlobal)
1197 {
1198 /* Make sure no one else uses the cache now */
1199 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1200
1201 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1202 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1203 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1204 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1205
1206 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1207
1208 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1209 RTMemFree(pBlkCacheGlobal);
1210 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1211 }
1212}
1213
1214int pdmR3BlkCacheResume(PVM pVM)
1215{
1216 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1217
1218 LogFlowFunc(("pVM=%#p\n", pVM));
1219
1220 if ( pBlkCacheGlobal
1221 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1222 {
1223 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1224 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1225 }
1226
1227 return VINF_SUCCESS;
1228}
1229
1230static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1231{
1232 int rc = VINF_SUCCESS;
1233 PPDMBLKCACHE pBlkCache = NULL;
1234 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1235
1236 if (!pBlkCacheGlobal)
1237 return VERR_NOT_SUPPORTED;
1238
1239 /*
1240 * Check that no other user cache has the same id first,
1241 * Unique id's are necessary in case the state is saved.
1242 */
1243 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1244
1245 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1246
1247 if (!pBlkCache)
1248 {
1249 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1250
1251 if (pBlkCache)
1252 pBlkCache->pszId = RTStrDup(pcszId);
1253
1254 if ( pBlkCache
1255 && pBlkCache->pszId)
1256 {
1257 pBlkCache->fSuspended = false;
1258 pBlkCache->cIoXfersActive = 0;
1259 pBlkCache->pCache = pBlkCacheGlobal;
1260 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1261
1262 rc = RTSpinlockCreate(&pBlkCache->LockList, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "pdmR3BlkCacheRetain");
1263 if (RT_SUCCESS(rc))
1264 {
1265 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1266 if (RT_SUCCESS(rc))
1267 {
1268 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1269 if (pBlkCache->pTree)
1270 {
1271#ifdef VBOX_WITH_STATISTICS
1272 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1273 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1274 STAMUNIT_COUNT, "Number of deferred writes",
1275 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1276#endif
1277
1278 /* Add to the list of users. */
1279 pBlkCacheGlobal->cRefs++;
1280 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1281 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1282
1283 *ppBlkCache = pBlkCache;
1284 LogFlowFunc(("returns success\n"));
1285 return VINF_SUCCESS;
1286 }
1287
1288 rc = VERR_NO_MEMORY;
1289 RTSemRWDestroy(pBlkCache->SemRWEntries);
1290 }
1291
1292 RTSpinlockDestroy(pBlkCache->LockList);
1293 }
1294
1295 RTStrFree(pBlkCache->pszId);
1296 }
1297 else
1298 rc = VERR_NO_MEMORY;
1299
1300 if (pBlkCache)
1301 RTMemFree(pBlkCache);
1302 }
1303 else
1304 rc = VERR_ALREADY_EXISTS;
1305
1306 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1307
1308 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1309 return rc;
1310}
1311
1312VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1313 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1314 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1315 PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard,
1316 const char *pcszId)
1317{
1318 int rc = VINF_SUCCESS;
1319 PPDMBLKCACHE pBlkCache;
1320
1321 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1322 if (RT_SUCCESS(rc))
1323 {
1324 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1325 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1326 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1327 pBlkCache->u.Drv.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1328 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1329 *ppBlkCache = pBlkCache;
1330 }
1331
1332 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1333 return rc;
1334}
1335
1336VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1337 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1338 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1339 PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV pfnXferEnqueueDiscard,
1340 const char *pcszId)
1341{
1342 int rc = VINF_SUCCESS;
1343 PPDMBLKCACHE pBlkCache;
1344
1345 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1346 if (RT_SUCCESS(rc))
1347 {
1348 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1349 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1350 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1351 pBlkCache->u.Dev.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1352 pBlkCache->u.Dev.pDevIns = pDevIns;
1353 *ppBlkCache = pBlkCache;
1354 }
1355
1356 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1357 return rc;
1358
1359}
1360
1361VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1362 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1363 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1364 PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB pfnXferEnqueueDiscard,
1365 const char *pcszId)
1366{
1367 int rc = VINF_SUCCESS;
1368 PPDMBLKCACHE pBlkCache;
1369
1370 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1371 if (RT_SUCCESS(rc))
1372 {
1373 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1374 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1375 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1376 pBlkCache->u.Usb.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1377 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1378 *ppBlkCache = pBlkCache;
1379 }
1380
1381 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1382 return rc;
1383
1384}
1385
1386VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1387 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1388 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1389 PFNPDMBLKCACHEXFERENQUEUEDISCARDINT pfnXferEnqueueDiscard,
1390 const char *pcszId)
1391{
1392 int rc = VINF_SUCCESS;
1393 PPDMBLKCACHE pBlkCache;
1394
1395 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1396 if (RT_SUCCESS(rc))
1397 {
1398 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1399 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1400 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1401 pBlkCache->u.Int.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1402 pBlkCache->u.Int.pvUser = pvUser;
1403 *ppBlkCache = pBlkCache;
1404 }
1405
1406 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1407 return rc;
1408
1409}
1410
1411/**
1412 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1413 *
1414 * @returns IPRT status code.
1415 * @param pNode The node to destroy.
1416 * @param pvUser Opaque user data.
1417 */
1418static DECLCALLBACK(int) pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1419{
1420 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1421 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1422 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1423
1424 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1425 {
1426 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1427 pdmBlkCacheEntryRef(pEntry);
1428 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1429 pdmBlkCacheLockLeave(pCache);
1430
1431 RTThreadSleep(250);
1432
1433 /* Re-enter all locks */
1434 pdmBlkCacheLockEnter(pCache);
1435 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1436 pdmBlkCacheEntryRelease(pEntry);
1437 }
1438
1439 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1440 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1441
1442 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1443 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1444
1445 pdmBlkCacheEntryRemoveFromList(pEntry);
1446
1447 if (fUpdateCache)
1448 pdmBlkCacheSub(pCache, pEntry->cbData);
1449
1450 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1451 RTMemFree(pEntry);
1452
1453 return VINF_SUCCESS;
1454}
1455
1456/**
1457 * Destroys all cache resources used by the given endpoint.
1458 *
1459 * @returns nothing.
1460 * @param pBlkCache Block cache handle.
1461 */
1462VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1463{
1464 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1465
1466 /*
1467 * Commit all dirty entries now (they are waited on for completion during the
1468 * destruction of the AVL tree below).
1469 * The exception is if the VM was paused because of an I/O error before.
1470 */
1471 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1472 pdmBlkCacheCommit(pBlkCache);
1473
1474 /* Make sure nobody is accessing the cache while we delete the tree. */
1475 pdmBlkCacheLockEnter(pCache);
1476 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1477 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1478 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1479
1480 RTSpinlockDestroy(pBlkCache->LockList);
1481
1482 pCache->cRefs--;
1483 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1484
1485 pdmBlkCacheLockLeave(pCache);
1486
1487 RTMemFree(pBlkCache->pTree);
1488 pBlkCache->pTree = NULL;
1489 RTSemRWDestroy(pBlkCache->SemRWEntries);
1490
1491#ifdef VBOX_WITH_STATISTICS
1492 STAMR3DeregisterF(pCache->pVM->pUVM, "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1493#endif
1494
1495 RTStrFree(pBlkCache->pszId);
1496 RTMemFree(pBlkCache);
1497}
1498
1499VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1500{
1501 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1502
1503 /*
1504 * Validate input.
1505 */
1506 if (!pDevIns)
1507 return;
1508 VM_ASSERT_EMT(pVM);
1509
1510 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1511 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1512
1513 /* Return silently if not supported. */
1514 if (!pBlkCacheGlobal)
1515 return;
1516
1517 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1518
1519 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1520 {
1521 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1522 && pBlkCache->u.Dev.pDevIns == pDevIns)
1523 PDMR3BlkCacheRelease(pBlkCache);
1524 }
1525
1526 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1527}
1528
1529VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1530{
1531 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1532
1533 /*
1534 * Validate input.
1535 */
1536 if (!pDrvIns)
1537 return;
1538 VM_ASSERT_EMT(pVM);
1539
1540 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1541 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1542
1543 /* Return silently if not supported. */
1544 if (!pBlkCacheGlobal)
1545 return;
1546
1547 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1548
1549 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1550 {
1551 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1552 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1553 PDMR3BlkCacheRelease(pBlkCache);
1554 }
1555
1556 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1557}
1558
1559VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1560{
1561 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1562
1563 /*
1564 * Validate input.
1565 */
1566 if (!pUsbIns)
1567 return;
1568 VM_ASSERT_EMT(pVM);
1569
1570 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1571 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1572
1573 /* Return silently if not supported. */
1574 if (!pBlkCacheGlobal)
1575 return;
1576
1577 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1578
1579 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1580 {
1581 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1582 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1583 PDMR3BlkCacheRelease(pBlkCache);
1584 }
1585
1586 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1587}
1588
1589static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1590{
1591 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1592
1593 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1594 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1595 if (pEntry)
1596 pdmBlkCacheEntryRef(pEntry);
1597 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1598
1599 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1600
1601 return pEntry;
1602}
1603
1604/**
1605 * Return the best fit cache entries for the given offset.
1606 *
1607 * @returns nothing.
1608 * @param pBlkCache The endpoint cache.
1609 * @param off The offset.
1610 * @param ppEntryAbove Where to store the pointer to the best fit entry above
1611 * the given offset. NULL if not required.
1612 */
1613static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEENTRY *ppEntryAbove)
1614{
1615 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1616
1617 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1618 if (ppEntryAbove)
1619 {
1620 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1621 if (*ppEntryAbove)
1622 pdmBlkCacheEntryRef(*ppEntryAbove);
1623 }
1624
1625 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1626
1627 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1628}
1629
1630static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1631{
1632 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeInsert, Cache);
1633 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1634 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1635 AssertMsg(fInserted, ("Node was not inserted into tree\n")); NOREF(fInserted);
1636 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeInsert, Cache);
1637 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1638}
1639
1640/**
1641 * Allocates and initializes a new entry for the cache.
1642 * The entry has a reference count of 1.
1643 *
1644 * @returns Pointer to the new cache entry or NULL if out of memory.
1645 * @param pBlkCache The cache the entry belongs to.
1646 * @param off Start offset.
1647 * @param cbData Size of the cache entry.
1648 * @param pbBuffer Pointer to the buffer to use.
1649 * NULL if a new buffer should be allocated.
1650 * The buffer needs to have the same size of the entry.
1651 */
1652static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbData, uint8_t *pbBuffer)
1653{
1654 AssertReturn(cbData <= UINT32_MAX, NULL);
1655 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1656
1657 if (RT_UNLIKELY(!pEntryNew))
1658 return NULL;
1659
1660 pEntryNew->Core.Key = off;
1661 pEntryNew->Core.KeyLast = off + cbData - 1;
1662 pEntryNew->pBlkCache = pBlkCache;
1663 pEntryNew->fFlags = 0;
1664 pEntryNew->cRefs = 1; /* We are using it now. */
1665 pEntryNew->pList = NULL;
1666 pEntryNew->cbData = (uint32_t)cbData;
1667 pEntryNew->pWaitingHead = NULL;
1668 pEntryNew->pWaitingTail = NULL;
1669 if (pbBuffer)
1670 pEntryNew->pbData = pbBuffer;
1671 else
1672 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1673
1674 if (RT_UNLIKELY(!pEntryNew->pbData))
1675 {
1676 RTMemFree(pEntryNew);
1677 return NULL;
1678 }
1679
1680 return pEntryNew;
1681}
1682
1683/**
1684 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1685 * in exclusive mode.
1686 *
1687 * @returns true if the flag in fSet is set and the one in fClear is clear.
1688 * false otherwise.
1689 * The R/W semaphore is only held if true is returned.
1690 *
1691 * @param pBlkCache The endpoint cache instance data.
1692 * @param pEntry The entry to check the flags for.
1693 * @param fSet The flag which is tested to be set.
1694 * @param fClear The flag which is tested to be clear.
1695 */
1696DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1697 PPDMBLKCACHEENTRY pEntry,
1698 uint32_t fSet, uint32_t fClear)
1699{
1700 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1701 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1702
1703 if (fPassed)
1704 {
1705 /* Acquire the lock and check again because the completion callback might have raced us. */
1706 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1707
1708 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1709 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1710
1711 /* Drop the lock if we didn't passed the test. */
1712 if (!fPassed)
1713 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1714 }
1715
1716 return fPassed;
1717}
1718
1719/**
1720 * Adds a segment to the waiting list for a cache entry
1721 * which is currently in progress.
1722 *
1723 * @returns nothing.
1724 * @param pEntry The cache entry to add the segment to.
1725 * @param pWaiter The waiter entry to add.
1726 */
1727DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1728 PPDMBLKCACHEWAITER pWaiter)
1729{
1730 pWaiter->pNext = NULL;
1731
1732 if (pEntry->pWaitingHead)
1733 {
1734 AssertPtr(pEntry->pWaitingTail);
1735
1736 pEntry->pWaitingTail->pNext = pWaiter;
1737 pEntry->pWaitingTail = pWaiter;
1738 }
1739 else
1740 {
1741 Assert(!pEntry->pWaitingTail);
1742
1743 pEntry->pWaitingHead = pWaiter;
1744 pEntry->pWaitingTail = pWaiter;
1745 }
1746}
1747
1748/**
1749 * Add a buffer described by the I/O memory context
1750 * to the entry waiting for completion.
1751 *
1752 * @returns VBox status code.
1753 * @param pEntry The entry to add the buffer to.
1754 * @param pReq The request.
1755 * @param pSgBuf The scatter/gather buffer. Will be advanced by cbData.
1756 * @param offDiff Offset from the start of the buffer in the entry.
1757 * @param cbData Amount of data to wait for onthis entry.
1758 * @param fWrite Flag whether the task waits because it wants to write to
1759 * the cache entry.
1760 */
1761static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEREQ pReq,
1762 PRTSGBUF pSgBuf, uint64_t offDiff, size_t cbData, bool fWrite)
1763{
1764 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1765 if (!pWaiter)
1766 return VERR_NO_MEMORY;
1767
1768 ASMAtomicIncU32(&pReq->cXfersPending);
1769 pWaiter->pReq = pReq;
1770 pWaiter->offCacheEntry = offDiff;
1771 pWaiter->cbTransfer = cbData;
1772 pWaiter->fWrite = fWrite;
1773 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1774 RTSgBufAdvance(pSgBuf, cbData);
1775
1776 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1777
1778 return VINF_SUCCESS;
1779}
1780
1781/**
1782 * Calculate aligned offset and size for a new cache entry which do not
1783 * intersect with an already existing entry and the file end.
1784 *
1785 * @returns The number of bytes the entry can hold of the requested amount
1786 * of bytes.
1787 * @param pBlkCache The endpoint cache.
1788 * @param off The start offset.
1789 * @param cb The number of bytes the entry needs to hold at
1790 * least.
1791 * @param pcbEntry Where to store the number of bytes the entry can hold.
1792 * Can be less than given because of other entries.
1793 */
1794static uint32_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1795 uint64_t off, uint32_t cb,
1796 uint32_t *pcbEntry)
1797{
1798 /* Get the best fit entries around the offset */
1799 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1800 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1801
1802 /* Log the info */
1803 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1804 pEntryAbove ? "B" : "No b",
1805 off,
1806 pEntryAbove ? pEntryAbove->Core.Key : 0,
1807 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1808 pEntryAbove ? pEntryAbove->cbData : 0));
1809
1810 uint32_t cbNext;
1811 uint32_t cbInEntry;
1812 if ( pEntryAbove
1813 && off + cb > pEntryAbove->Core.Key)
1814 {
1815 cbInEntry = (uint32_t)(pEntryAbove->Core.Key - off);
1816 cbNext = (uint32_t)(pEntryAbove->Core.Key - off);
1817 }
1818 else
1819 {
1820 cbInEntry = cb;
1821 cbNext = cb;
1822 }
1823
1824 /* A few sanity checks */
1825 AssertMsg(!pEntryAbove || off + cbNext <= pEntryAbove->Core.Key,
1826 ("Aligned size intersects with another cache entry\n"));
1827 Assert(cbInEntry <= cbNext);
1828
1829 if (pEntryAbove)
1830 pdmBlkCacheEntryRelease(pEntryAbove);
1831
1832 LogFlow(("off=%llu cbNext=%u\n", off, cbNext));
1833
1834 *pcbEntry = cbNext;
1835
1836 return cbInEntry;
1837}
1838
1839/**
1840 * Create a new cache entry evicting data from the cache if required.
1841 *
1842 * @returns Pointer to the new cache entry or NULL
1843 * if not enough bytes could be evicted from the cache.
1844 * @param pBlkCache The endpoint cache.
1845 * @param off The offset.
1846 * @param cb Number of bytes the cache entry should have.
1847 * @param pcbData Where to store the number of bytes the new
1848 * entry can hold. May be lower than actually
1849 * requested due to another entry intersecting the
1850 * access range.
1851 */
1852static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cb, size_t *pcbData)
1853{
1854 uint32_t cbEntry = 0;
1855
1856 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, (uint32_t)cb, &cbEntry);
1857 AssertReturn(cb <= UINT32_MAX, NULL);
1858
1859 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1860 pdmBlkCacheLockEnter(pCache);
1861
1862 PPDMBLKCACHEENTRY pEntryNew = NULL;
1863 uint8_t *pbBuffer = NULL;
1864 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1865 if (fEnough)
1866 {
1867 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1868
1869 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, pbBuffer);
1870 if (RT_LIKELY(pEntryNew))
1871 {
1872 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1873 pdmBlkCacheAdd(pCache, cbEntry);
1874 pdmBlkCacheLockLeave(pCache);
1875
1876 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1877
1878 AssertMsg( (off >= pEntryNew->Core.Key)
1879 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1880 ("Overflow in calculation off=%llu\n", off));
1881 }
1882 else
1883 pdmBlkCacheLockLeave(pCache);
1884 }
1885 else
1886 pdmBlkCacheLockLeave(pCache);
1887
1888 return pEntryNew;
1889}
1890
1891static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(void *pvUser)
1892{
1893 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1894
1895 if (RT_LIKELY(pReq))
1896 {
1897 pReq->pvUser = pvUser;
1898 pReq->rcReq = VINF_SUCCESS;
1899 pReq->cXfersPending = 0;
1900 }
1901
1902 return pReq;
1903}
1904
1905static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1906{
1907 switch (pBlkCache->enmType)
1908 {
1909 case PDMBLKCACHETYPE_DEV:
1910 {
1911 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1912 pReq->pvUser, pReq->rcReq);
1913 break;
1914 }
1915 case PDMBLKCACHETYPE_DRV:
1916 {
1917 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1918 pReq->pvUser, pReq->rcReq);
1919 break;
1920 }
1921 case PDMBLKCACHETYPE_USB:
1922 {
1923 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1924 pReq->pvUser, pReq->rcReq);
1925 break;
1926 }
1927 case PDMBLKCACHETYPE_INTERNAL:
1928 {
1929 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1930 pReq->pvUser, pReq->rcReq);
1931 break;
1932 }
1933 default:
1934 AssertMsgFailed(("Unknown block cache type!\n"));
1935 }
1936
1937 RTMemFree(pReq);
1938}
1939
1940static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1941 int rcReq, bool fCallHandler)
1942{
1943 if (RT_FAILURE(rcReq))
1944 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1945
1946 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1947 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1948
1949 if (!cXfersPending)
1950 {
1951 if (fCallHandler)
1952 pdmBlkCacheReqComplete(pBlkCache, pReq);
1953 return true;
1954 }
1955
1956 LogFlowFunc(("pReq=%#p cXfersPending=%u\n", pReq, cXfersPending));
1957 return false;
1958}
1959
1960VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1961 PCRTSGBUF pSgBuf, size_t cbRead, void *pvUser)
1962{
1963 int rc = VINF_SUCCESS;
1964 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1965 PPDMBLKCACHEENTRY pEntry;
1966 PPDMBLKCACHEREQ pReq;
1967
1968 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbRead=%u pvUser=%#p\n",
1969 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbRead, pvUser));
1970
1971 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
1972 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
1973
1974 RTSGBUF SgBuf;
1975 RTSgBufClone(&SgBuf, pSgBuf);
1976
1977 /* Allocate new request structure. */
1978 pReq = pdmBlkCacheReqAlloc(pvUser);
1979 if (RT_UNLIKELY(!pReq))
1980 return VERR_NO_MEMORY;
1981
1982 /* Increment data transfer counter to keep the request valid while we access it. */
1983 ASMAtomicIncU32(&pReq->cXfersPending);
1984
1985 while (cbRead)
1986 {
1987 size_t cbToRead;
1988
1989 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1990
1991 /*
1992 * If there is no entry we try to create a new one eviciting unused pages
1993 * if the cache is full. If this is not possible we will pass the request through
1994 * and skip the caching (all entries may be still in progress so they can't
1995 * be evicted)
1996 * If we have an entry it can be in one of the LRU lists where the entry
1997 * contains data (recently used or frequently used LRU) so we can just read
1998 * the data we need and put the entry at the head of the frequently used LRU list.
1999 * In case the entry is in one of the ghost lists it doesn't contain any data.
2000 * We have to fetch it again evicting pages from either T1 or T2 to make room.
2001 */
2002 if (pEntry)
2003 {
2004 uint64_t offDiff = off - pEntry->Core.Key;
2005
2006 AssertMsg(off >= pEntry->Core.Key,
2007 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2008 off, pEntry->Core.Key));
2009
2010 AssertPtr(pEntry->pList);
2011
2012 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
2013
2014 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
2015 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
2016 off, cbToRead));
2017
2018 cbRead -= cbToRead;
2019
2020 if (!cbRead)
2021 STAM_COUNTER_INC(&pCache->cHits);
2022 else
2023 STAM_COUNTER_INC(&pCache->cPartialHits);
2024
2025 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
2026
2027 /* Ghost lists contain no data. */
2028 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2029 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2030 {
2031 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2032 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2033 PDMBLKCACHE_ENTRY_IS_DIRTY))
2034 {
2035 /* Entry didn't completed yet. Append to the list */
2036 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2037 &SgBuf, offDiff, cbToRead,
2038 false /* fWrite */);
2039 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2040 }
2041 else
2042 {
2043 /* Read as much as we can from the entry. */
2044 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2045 }
2046
2047 /* Move this entry to the top position */
2048 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2049 {
2050 pdmBlkCacheLockEnter(pCache);
2051 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2052 pdmBlkCacheLockLeave(pCache);
2053 }
2054 /* Release the entry */
2055 pdmBlkCacheEntryRelease(pEntry);
2056 }
2057 else
2058 {
2059 uint8_t *pbBuffer = NULL;
2060
2061 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2062
2063 pdmBlkCacheLockEnter(pCache);
2064 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2065 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2066
2067 /* Move the entry to Am and fetch it to the cache. */
2068 if (fEnough)
2069 {
2070 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2071 pdmBlkCacheAdd(pCache, pEntry->cbData);
2072 pdmBlkCacheLockLeave(pCache);
2073
2074 if (pbBuffer)
2075 pEntry->pbData = pbBuffer;
2076 else
2077 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2078 AssertPtr(pEntry->pbData);
2079
2080 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2081 &SgBuf, offDiff, cbToRead,
2082 false /* fWrite */);
2083 pdmBlkCacheEntryReadFromMedium(pEntry);
2084 /* Release the entry */
2085 pdmBlkCacheEntryRelease(pEntry);
2086 }
2087 else
2088 {
2089 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2090 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2091 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2092 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2093 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2094
2095 pdmBlkCacheLockLeave(pCache);
2096
2097 RTMemFree(pEntry);
2098
2099 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2100 &SgBuf, off, cbToRead,
2101 PDMBLKCACHEXFERDIR_READ);
2102 }
2103 }
2104 }
2105 else
2106 {
2107#ifdef VBOX_WITH_IO_READ_CACHE
2108 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2109 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2110 off, cbRead,
2111 &cbToRead);
2112
2113 cbRead -= cbToRead;
2114
2115 if (pEntryNew)
2116 {
2117 if (!cbRead)
2118 STAM_COUNTER_INC(&pCache->cMisses);
2119 else
2120 STAM_COUNTER_INC(&pCache->cPartialHits);
2121
2122 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2123 &SgBuf,
2124 off - pEntryNew->Core.Key,
2125 cbToRead,
2126 false /* fWrite */);
2127 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2128 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2129 }
2130 else
2131 {
2132 /*
2133 * There is not enough free space in the cache.
2134 * Pass the request directly to the I/O manager.
2135 */
2136 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2137
2138 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2139 &SgBuf, off, cbToRead,
2140 PDMBLKCACHEXFERDIR_READ);
2141 }
2142#else
2143 /* Clip read size if necessary. */
2144 PPDMBLKCACHEENTRY pEntryAbove;
2145 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2146
2147 if (pEntryAbove)
2148 {
2149 if (off + cbRead > pEntryAbove->Core.Key)
2150 cbToRead = pEntryAbove->Core.Key - off;
2151 else
2152 cbToRead = cbRead;
2153
2154 pdmBlkCacheEntryRelease(pEntryAbove);
2155 }
2156 else
2157 cbToRead = cbRead;
2158
2159 cbRead -= cbToRead;
2160 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2161 &SgBuf, off, cbToRead,
2162 PDMBLKCACHEXFERDIR_READ);
2163#endif
2164 }
2165 off += cbToRead;
2166 }
2167
2168 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2169 rc = VINF_AIO_TASK_PENDING;
2170 else
2171 {
2172 rc = pReq->rcReq;
2173 RTMemFree(pReq);
2174 }
2175
2176 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2177
2178 return rc;
2179}
2180
2181VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off, PCRTSGBUF pSgBuf, size_t cbWrite, void *pvUser)
2182{
2183 int rc = VINF_SUCCESS;
2184 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2185 PPDMBLKCACHEENTRY pEntry;
2186 PPDMBLKCACHEREQ pReq;
2187
2188 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2189 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbWrite, pvUser));
2190
2191 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2192 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2193
2194 RTSGBUF SgBuf;
2195 RTSgBufClone(&SgBuf, pSgBuf);
2196
2197 /* Allocate new request structure. */
2198 pReq = pdmBlkCacheReqAlloc(pvUser);
2199 if (RT_UNLIKELY(!pReq))
2200 return VERR_NO_MEMORY;
2201
2202 /* Increment data transfer counter to keep the request valid while we access it. */
2203 ASMAtomicIncU32(&pReq->cXfersPending);
2204
2205 while (cbWrite)
2206 {
2207 size_t cbToWrite;
2208
2209 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2210 if (pEntry)
2211 {
2212 /* Write the data into the entry and mark it as dirty */
2213 AssertPtr(pEntry->pList);
2214
2215 uint64_t offDiff = off - pEntry->Core.Key;
2216 AssertMsg(off >= pEntry->Core.Key, ("Overflow in calculation off=%llu OffsetAligned=%llu\n", off, pEntry->Core.Key));
2217
2218 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2219 cbWrite -= cbToWrite;
2220
2221 if (!cbWrite)
2222 STAM_COUNTER_INC(&pCache->cHits);
2223 else
2224 STAM_COUNTER_INC(&pCache->cPartialHits);
2225
2226 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2227
2228 /* Ghost lists contain no data. */
2229 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2230 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2231 {
2232 /* Check if the entry is dirty. */
2233 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2234 PDMBLKCACHE_ENTRY_IS_DIRTY,
2235 0))
2236 {
2237 /* If it is already dirty but not in progress just update the data. */
2238 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2239 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2240 else
2241 {
2242 /* The data isn't written to the file yet */
2243 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2244 &SgBuf, offDiff, cbToWrite,
2245 true /* fWrite */);
2246 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2247 }
2248
2249 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2250 }
2251 else /* Dirty bit not set */
2252 {
2253 /*
2254 * Check if a read is in progress for this entry.
2255 * We have to defer processing in that case.
2256 */
2257 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2258 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2259 0))
2260 {
2261 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2262 &SgBuf, offDiff, cbToWrite,
2263 true /* fWrite */);
2264 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2265 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2266 }
2267 else /* I/O in progress flag not set */
2268 {
2269 /* Write as much as we can into the entry and update the file. */
2270 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2271
2272 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2273 if (fCommit)
2274 pdmBlkCacheCommitDirtyEntries(pCache);
2275 }
2276 } /* Dirty bit not set */
2277
2278 /* Move this entry to the top position */
2279 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2280 {
2281 pdmBlkCacheLockEnter(pCache);
2282 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2283 pdmBlkCacheLockLeave(pCache);
2284 }
2285
2286 pdmBlkCacheEntryRelease(pEntry);
2287 }
2288 else /* Entry is on the ghost list */
2289 {
2290 uint8_t *pbBuffer = NULL;
2291
2292 pdmBlkCacheLockEnter(pCache);
2293 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2294 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2295
2296 if (fEnough)
2297 {
2298 /* Move the entry to Am and fetch it to the cache. */
2299 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2300 pdmBlkCacheAdd(pCache, pEntry->cbData);
2301 pdmBlkCacheLockLeave(pCache);
2302
2303 if (pbBuffer)
2304 pEntry->pbData = pbBuffer;
2305 else
2306 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2307 AssertPtr(pEntry->pbData);
2308
2309 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2310 &SgBuf, offDiff, cbToWrite,
2311 true /* fWrite */);
2312 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2313 pdmBlkCacheEntryReadFromMedium(pEntry);
2314
2315 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2316 pdmBlkCacheEntryRelease(pEntry);
2317 }
2318 else
2319 {
2320 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2321 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2322 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2323 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2324 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2325
2326 pdmBlkCacheLockLeave(pCache);
2327
2328 RTMemFree(pEntry);
2329 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2330 &SgBuf, off, cbToWrite,
2331 PDMBLKCACHEXFERDIR_WRITE);
2332 }
2333 }
2334 }
2335 else /* No entry found */
2336 {
2337 /*
2338 * No entry found. Try to create a new cache entry to store the data in and if that fails
2339 * write directly to the file.
2340 */
2341 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2342 off, cbWrite,
2343 &cbToWrite);
2344
2345 cbWrite -= cbToWrite;
2346
2347 if (pEntryNew)
2348 {
2349 uint64_t offDiff = off - pEntryNew->Core.Key;
2350
2351 STAM_COUNTER_INC(&pCache->cHits);
2352
2353 /*
2354 * Check if it is possible to just write the data without waiting
2355 * for it to get fetched first.
2356 */
2357 if (!offDiff && pEntryNew->cbData == cbToWrite)
2358 {
2359 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2360
2361 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2362 if (fCommit)
2363 pdmBlkCacheCommitDirtyEntries(pCache);
2364 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2365 }
2366 else
2367 {
2368 /* Defer the write and fetch the data from the endpoint. */
2369 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2370 &SgBuf, offDiff, cbToWrite,
2371 true /* fWrite */);
2372 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2373 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2374 }
2375
2376 pdmBlkCacheEntryRelease(pEntryNew);
2377 }
2378 else
2379 {
2380 /*
2381 * There is not enough free space in the cache.
2382 * Pass the request directly to the I/O manager.
2383 */
2384 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2385
2386 STAM_COUNTER_INC(&pCache->cMisses);
2387
2388 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2389 &SgBuf, off, cbToWrite,
2390 PDMBLKCACHEXFERDIR_WRITE);
2391 }
2392 }
2393
2394 off += cbToWrite;
2395 }
2396
2397 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2398 rc = VINF_AIO_TASK_PENDING;
2399 else
2400 {
2401 rc = pReq->rcReq;
2402 RTMemFree(pReq);
2403 }
2404
2405 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2406
2407 return rc;
2408}
2409
2410VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2411{
2412 int rc = VINF_SUCCESS;
2413 PPDMBLKCACHEREQ pReq;
2414
2415 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2416
2417 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2418 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2419
2420 /* Commit dirty entries in the cache. */
2421 pdmBlkCacheCommit(pBlkCache);
2422
2423 /* Allocate new request structure. */
2424 pReq = pdmBlkCacheReqAlloc(pvUser);
2425 if (RT_UNLIKELY(!pReq))
2426 return VERR_NO_MEMORY;
2427
2428 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2429 PDMBLKCACHEXFERDIR_FLUSH);
2430 AssertRC(rc);
2431
2432 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2433 return VINF_AIO_TASK_PENDING;
2434}
2435
2436VMMR3DECL(int) PDMR3BlkCacheDiscard(PPDMBLKCACHE pBlkCache, PCRTRANGE paRanges,
2437 unsigned cRanges, void *pvUser)
2438{
2439 int rc = VINF_SUCCESS;
2440 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2441 PPDMBLKCACHEENTRY pEntry;
2442 PPDMBLKCACHEREQ pReq;
2443
2444 LogFlowFunc((": pBlkCache=%#p{%s} paRanges=%#p cRanges=%u pvUser=%#p\n",
2445 pBlkCache, pBlkCache->pszId, paRanges, cRanges, pvUser));
2446
2447 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2448 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2449
2450 /* Allocate new request structure. */
2451 pReq = pdmBlkCacheReqAlloc(pvUser);
2452 if (RT_UNLIKELY(!pReq))
2453 return VERR_NO_MEMORY;
2454
2455 /* Increment data transfer counter to keep the request valid while we access it. */
2456 ASMAtomicIncU32(&pReq->cXfersPending);
2457
2458 for (unsigned i = 0; i < cRanges; i++)
2459 {
2460 uint64_t offCur = paRanges[i].offStart;
2461 size_t cbLeft = paRanges[i].cbRange;
2462
2463 while (cbLeft)
2464 {
2465 size_t cbThisDiscard = 0;
2466
2467 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, offCur);
2468
2469 if (pEntry)
2470 {
2471 /* Write the data into the entry and mark it as dirty */
2472 AssertPtr(pEntry->pList);
2473
2474 uint64_t offDiff = offCur - pEntry->Core.Key;
2475
2476 AssertMsg(offCur >= pEntry->Core.Key,
2477 ("Overflow in calculation offCur=%llu OffsetAligned=%llu\n",
2478 offCur, pEntry->Core.Key));
2479
2480 cbThisDiscard = RT_MIN(pEntry->cbData - offDiff, cbLeft);
2481
2482 /* Ghost lists contain no data. */
2483 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2484 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2485 {
2486 /* Check if the entry is dirty. */
2487 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2488 PDMBLKCACHE_ENTRY_IS_DIRTY,
2489 0))
2490 {
2491 /* If it is dirty but not yet in progress remove it. */
2492 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2493 {
2494 pdmBlkCacheLockEnter(pCache);
2495 pdmBlkCacheEntryRemoveFromList(pEntry);
2496
2497 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2498 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2499 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2500
2501 pdmBlkCacheLockLeave(pCache);
2502
2503 RTMemFree(pEntry);
2504 }
2505 else
2506 {
2507#if 0
2508 /* The data isn't written to the file yet */
2509 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2510 &SgBuf, offDiff, cbToWrite,
2511 true /* fWrite */);
2512 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2513#endif
2514 }
2515
2516 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2517 pdmBlkCacheEntryRelease(pEntry);
2518 }
2519 else /* Dirty bit not set */
2520 {
2521 /*
2522 * Check if a read is in progress for this entry.
2523 * We have to defer processing in that case.
2524 */
2525 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2526 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2527 0))
2528 {
2529#if 0
2530 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2531 &SgBuf, offDiff, cbToWrite,
2532 true /* fWrite */);
2533#endif
2534 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2535 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2536 pdmBlkCacheEntryRelease(pEntry);
2537 }
2538 else /* I/O in progress flag not set */
2539 {
2540 pdmBlkCacheLockEnter(pCache);
2541 pdmBlkCacheEntryRemoveFromList(pEntry);
2542
2543 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2544 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2545 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2546 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2547 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2548
2549 pdmBlkCacheLockLeave(pCache);
2550
2551 RTMemFree(pEntry);
2552 }
2553 } /* Dirty bit not set */
2554 }
2555 else /* Entry is on the ghost list just remove cache entry. */
2556 {
2557 pdmBlkCacheLockEnter(pCache);
2558 pdmBlkCacheEntryRemoveFromList(pEntry);
2559
2560 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2561 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2562 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2563 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2564 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2565
2566 pdmBlkCacheLockLeave(pCache);
2567
2568 RTMemFree(pEntry);
2569 }
2570 }
2571 /* else: no entry found. */
2572
2573 offCur += cbThisDiscard;
2574 cbLeft -= cbThisDiscard;
2575 }
2576 }
2577
2578 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2579 rc = VINF_AIO_TASK_PENDING;
2580 else
2581 {
2582 rc = pReq->rcReq;
2583 RTMemFree(pReq);
2584 }
2585
2586 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2587
2588 return rc;
2589}
2590
2591/**
2592 * Completes a task segment freeing all resources and completes the task handle
2593 * if everything was transferred.
2594 *
2595 * @returns Next task segment handle.
2596 * @param pBlkCache The endpoint block cache.
2597 * @param pWaiter Task segment to complete.
2598 * @param rc Status code to set.
2599 */
2600static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEWAITER pWaiter, int rc)
2601{
2602 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2603 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2604
2605 pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, true);
2606
2607 RTMemFree(pWaiter);
2608
2609 return pNext;
2610}
2611
2612static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2613{
2614 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2615 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2616
2617 /* Reference the entry now as we are clearing the I/O in progress flag
2618 * which protected the entry till now. */
2619 pdmBlkCacheEntryRef(pEntry);
2620
2621 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2622 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2623
2624 /* Process waiting segment list. The data in entry might have changed in-between. */
2625 bool fDirty = false;
2626 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2627 PPDMBLKCACHEWAITER pCurr = pComplete;
2628
2629 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2630 ("The list tail was not updated correctly\n"));
2631 pEntry->pWaitingTail = NULL;
2632 pEntry->pWaitingHead = NULL;
2633
2634 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2635 {
2636 /*
2637 * An error here is difficult to handle as the original request completed already.
2638 * The error is logged for now and the VM is paused.
2639 * If the user continues the entry is written again in the hope
2640 * the user fixed the problem and the next write succeeds.
2641 */
2642 if (RT_FAILURE(rcIoXfer))
2643 {
2644 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2645 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2646
2647 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2648 {
2649 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2650 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2651 "Make sure there is enough free space on the disk and that the disk is working properly. "
2652 "Operation can be resumed afterwards"),
2653 pBlkCache->pszId, rcIoXfer);
2654 AssertRC(rc);
2655 }
2656
2657 /* Mark the entry as dirty again to get it added to the list later on. */
2658 fDirty = true;
2659 }
2660
2661 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2662
2663 while (pCurr)
2664 {
2665 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2666
2667 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2668 fDirty = true;
2669 pCurr = pCurr->pNext;
2670 }
2671 }
2672 else
2673 {
2674 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2675 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2676 ("Invalid flags set\n"));
2677
2678 while (pCurr)
2679 {
2680 if (pCurr->fWrite)
2681 {
2682 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2683 fDirty = true;
2684 }
2685 else
2686 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2687
2688 pCurr = pCurr->pNext;
2689 }
2690 }
2691
2692 bool fCommit = false;
2693 if (fDirty)
2694 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2695
2696 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2697
2698 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2699 pdmBlkCacheEntryRelease(pEntry);
2700
2701 if (fCommit)
2702 pdmBlkCacheCommitDirtyEntries(pCache);
2703
2704 /* Complete waiters now. */
2705 while (pComplete)
2706 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2707}
2708
2709VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2710{
2711 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2712
2713 if (hIoXfer->fIoCache)
2714 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2715 else
2716 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, rcIoXfer, true);
2717
2718 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
2719 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool) completed (%u now active)",
2720 hIoXfer, hIoXfer->fIoCache, pBlkCache->cIoXfersActive);
2721 RTMemFree(hIoXfer);
2722}
2723
2724/**
2725 * Callback for the AVL do with all routine. Waits for a cachen entry to finish any pending I/O.
2726 *
2727 * @returns IPRT status code.
2728 * @param pNode The node to destroy.
2729 * @param pvUser Opaque user data.
2730 */
2731static DECLCALLBACK(int) pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser)
2732{
2733 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
2734 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
2735 NOREF(pvUser);
2736
2737 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
2738 {
2739 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
2740 pdmBlkCacheEntryRef(pEntry);
2741 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2742
2743 RTThreadSleep(1);
2744
2745 /* Re-enter all locks and drop the reference. */
2746 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2747 pdmBlkCacheEntryRelease(pEntry);
2748 }
2749
2750 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
2751 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
2752
2753 return VINF_SUCCESS;
2754}
2755
2756VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache)
2757{
2758 int rc = VINF_SUCCESS;
2759 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2760
2761 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2762
2763 if (!ASMAtomicReadBool(&pBlkCache->pCache->fIoErrorVmSuspended))
2764 pdmBlkCacheCommit(pBlkCache); /* Can issue new I/O requests. */
2765 ASMAtomicXchgBool(&pBlkCache->fSuspended, true);
2766
2767 /* Wait for all I/O to complete. */
2768 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2769 rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL);
2770 AssertRC(rc);
2771 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2772
2773 return rc;
2774}
2775
2776VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache)
2777{
2778 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2779
2780 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2781
2782 ASMAtomicXchgBool(&pBlkCache->fSuspended, false);
2783
2784 return VINF_SUCCESS;
2785}
2786
2787VMMR3DECL(int) PDMR3BlkCacheClear(PPDMBLKCACHE pBlkCache)
2788{
2789 int rc = VINF_SUCCESS;
2790 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2791
2792 /*
2793 * Commit all dirty entries now (they are waited on for completion during the
2794 * destruction of the AVL tree below).
2795 * The exception is if the VM was paused because of an I/O error before.
2796 */
2797 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
2798 pdmBlkCacheCommit(pBlkCache);
2799
2800 /* Make sure nobody is accessing the cache while we delete the tree. */
2801 pdmBlkCacheLockEnter(pCache);
2802 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2803 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
2804 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2805
2806 pdmBlkCacheLockLeave(pCache);
2807 return rc;
2808}
2809
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette