VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMBlkCache.cpp@ 107132

Last change on this file since 107132 was 106061, checked in by vboxsync, 4 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 100.3 KB
Line 
1/* $Id: PDMBlkCache.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
29 * This component implements an I/O cache based on the 2Q cache algorithm.
30 */
31
32
33/*********************************************************************************************************************************
34* Header Files *
35*********************************************************************************************************************************/
36#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
37#include "PDMInternal.h"
38#include <iprt/asm.h>
39#include <iprt/mem.h>
40#include <iprt/path.h>
41#include <iprt/string.h>
42#include <iprt/trace.h>
43#include <VBox/log.h>
44#include <VBox/vmm/stam.h>
45#include <VBox/vmm/uvm.h>
46#include <VBox/vmm/vm.h>
47
48#include "PDMBlkCacheInternal.h"
49
50
/*********************************************************************************************************************************
*   Defined Constants And Macros                                                                                                 *
*********************************************************************************************************************************/
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given
 *  global cache.  The argument is parenthesised so any pointer expression
 *  can be passed. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n")); \
    } while (0)

/** Asserts that the calling thread is the write owner of the per endpoint
 *  RW semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while (0)

/** Asserts that the calling thread is a read owner of the per endpoint RW
 *  semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while (0)

#else
/* The ownership checks compile to nothing in non-strict builds. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache)            do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache)   do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache)    do { } while (0)
#endif

/** Saved state unit version; the load callback rejects any other value. */
#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1

/* Uncomment to enable some tracing in the block cache code for investigating issues. */
/*#define VBOX_BLKCACHE_TRACING 1*/
86
87
88/*********************************************************************************************************************************
89* Internal Functions *
90*********************************************************************************************************************************/
91
92static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
93 uint64_t off, size_t cbData, uint8_t *pbBuffer);
94static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
95
96
97/**
98 * Add message to the VM trace buffer.
99 *
100 * @param pBlkCache The block cache.
101 * @param pszFmt The format string.
102 * @param ... Additional parameters for the string formatter.
103 */
104DECLINLINE(void) pdmBlkCacheR3TraceMsgF(PPDMBLKCACHE pBlkCache, const char *pszFmt, ...)
105{
106#if defined(VBOX_BLKCACHE_TRACING)
107 va_list va;
108 va_start(va, pszFmt);
109 RTTraceBufAddMsgV(pBlkCache->pCache->pVM->CTX_SUFF(hTraceBuf), pszFmt, va);
110 va_end(va);
111#else
112 RT_NOREF2(pBlkCache, pszFmt);
113#endif
114}
115
116/**
117 * Decrement the reference counter of the given cache entry.
118 *
119 * @param pEntry The entry to release.
120 */
121DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
122{
123 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
124 ASMAtomicDecU32(&pEntry->cRefs);
125}
126
127/**
128 * Increment the reference counter of the given cache entry.
129 *
130 * @param pEntry The entry to reference.
131 */
132DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
133{
134 ASMAtomicIncU32(&pEntry->cRefs);
135}
136
#ifdef VBOX_STRICT
/**
 * Asserts that the byte accounting of the global cache is consistent
 * (strict builds only).
 *
 * @param   pCache  The global cache instance.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    PPDMBLKLRULIST pLstIn  = &pCache->LruRecentlyUsedIn;
    PPDMBLKLRULIST pLstOut = &pCache->LruRecentlyUsedOut;
    PPDMBLKLRULIST pLstFru = &pCache->LruFrequentlyUsed;

    /* The total must stay within the configured limit. */
    AssertMsg(pCache->cbCached <= pCache->cbMax,
              ("Current amount of cached data exceeds maximum\n"));

    /* The two lists holding data must add up to the global counter. */
    AssertMsg(pLstIn->cbCached + pLstFru->cbCached == pCache->cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The paged out list has its own limit. */
    AssertMsg(pLstOut->cbCached <= pCache->cbRecentlyUsedOutMax,
              ("Paged out list exceeds maximum\n"));
}
#endif
152
153DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
154{
155 RTCritSectEnter(&pCache->CritSect);
156#ifdef VBOX_STRICT
157 pdmBlkCacheValidate(pCache);
158#endif
159}
160
161DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
162{
163#ifdef VBOX_STRICT
164 pdmBlkCacheValidate(pCache);
165#endif
166 RTCritSectLeave(&pCache->CritSect);
167}
168
169DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
170{
171 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
172 pCache->cbCached -= cbAmount;
173}
174
175DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
176{
177 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
178 pCache->cbCached += cbAmount;
179}
180
181DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
182{
183 pList->cbCached += cbAmount;
184}
185
186DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
187{
188 pList->cbCached -= cbAmount;
189}
190
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Walks an LRU list and asserts that it is well formed.
 *
 * For every element it checks that the element does not show up again
 * further down the list, that it is not the forbidden element and, for the
 * last element, that it is the list tail.
 *
 * @param   pList       The LRU list to check.
 * @param   pNotInList  Element which must not occur in the list (may be NULL).
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pCur = pList->pHead; pCur; pCur = pCur->pNext)
    {
        /* Compare against every later element to catch duplicates and cycles. */
        for (PPDMBLKCACHEENTRY pLater = pCur->pNext; pLater; pLater = pLater->pNext)
        {
            AssertMsg(pCur != pLater,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pCur, pList));
        }

        AssertMsg(pCur != pNotInList, ("Not allowed entry %#p is in list\n", pCur));

        if (pCur->pNext == NULL)
        {
            AssertMsg(pCur == pList->pTail, ("End of list reached but last element is not list tail\n"));
        }
    }
}
#endif
224
225/**
226 * Unlinks a cache entry from the LRU list it is assigned to.
227 *
228 * @param pEntry The entry to unlink.
229 */
230static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
231{
232 PPDMBLKLRULIST pList = pEntry->pList;
233 PPDMBLKCACHEENTRY pPrev, pNext;
234
235 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
236
237 AssertPtr(pList);
238
239#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
240 pdmBlkCacheCheckList(pList, NULL);
241#endif
242
243 pPrev = pEntry->pPrev;
244 pNext = pEntry->pNext;
245
246 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
247 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
248
249 if (pPrev)
250 pPrev->pNext = pNext;
251 else
252 {
253 pList->pHead = pNext;
254
255 if (pNext)
256 pNext->pPrev = NULL;
257 }
258
259 if (pNext)
260 pNext->pPrev = pPrev;
261 else
262 {
263 pList->pTail = pPrev;
264
265 if (pPrev)
266 pPrev->pNext = NULL;
267 }
268
269 pEntry->pList = NULL;
270 pEntry->pPrev = NULL;
271 pEntry->pNext = NULL;
272 pdmBlkCacheListSub(pList, pEntry->cbData);
273#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
274 pdmBlkCacheCheckList(pList, pEntry);
275#endif
276}
277
278/**
279 * Adds a cache entry to the given LRU list unlinking it from the currently
280 * assigned list if needed.
281 *
282 * @param pList List to the add entry to.
283 * @param pEntry Entry to add.
284 */
285static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
286{
287 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
288#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
289 pdmBlkCacheCheckList(pList, NULL);
290#endif
291
292 /* Remove from old list if needed */
293 if (pEntry->pList)
294 pdmBlkCacheEntryRemoveFromList(pEntry);
295
296 pEntry->pNext = pList->pHead;
297 if (pList->pHead)
298 pList->pHead->pPrev = pEntry;
299 else
300 {
301 Assert(!pList->pTail);
302 pList->pTail = pEntry;
303 }
304
305 pEntry->pPrev = NULL;
306 pList->pHead = pEntry;
307 pdmBlkCacheListAdd(pList, pEntry->cbData);
308 pEntry->pList = pList;
309#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
310 pdmBlkCacheCheckList(pList, NULL);
311#endif
312}
313
314/**
315 * Destroys a LRU list freeing all entries.
316 *
317 * @param pList Pointer to the LRU list to destroy.
318 *
319 * @note The caller must own the critical section of the cache.
320 */
321static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
322{
323 while (pList->pHead)
324 {
325 PPDMBLKCACHEENTRY pEntry = pList->pHead;
326
327 pList->pHead = pEntry->pNext;
328
329 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
330 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
331
332 RTMemPageFree(pEntry->pbData, pEntry->cbData);
333 RTMemFree(pEntry);
334 }
335}
336
337/**
338 * Tries to remove the given amount of bytes from a given list in the cache
339 * moving the entries to one of the given ghosts lists
340 *
341 * @returns Amount of data which could be freed.
342 * @param pCache Pointer to the global cache data.
343 * @param cbData The amount of the data to free.
344 * @param pListSrc The source list to evict data from.
345 * @param pGhostListDst Where the ghost list removed entries should be
346 * moved to, NULL if the entry should be freed.
347 * @param fReuseBuffer Flag whether a buffer should be reused if it has
348 * the same size
349 * @param ppbBuffer Where to store the address of the buffer if an
350 * entry with the same size was found and
351 * fReuseBuffer is true.
352 *
353 * @note This function may return fewer bytes than requested because entries
354 * may be marked as non evictable if they are used for I/O at the
355 * moment.
356 */
357static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
358 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
359 bool fReuseBuffer, uint8_t **ppbBuffer)
360{
361 size_t cbEvicted = 0;
362
363 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
364
365 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
366 AssertMsg( !pGhostListDst
367 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
368 ("Destination list must be NULL or the recently used but paged out list\n"));
369
370 if (fReuseBuffer)
371 {
372 AssertPtr(ppbBuffer);
373 *ppbBuffer = NULL;
374 }
375
376 /* Start deleting from the tail. */
377 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
378
379 while ((cbEvicted < cbData) && pEntry)
380 {
381 PPDMBLKCACHEENTRY pCurr = pEntry;
382
383 pEntry = pEntry->pPrev;
384
385 /* We can't evict pages which are currently in progress or dirty but not in progress */
386 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
387 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
388 {
389 /* Ok eviction candidate. Grab the endpoint semaphore and check again
390 * because somebody else might have raced us. */
391 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
392 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
393
394 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
395 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
396 {
397 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
398
399 if (fReuseBuffer && pCurr->cbData == cbData)
400 {
401 STAM_COUNTER_INC(&pCache->StatBuffersReused);
402 *ppbBuffer = pCurr->pbData;
403 }
404 else if (pCurr->pbData)
405 RTMemPageFree(pCurr->pbData, pCurr->cbData);
406
407 pCurr->pbData = NULL;
408 cbEvicted += pCurr->cbData;
409
410 pdmBlkCacheEntryRemoveFromList(pCurr);
411 pdmBlkCacheSub(pCache, pCurr->cbData);
412
413 if (pGhostListDst)
414 {
415 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
416
417 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
418
419 /* We have to remove the last entries from the paged out list. */
420 while ( pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax
421 && pGhostEntFree)
422 {
423 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
424 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
425
426 pGhostEntFree = pGhostEntFree->pPrev;
427
428 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
429
430 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
431 {
432 pdmBlkCacheEntryRemoveFromList(pFree);
433
434 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
435 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
436 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
437
438 RTMemFree(pFree);
439 }
440
441 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
442 }
443
444 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
445 {
446 /* Couldn't remove enough entries. Delete */
447 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
448 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
449 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
450
451 RTMemFree(pCurr);
452 }
453 else
454 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
455 }
456 else
457 {
458 /* Delete the entry from the AVL tree it is assigned to. */
459 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
460 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
461 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
462
463 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
464 RTMemFree(pCurr);
465 }
466 }
467 else
468 {
469 LogFlow(("Someone raced us, entry %#p (%u bytes) cannot be evicted any more (fFlags=%#x cRefs=%#x)\n",
470 pCurr, pCurr->cbData, pCurr->fFlags, pCurr->cRefs));
471 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
472 }
473
474 }
475 else
476 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
477 }
478
479 return cbEvicted;
480}
481
482static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
483{
484 size_t cbRemoved = 0;
485
486 if ((pCache->cbCached + cbData) < pCache->cbMax)
487 return true;
488 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
489 {
490 /* Try to evict as many bytes as possible from A1in */
491 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
492 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
493
494 /*
495 * If it was not possible to remove enough entries
496 * try the frequently accessed cache.
497 */
498 if (cbRemoved < cbData)
499 {
500 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
501
502 /*
503 * If we removed something we can't pass the reuse buffer flag anymore because
504 * we don't need to evict that much data
505 */
506 if (!cbRemoved)
507 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
508 NULL, fReuseBuffer, ppbBuffer);
509 else
510 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
511 NULL, false, NULL);
512 }
513 }
514 else
515 {
516 /* We have to remove entries from frequently access list. */
517 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
518 NULL, fReuseBuffer, ppbBuffer);
519 }
520
521 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
522 return (cbRemoved >= cbData);
523}
524
525DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbXfer, PPDMBLKCACHEIOXFER pIoXfer)
526{
527 int rc = VINF_SUCCESS;
528
529 LogFlowFunc(("%s: Enqueuing hIoXfer=%#p enmXferDir=%d\n",
530 __FUNCTION__, pIoXfer, pIoXfer->enmXferDir));
531
532 ASMAtomicIncU32(&pBlkCache->cIoXfersActive);
533 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool , %d) queued (%u now active)",
534 pIoXfer, pIoXfer->fIoCache, pIoXfer->enmXferDir, pBlkCache->cIoXfersActive);
535
536 switch (pBlkCache->enmType)
537 {
538 case PDMBLKCACHETYPE_DEV:
539 {
540 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
541 pIoXfer->enmXferDir,
542 off, cbXfer,
543 &pIoXfer->SgBuf, pIoXfer);
544 break;
545 }
546 case PDMBLKCACHETYPE_DRV:
547 {
548 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
549 pIoXfer->enmXferDir,
550 off, cbXfer,
551 &pIoXfer->SgBuf, pIoXfer);
552 break;
553 }
554 case PDMBLKCACHETYPE_USB:
555 {
556 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
557 pIoXfer->enmXferDir,
558 off, cbXfer,
559 &pIoXfer->SgBuf, pIoXfer);
560 break;
561 }
562 case PDMBLKCACHETYPE_INTERNAL:
563 {
564 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
565 pIoXfer->enmXferDir,
566 off, cbXfer,
567 &pIoXfer->SgBuf, pIoXfer);
568 break;
569 }
570 default:
571 AssertMsgFailed(("Unknown block cache type!\n"));
572 }
573
574 if (RT_FAILURE(rc))
575 {
576 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: Queueing I/O req %#p failed %Rrc", pIoXfer, rc);
577 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
578 }
579
580 LogFlowFunc(("%s: returns rc=%Rrc\n", __FUNCTION__, rc));
581 return rc;
582}
583
584/**
585 * Initiates a read I/O task for the given entry.
586 *
587 * @returns VBox status code.
588 * @param pEntry The entry to fetch the data to.
589 */
590static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
591{
592 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
593 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
594
595 /* Make sure no one evicts the entry while it is accessed. */
596 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
597
598 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
599 if (RT_UNLIKELY(!pIoXfer))
600 return VERR_NO_MEMORY;
601
602 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
603
604 pIoXfer->fIoCache = true;
605 pIoXfer->pEntry = pEntry;
606 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
607 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
608 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
609 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
610
611 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
612}
613
614/**
615 * Initiates a write I/O task for the given entry.
616 *
617 * @returns VBox status code.
618 * @param pEntry The entry to read the data from.
619 */
620static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
621{
622 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
623 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
624
625 /* Make sure no one evicts the entry while it is accessed. */
626 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
627
628 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
629 if (RT_UNLIKELY(!pIoXfer))
630 return VERR_NO_MEMORY;
631
632 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
633
634 pIoXfer->fIoCache = true;
635 pIoXfer->pEntry = pEntry;
636 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
637 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
638 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
639 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
640
641 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
642}
643
644/**
645 * Passthrough a part of a request directly to the I/O manager handling the
646 * endpoint.
647 *
648 * @returns VBox status code.
649 * @param pBlkCache The endpoint cache.
650 * @param pReq The request.
651 * @param pSgBuf The scatter/gather buffer.
652 * @param offStart Offset to start transfer from.
653 * @param cbData Amount of data to transfer.
654 * @param enmXferDir The transfer type (read/write)
655 */
656static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
657 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
658 PDMBLKCACHEXFERDIR enmXferDir)
659{
660
661 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
662 if (RT_UNLIKELY(!pIoXfer))
663 return VERR_NO_MEMORY;
664
665 ASMAtomicIncU32(&pReq->cXfersPending);
666 pIoXfer->fIoCache = false;
667 pIoXfer->pReq = pReq;
668 pIoXfer->enmXferDir = enmXferDir;
669 if (pSgBuf)
670 {
671 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
672 RTSgBufAdvance(pSgBuf, cbData);
673 }
674
675 return pdmBlkCacheEnqueue(pBlkCache, offStart, cbData, pIoXfer);
676}
677
678/**
679 * Commit a single dirty entry to the endpoint
680 *
681 * @param pEntry The entry to commit.
682 */
683static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
684{
685 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
686 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
687 ("Invalid flags set for entry %#p\n", pEntry));
688
689 pdmBlkCacheEntryWriteToMedium(pEntry);
690}
691
692/**
693 * Commit all dirty entries for a single endpoint.
694 *
695 * @param pBlkCache The endpoint cache to commit.
696 */
697static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
698{
699 uint32_t cbCommitted = 0;
700
701 /* Return if the cache was suspended. */
702 if (pBlkCache->fSuspended)
703 return;
704
705 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
706
707 /* The list is moved to a new header to reduce locking overhead. */
708 RTLISTANCHOR ListDirtyNotCommitted;
709
710 RTSpinlockAcquire(pBlkCache->LockList);
711 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
712 RTSpinlockRelease(pBlkCache->LockList);
713
714 if (!RTListIsEmpty(&ListDirtyNotCommitted))
715 {
716 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
717
718 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
719 {
720 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
721 NodeNotCommitted);
722 pdmBlkCacheEntryCommit(pEntry);
723 cbCommitted += pEntry->cbData;
724 RTListNodeRemove(&pEntry->NodeNotCommitted);
725 pEntry = pNext;
726 }
727
728 /* Commit the last endpoint */
729 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
730 pdmBlkCacheEntryCommit(pEntry);
731 cbCommitted += pEntry->cbData;
732 RTListNodeRemove(&pEntry->NodeNotCommitted);
733 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
734 ("Committed all entries but list is not empty\n"));
735 }
736
737 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
738 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
739 ("Number of committed bytes exceeds number of dirty bytes\n"));
740 uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
741
742 /* Reset the commit timer if we don't have any dirty bits. */
743 if ( !(cbDirtyOld - cbCommitted)
744 && pBlkCache->pCache->u32CommitTimeoutMs != 0)
745 TMTimerStop(pBlkCache->pCache->pVM, pBlkCache->pCache->hTimerCommit);
746}
747
748/**
749 * Commit all dirty entries in the cache.
750 *
751 * @param pCache The global cache instance.
752 */
753static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
754{
755 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
756
757 if (!fCommitInProgress)
758 {
759 pdmBlkCacheLockEnter(pCache);
760 Assert(!RTListIsEmpty(&pCache->ListUsers));
761
762 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
763 AssertPtr(pBlkCache);
764
765 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
766 {
767 pdmBlkCacheCommit(pBlkCache);
768
769 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
770 NodeCacheUser);
771 }
772
773 /* Commit the last endpoint */
774 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
775 pdmBlkCacheCommit(pBlkCache);
776
777 pdmBlkCacheLockLeave(pCache);
778 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
779 }
780}
781
782/**
783 * Adds the given entry as a dirty to the cache.
784 *
785 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
786 * @param pBlkCache The endpoint cache the entry belongs to.
787 * @param pEntry The entry to add.
788 */
789static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
790{
791 bool fDirtyBytesExceeded = false;
792 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
793
794 /* If the commit timer is disabled we commit right away. */
795 if (pCache->u32CommitTimeoutMs == 0)
796 {
797 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
798 pdmBlkCacheEntryCommit(pEntry);
799 }
800 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
801 {
802 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
803
804 RTSpinlockAcquire(pBlkCache->LockList);
805 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
806 RTSpinlockRelease(pBlkCache->LockList);
807
808 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
809
810 /* Prevent committing if the VM was suspended. */
811 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
812 fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold);
813 else if (!cbDirty && pCache->u32CommitTimeoutMs > 0)
814 {
815 /* Arm the commit timer. */
816 TMTimerSetMillies(pCache->pVM, pCache->hTimerCommit, pCache->u32CommitTimeoutMs);
817 }
818 }
819
820 return fDirtyBytesExceeded;
821}
822
823static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
824{
825 bool fFound = false;
826
827 PPDMBLKCACHE pBlkCache;
828 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
829 {
830 if (!RTStrCmp(pBlkCache->pszId, pcszId))
831 {
832 fFound = true;
833 break;
834 }
835 }
836
837 return fFound ? pBlkCache : NULL;
838}
839
840/**
841 * @callback_method_impl{FNTMTIMERINT, Commit timer callback.}
842 */
843static DECLCALLBACK(void) pdmBlkCacheCommitTimerCallback(PVM pVM, TMTIMERHANDLE hTimer, void *pvUser)
844{
845 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
846 RT_NOREF(pVM, hTimer);
847
848 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
849
850 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
851 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
852 pdmBlkCacheCommitDirtyEntries(pCache);
853
854 LogFlowFunc(("Entries committed, going to sleep\n"));
855}
856
857static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
858{
859 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
860
861 AssertPtr(pBlkCacheGlobal);
862
863 pdmBlkCacheLockEnter(pBlkCacheGlobal);
864
865 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
866
867 /* Go through the list and save all dirty entries. */
868 PPDMBLKCACHE pBlkCache;
869 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
870 {
871 uint32_t cEntries = 0;
872 PPDMBLKCACHEENTRY pEntry;
873
874 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
875 SSMR3PutU32(pSSM, (uint32_t)strlen(pBlkCache->pszId));
876 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
877
878 /* Count the number of entries to safe. */
879 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
880 {
881 cEntries++;
882 }
883
884 SSMR3PutU32(pSSM, cEntries);
885
886 /* Walk the list of all dirty entries and save them. */
887 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
888 {
889 /* A few sanity checks. */
890 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
891 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
892 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
893 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
894 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
895 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
896 ("Invalid list\n"));
897 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
898 ("Size and range do not match\n"));
899
900 /* Save */
901 SSMR3PutU64(pSSM, pEntry->Core.Key);
902 SSMR3PutU32(pSSM, pEntry->cbData);
903 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
904 }
905
906 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
907 }
908
909 pdmBlkCacheLockLeave(pBlkCacheGlobal);
910
911 /* Terminator */
912 return SSMR3PutU32(pSSM, UINT32_MAX);
913}
914
915static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
916{
917 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
918 uint32_t cRefs;
919
920 NOREF(uPass);
921 AssertPtr(pBlkCacheGlobal);
922
923 pdmBlkCacheLockEnter(pBlkCacheGlobal);
924
925 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
926 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
927
928 SSMR3GetU32(pSSM, &cRefs);
929
930 /*
931 * Fewer users in the saved state than in the current VM are allowed
932 * because that means that there are only new ones which don't have any saved state
933 * which can get lost.
934 * More saved state entries than registered cache users are only allowed if the
935 * missing users don't have any data saved in the cache.
936 */
937 int rc = VINF_SUCCESS;
938 char *pszId = NULL;
939
940 while ( cRefs > 0
941 && RT_SUCCESS(rc))
942 {
943 PPDMBLKCACHE pBlkCache = NULL;
944 uint32_t cbId = 0;
945
946 SSMR3GetU32(pSSM, &cbId);
947 Assert(cbId > 0);
948
949 cbId++; /* Include terminator */
950 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
951 if (!pszId)
952 {
953 rc = VERR_NO_MEMORY;
954 break;
955 }
956
957 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
958 AssertRC(rc);
959
960 /* Search for the block cache with the provided id. */
961 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
962
963 /* Get the entries */
964 uint32_t cEntries;
965 SSMR3GetU32(pSSM, &cEntries);
966
967 if (!pBlkCache && (cEntries > 0))
968 {
969 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
970 N_("The VM is missing a block device and there is data in the cache. Please make sure the source and target VMs have compatible storage configurations"));
971 break;
972 }
973
974 RTMemFree(pszId);
975 pszId = NULL;
976
977 while (cEntries > 0)
978 {
979 PPDMBLKCACHEENTRY pEntry;
980 uint64_t off;
981 uint32_t cbEntry;
982
983 SSMR3GetU64(pSSM, &off);
984 SSMR3GetU32(pSSM, &cbEntry);
985
986 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
987 if (!pEntry)
988 {
989 rc = VERR_NO_MEMORY;
990 break;
991 }
992
993 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
994 if (RT_FAILURE(rc))
995 {
996 RTMemFree(pEntry->pbData);
997 RTMemFree(pEntry);
998 break;
999 }
1000
1001 /* Insert into the tree. */
1002 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1003 Assert(fInserted); NOREF(fInserted);
1004
1005 /* Add to the dirty list. */
1006 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
1007 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
1008 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
1009 pdmBlkCacheEntryRelease(pEntry);
1010 cEntries--;
1011 }
1012
1013 cRefs--;
1014 }
1015
1016 if (pszId)
1017 RTMemFree(pszId);
1018
1019 if (cRefs && RT_SUCCESS(rc))
1020 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
1021 N_("Unexpected error while restoring state. Please make sure the source and target VMs have compatible storage configurations"));
1022
1023 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1024
1025 if (RT_SUCCESS(rc))
1026 {
1027 uint32_t u32 = 0;
1028 rc = SSMR3GetU32(pSSM, &u32);
1029 if (RT_SUCCESS(rc))
1030 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
1031 }
1032
1033 return rc;
1034}
1035
1036int pdmR3BlkCacheInit(PVM pVM)
1037{
1038 int rc = VINF_SUCCESS;
1039 PUVM pUVM = pVM->pUVM;
1040 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
1041
1042 LogFlowFunc((": pVM=%p\n", pVM));
1043
1044 VM_ASSERT_EMT(pVM);
1045
1046 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
1047 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
1048
1049 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
1050 if (!pBlkCacheGlobal)
1051 return VERR_NO_MEMORY;
1052
1053 RTListInit(&pBlkCacheGlobal->ListUsers);
1054 pBlkCacheGlobal->pVM = pVM;
1055 pBlkCacheGlobal->cRefs = 0;
1056 pBlkCacheGlobal->cbCached = 0;
1057 pBlkCacheGlobal->fCommitInProgress = false;
1058
1059 /* Initialize members */
1060 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
1061 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
1062 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
1063
1064 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
1065 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
1066 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
1067
1068 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
1069 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1070 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1071
1072 do
1073 {
1074 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1075 AssertLogRelRCBreak(rc);
1076 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1077
1078 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1079 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1080 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1081 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1082
1083 /** @todo r=aeichner: Experiment to find optimal default values */
1084 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1085 AssertLogRelRCBreak(rc);
1086 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1087 AssertLogRelRCBreak(rc);
1088 } while (0);
1089
1090 if (RT_SUCCESS(rc))
1091 {
1092 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1093 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1094 "/PDM/BlkCache/cbMax",
1095 STAMUNIT_BYTES,
1096 "Maximum cache size");
1097 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1098 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1099 "/PDM/BlkCache/cbCached",
1100 STAMUNIT_BYTES,
1101 "Currently used cache");
1102 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1103 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1104 "/PDM/BlkCache/cbCachedMruIn",
1105 STAMUNIT_BYTES,
1106 "Number of bytes cached in MRU list");
1107 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1108 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1109 "/PDM/BlkCache/cbCachedMruOut",
1110 STAMUNIT_BYTES,
1111 "Number of bytes cached in FRU list");
1112 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1113 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1114 "/PDM/BlkCache/cbCachedFru",
1115 STAMUNIT_BYTES,
1116 "Number of bytes cached in FRU ghost list");
1117
1118#ifdef VBOX_WITH_STATISTICS
1119 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1120 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1121 "/PDM/BlkCache/CacheHits",
1122 STAMUNIT_COUNT, "Number of hits in the cache");
1123 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1124 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1125 "/PDM/BlkCache/CachePartialHits",
1126 STAMUNIT_COUNT, "Number of partial hits in the cache");
1127 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1128 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1129 "/PDM/BlkCache/CacheMisses",
1130 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1131 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1132 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1133 "/PDM/BlkCache/CacheRead",
1134 STAMUNIT_BYTES, "Number of bytes read from the cache");
1135 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1136 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1137 "/PDM/BlkCache/CacheWritten",
1138 STAMUNIT_BYTES, "Number of bytes written to the cache");
1139 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1140 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1141 "/PDM/BlkCache/CacheTreeGet",
1142 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1143 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1144 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1145 "/PDM/BlkCache/CacheTreeInsert",
1146 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1147 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1148 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1149 "/PDM/BlkCache/CacheTreeRemove",
1150 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1151 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1152 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1153 "/PDM/BlkCache/CacheBuffersReused",
1154 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1155#endif
1156
1157 /* Initialize the critical section */
1158 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1159 }
1160
1161 if (RT_SUCCESS(rc))
1162 {
1163 /* Create the commit timer */
1164 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1165 rc = TMR3TimerCreate(pVM, TMCLOCK_REAL, pdmBlkCacheCommitTimerCallback, pBlkCacheGlobal,
1166 TMTIMER_FLAGS_NO_RING0, "BlkCache-Commit", &pBlkCacheGlobal->hTimerCommit);
1167
1168 if (RT_SUCCESS(rc))
1169 {
1170 /* Register saved state handler. */
1171 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1172 NULL, NULL, NULL,
1173 NULL, pdmR3BlkCacheSaveExec, NULL,
1174 NULL, pdmR3BlkCacheLoadExec, NULL);
1175 if (RT_SUCCESS(rc))
1176 {
1177 LogRel(("BlkCache: Cache successfully initialized. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1178 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1179 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1180 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1181 return VINF_SUCCESS;
1182 }
1183 }
1184
1185 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1186 }
1187
1188 RTMemFree(pBlkCacheGlobal);
1189
1190 LogFlowFunc((": returns rc=%Rrc\n", rc));
1191 return rc;
1192}
1193
1194void pdmR3BlkCacheTerm(PVM pVM)
1195{
1196 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1197
1198 if (pBlkCacheGlobal)
1199 {
1200 /* Make sure no one else uses the cache now */
1201 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1202
1203 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1204 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1205 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1206 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1207
1208 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1209
1210 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1211 RTMemFree(pBlkCacheGlobal);
1212 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1213 }
1214}
1215
1216int pdmR3BlkCacheResume(PVM pVM)
1217{
1218 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1219
1220 LogFlowFunc(("pVM=%#p\n", pVM));
1221
1222 if ( pBlkCacheGlobal
1223 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1224 {
1225 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1226 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1227 }
1228
1229 return VINF_SUCCESS;
1230}
1231
1232static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1233{
1234 int rc = VINF_SUCCESS;
1235 PPDMBLKCACHE pBlkCache = NULL;
1236 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1237
1238 if (!pBlkCacheGlobal)
1239 return VERR_NOT_SUPPORTED;
1240
1241 /*
1242 * Check that no other user cache has the same id first,
1243 * Unique id's are necessary in case the state is saved.
1244 */
1245 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1246
1247 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1248
1249 if (!pBlkCache)
1250 {
1251 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1252
1253 if (pBlkCache)
1254 pBlkCache->pszId = RTStrDup(pcszId);
1255
1256 if ( pBlkCache
1257 && pBlkCache->pszId)
1258 {
1259 pBlkCache->fSuspended = false;
1260 pBlkCache->cIoXfersActive = 0;
1261 pBlkCache->pCache = pBlkCacheGlobal;
1262 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1263
1264 rc = RTSpinlockCreate(&pBlkCache->LockList, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "pdmR3BlkCacheRetain");
1265 if (RT_SUCCESS(rc))
1266 {
1267 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1268 if (RT_SUCCESS(rc))
1269 {
1270 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1271 if (pBlkCache->pTree)
1272 {
1273#ifdef VBOX_WITH_STATISTICS
1274 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1275 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1276 STAMUNIT_COUNT, "Number of deferred writes",
1277 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1278#endif
1279
1280 /* Add to the list of users. */
1281 pBlkCacheGlobal->cRefs++;
1282 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1283 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1284
1285 *ppBlkCache = pBlkCache;
1286 LogFlowFunc(("returns success\n"));
1287 return VINF_SUCCESS;
1288 }
1289
1290 rc = VERR_NO_MEMORY;
1291 RTSemRWDestroy(pBlkCache->SemRWEntries);
1292 }
1293
1294 RTSpinlockDestroy(pBlkCache->LockList);
1295 }
1296
1297 RTStrFree(pBlkCache->pszId);
1298 }
1299 else
1300 rc = VERR_NO_MEMORY;
1301
1302 if (pBlkCache)
1303 RTMemFree(pBlkCache);
1304 }
1305 else
1306 rc = VERR_ALREADY_EXISTS;
1307
1308 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1309
1310 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1311 return rc;
1312}
1313
1314VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1315 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1316 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1317 PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard,
1318 const char *pcszId)
1319{
1320 int rc = VINF_SUCCESS;
1321 PPDMBLKCACHE pBlkCache;
1322
1323 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1324 if (RT_SUCCESS(rc))
1325 {
1326 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1327 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1328 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1329 pBlkCache->u.Drv.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1330 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1331 *ppBlkCache = pBlkCache;
1332 }
1333
1334 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1335 return rc;
1336}
1337
1338VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1339 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1340 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1341 PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV pfnXferEnqueueDiscard,
1342 const char *pcszId)
1343{
1344 int rc = VINF_SUCCESS;
1345 PPDMBLKCACHE pBlkCache;
1346
1347 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1348 if (RT_SUCCESS(rc))
1349 {
1350 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1351 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1352 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1353 pBlkCache->u.Dev.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1354 pBlkCache->u.Dev.pDevIns = pDevIns;
1355 *ppBlkCache = pBlkCache;
1356 }
1357
1358 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1359 return rc;
1360
1361}
1362
1363VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1364 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1365 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1366 PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB pfnXferEnqueueDiscard,
1367 const char *pcszId)
1368{
1369 int rc = VINF_SUCCESS;
1370 PPDMBLKCACHE pBlkCache;
1371
1372 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1373 if (RT_SUCCESS(rc))
1374 {
1375 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1376 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1377 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1378 pBlkCache->u.Usb.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1379 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1380 *ppBlkCache = pBlkCache;
1381 }
1382
1383 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1384 return rc;
1385
1386}
1387
1388VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1389 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1390 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1391 PFNPDMBLKCACHEXFERENQUEUEDISCARDINT pfnXferEnqueueDiscard,
1392 const char *pcszId)
1393{
1394 int rc = VINF_SUCCESS;
1395 PPDMBLKCACHE pBlkCache;
1396
1397 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1398 if (RT_SUCCESS(rc))
1399 {
1400 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1401 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1402 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1403 pBlkCache->u.Int.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1404 pBlkCache->u.Int.pvUser = pvUser;
1405 *ppBlkCache = pBlkCache;
1406 }
1407
1408 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1409 return rc;
1410
1411}
1412
1413/**
1414 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1415 *
1416 * @returns IPRT status code.
1417 * @param pNode The node to destroy.
1418 * @param pvUser Opaque user data.
1419 */
1420static DECLCALLBACK(int) pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1421{
1422 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1423 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1424 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1425
1426 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1427 {
1428 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1429 pdmBlkCacheEntryRef(pEntry);
1430 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1431 pdmBlkCacheLockLeave(pCache);
1432
1433 RTThreadSleep(250);
1434
1435 /* Re-enter all locks */
1436 pdmBlkCacheLockEnter(pCache);
1437 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1438 pdmBlkCacheEntryRelease(pEntry);
1439 }
1440
1441 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1442 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1443
1444 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1445 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1446
1447 pdmBlkCacheEntryRemoveFromList(pEntry);
1448
1449 if (fUpdateCache)
1450 pdmBlkCacheSub(pCache, pEntry->cbData);
1451
1452 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1453 RTMemFree(pEntry);
1454
1455 return VINF_SUCCESS;
1456}
1457
1458VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1459{
1460 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1461
1462 /*
1463 * Commit all dirty entries now (they are waited on for completion during the
1464 * destruction of the AVL tree below).
1465 * The exception is if the VM was paused because of an I/O error before.
1466 */
1467 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1468 pdmBlkCacheCommit(pBlkCache);
1469
1470 /* Make sure nobody is accessing the cache while we delete the tree. */
1471 pdmBlkCacheLockEnter(pCache);
1472 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1473 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1474 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1475
1476 RTSpinlockDestroy(pBlkCache->LockList);
1477
1478 pCache->cRefs--;
1479 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1480
1481 pdmBlkCacheLockLeave(pCache);
1482
1483 RTMemFree(pBlkCache->pTree);
1484 pBlkCache->pTree = NULL;
1485 RTSemRWDestroy(pBlkCache->SemRWEntries);
1486
1487#ifdef VBOX_WITH_STATISTICS
1488 STAMR3DeregisterF(pCache->pVM->pUVM, "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1489#endif
1490
1491 RTStrFree(pBlkCache->pszId);
1492 RTMemFree(pBlkCache);
1493}
1494
1495VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1496{
1497 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1498
1499 /*
1500 * Validate input.
1501 */
1502 if (!pDevIns)
1503 return;
1504 VM_ASSERT_EMT(pVM);
1505
1506 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1507 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1508
1509 /* Return silently if not supported. */
1510 if (!pBlkCacheGlobal)
1511 return;
1512
1513 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1514
1515 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1516 {
1517 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1518 && pBlkCache->u.Dev.pDevIns == pDevIns)
1519 PDMR3BlkCacheRelease(pBlkCache);
1520 }
1521
1522 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1523}
1524
1525VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1526{
1527 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1528
1529 /*
1530 * Validate input.
1531 */
1532 if (!pDrvIns)
1533 return;
1534 VM_ASSERT_EMT(pVM);
1535
1536 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1537 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1538
1539 /* Return silently if not supported. */
1540 if (!pBlkCacheGlobal)
1541 return;
1542
1543 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1544
1545 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1546 {
1547 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1548 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1549 PDMR3BlkCacheRelease(pBlkCache);
1550 }
1551
1552 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1553}
1554
1555VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1556{
1557 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1558
1559 /*
1560 * Validate input.
1561 */
1562 if (!pUsbIns)
1563 return;
1564 VM_ASSERT_EMT(pVM);
1565
1566 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1567 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1568
1569 /* Return silently if not supported. */
1570 if (!pBlkCacheGlobal)
1571 return;
1572
1573 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1574
1575 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1576 {
1577 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1578 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1579 PDMR3BlkCacheRelease(pBlkCache);
1580 }
1581
1582 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1583}
1584
1585static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1586{
1587 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1588
1589 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1590 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1591 if (pEntry)
1592 pdmBlkCacheEntryRef(pEntry);
1593 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1594
1595 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1596
1597 return pEntry;
1598}
1599
1600/**
1601 * Return the best fit cache entries for the given offset.
1602 *
1603 * @param pBlkCache The endpoint cache.
1604 * @param off The offset.
1605 * @param ppEntryAbove Where to store the pointer to the best fit entry above
1606 * the given offset. NULL if not required.
1607 */
1608static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEENTRY *ppEntryAbove)
1609{
1610 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1611
1612 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1613 if (ppEntryAbove)
1614 {
1615 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1616 if (*ppEntryAbove)
1617 pdmBlkCacheEntryRef(*ppEntryAbove);
1618 }
1619
1620 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1621
1622 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1623}
1624
1625static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1626{
1627 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeInsert, Cache);
1628 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1629 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1630 AssertMsg(fInserted, ("Node was not inserted into tree\n")); NOREF(fInserted);
1631 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeInsert, Cache);
1632 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1633}
1634
1635/**
1636 * Allocates and initializes a new entry for the cache.
1637 * The entry has a reference count of 1.
1638 *
1639 * @returns Pointer to the new cache entry or NULL if out of memory.
1640 * @param pBlkCache The cache the entry belongs to.
1641 * @param off Start offset.
1642 * @param cbData Size of the cache entry.
1643 * @param pbBuffer Pointer to the buffer to use.
1644 * NULL if a new buffer should be allocated.
1645 * The buffer needs to have the same size of the entry.
1646 */
1647static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbData, uint8_t *pbBuffer)
1648{
1649 AssertReturn(cbData <= UINT32_MAX, NULL);
1650 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1651
1652 if (RT_UNLIKELY(!pEntryNew))
1653 return NULL;
1654
1655 pEntryNew->Core.Key = off;
1656 pEntryNew->Core.KeyLast = off + cbData - 1;
1657 pEntryNew->pBlkCache = pBlkCache;
1658 pEntryNew->fFlags = 0;
1659 pEntryNew->cRefs = 1; /* We are using it now. */
1660 pEntryNew->pList = NULL;
1661 pEntryNew->cbData = (uint32_t)cbData;
1662 pEntryNew->pWaitingHead = NULL;
1663 pEntryNew->pWaitingTail = NULL;
1664 if (pbBuffer)
1665 pEntryNew->pbData = pbBuffer;
1666 else
1667 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1668
1669 if (RT_UNLIKELY(!pEntryNew->pbData))
1670 {
1671 RTMemFree(pEntryNew);
1672 return NULL;
1673 }
1674
1675 return pEntryNew;
1676}
1677
1678/**
1679 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1680 * in exclusive mode.
1681 *
1682 * @returns true if the flag in fSet is set and the one in fClear is clear.
1683 * false otherwise.
1684 * The R/W semaphore is only held if true is returned.
1685 *
1686 * @param pBlkCache The endpoint cache instance data.
1687 * @param pEntry The entry to check the flags for.
1688 * @param fSet The flag which is tested to be set.
1689 * @param fClear The flag which is tested to be clear.
1690 */
1691DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1692 PPDMBLKCACHEENTRY pEntry,
1693 uint32_t fSet, uint32_t fClear)
1694{
1695 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1696 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1697
1698 if (fPassed)
1699 {
1700 /* Acquire the lock and check again because the completion callback might have raced us. */
1701 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1702
1703 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1704 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1705
1706 /* Drop the lock if we didn't passed the test. */
1707 if (!fPassed)
1708 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1709 }
1710
1711 return fPassed;
1712}
1713
1714/**
1715 * Adds a segment to the waiting list for a cache entry
1716 * which is currently in progress.
1717 *
1718 * @param pEntry The cache entry to add the segment to.
1719 * @param pWaiter The waiter entry to add.
1720 */
1721DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEWAITER pWaiter)
1722{
1723 pWaiter->pNext = NULL;
1724
1725 if (pEntry->pWaitingHead)
1726 {
1727 AssertPtr(pEntry->pWaitingTail);
1728
1729 pEntry->pWaitingTail->pNext = pWaiter;
1730 pEntry->pWaitingTail = pWaiter;
1731 }
1732 else
1733 {
1734 Assert(!pEntry->pWaitingTail);
1735
1736 pEntry->pWaitingHead = pWaiter;
1737 pEntry->pWaitingTail = pWaiter;
1738 }
1739}
1740
1741/**
1742 * Add a buffer described by the I/O memory context
1743 * to the entry waiting for completion.
1744 *
1745 * @returns VBox status code.
1746 * @param pEntry The entry to add the buffer to.
1747 * @param pReq The request.
1748 * @param pSgBuf The scatter/gather buffer. Will be advanced by cbData.
1749 * @param offDiff Offset from the start of the buffer in the entry.
1750 * @param cbData Amount of data to wait for onthis entry.
1751 * @param fWrite Flag whether the task waits because it wants to write to
1752 * the cache entry.
1753 */
1754static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEREQ pReq,
1755 PRTSGBUF pSgBuf, uint64_t offDiff, size_t cbData, bool fWrite)
1756{
1757 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1758 if (!pWaiter)
1759 return VERR_NO_MEMORY;
1760
1761 ASMAtomicIncU32(&pReq->cXfersPending);
1762 pWaiter->pReq = pReq;
1763 pWaiter->offCacheEntry = offDiff;
1764 pWaiter->cbTransfer = cbData;
1765 pWaiter->fWrite = fWrite;
1766 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1767 RTSgBufAdvance(pSgBuf, cbData);
1768
1769 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1770
1771 return VINF_SUCCESS;
1772}
1773
1774/**
1775 * Calculate aligned offset and size for a new cache entry which do not
1776 * intersect with an already existing entry and the file end.
1777 *
1778 * @returns The number of bytes the entry can hold of the requested amount
1779 * of bytes.
1780 * @param pBlkCache The endpoint cache.
1781 * @param off The start offset.
1782 * @param cb The number of bytes the entry needs to hold at
1783 * least.
1784 * @param pcbEntry Where to store the number of bytes the entry can hold.
1785 * Can be less than given because of other entries.
1786 */
1787static uint32_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1788 uint64_t off, uint32_t cb,
1789 uint32_t *pcbEntry)
1790{
1791 /* Get the best fit entries around the offset */
1792 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1793 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1794
1795 /* Log the info */
1796 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1797 pEntryAbove ? "B" : "No b",
1798 off,
1799 pEntryAbove ? pEntryAbove->Core.Key : 0,
1800 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1801 pEntryAbove ? pEntryAbove->cbData : 0));
1802
1803 uint32_t cbNext;
1804 uint32_t cbInEntry;
1805 if ( pEntryAbove
1806 && off + cb > pEntryAbove->Core.Key)
1807 {
1808 cbInEntry = (uint32_t)(pEntryAbove->Core.Key - off);
1809 cbNext = (uint32_t)(pEntryAbove->Core.Key - off);
1810 }
1811 else
1812 {
1813 cbInEntry = cb;
1814 cbNext = cb;
1815 }
1816
1817 /* A few sanity checks */
1818 AssertMsg(!pEntryAbove || off + cbNext <= pEntryAbove->Core.Key,
1819 ("Aligned size intersects with another cache entry\n"));
1820 Assert(cbInEntry <= cbNext);
1821
1822 if (pEntryAbove)
1823 pdmBlkCacheEntryRelease(pEntryAbove);
1824
1825 LogFlow(("off=%llu cbNext=%u\n", off, cbNext));
1826
1827 *pcbEntry = cbNext;
1828
1829 return cbInEntry;
1830}
1831
1832/**
1833 * Create a new cache entry evicting data from the cache if required.
1834 *
1835 * @returns Pointer to the new cache entry or NULL
1836 * if not enough bytes could be evicted from the cache.
1837 * @param pBlkCache The endpoint cache.
1838 * @param off The offset.
1839 * @param cb Number of bytes the cache entry should have.
1840 * @param pcbData Where to store the number of bytes the new
1841 * entry can hold. May be lower than actually
1842 * requested due to another entry intersecting the
1843 * access range.
1844 */
1845static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cb, size_t *pcbData)
1846{
1847 uint32_t cbEntry = 0;
1848
1849 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, (uint32_t)cb, &cbEntry);
1850 AssertReturn(cb <= UINT32_MAX, NULL);
1851
1852 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1853 pdmBlkCacheLockEnter(pCache);
1854
1855 PPDMBLKCACHEENTRY pEntryNew = NULL;
1856 uint8_t *pbBuffer = NULL;
1857 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1858 if (fEnough)
1859 {
1860 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1861
1862 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, pbBuffer);
1863 if (RT_LIKELY(pEntryNew))
1864 {
1865 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1866 pdmBlkCacheAdd(pCache, cbEntry);
1867 pdmBlkCacheLockLeave(pCache);
1868
1869 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1870
1871 AssertMsg( (off >= pEntryNew->Core.Key)
1872 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1873 ("Overflow in calculation off=%llu\n", off));
1874 }
1875 else
1876 pdmBlkCacheLockLeave(pCache);
1877 }
1878 else
1879 pdmBlkCacheLockLeave(pCache);
1880
1881 return pEntryNew;
1882}
1883
1884static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(void *pvUser)
1885{
1886 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1887
1888 if (RT_LIKELY(pReq))
1889 {
1890 pReq->pvUser = pvUser;
1891 pReq->rcReq = VINF_SUCCESS;
1892 pReq->cXfersPending = 0;
1893 }
1894
1895 return pReq;
1896}
1897
1898static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1899{
1900 switch (pBlkCache->enmType)
1901 {
1902 case PDMBLKCACHETYPE_DEV:
1903 {
1904 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1905 pReq->pvUser, pReq->rcReq);
1906 break;
1907 }
1908 case PDMBLKCACHETYPE_DRV:
1909 {
1910 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1911 pReq->pvUser, pReq->rcReq);
1912 break;
1913 }
1914 case PDMBLKCACHETYPE_USB:
1915 {
1916 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1917 pReq->pvUser, pReq->rcReq);
1918 break;
1919 }
1920 case PDMBLKCACHETYPE_INTERNAL:
1921 {
1922 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1923 pReq->pvUser, pReq->rcReq);
1924 break;
1925 }
1926 default:
1927 AssertMsgFailed(("Unknown block cache type!\n"));
1928 }
1929
1930 RTMemFree(pReq);
1931}
1932
1933static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1934 int rcReq, bool fCallHandler)
1935{
1936 if (RT_FAILURE(rcReq))
1937 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1938
1939 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1940 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1941
1942 if (!cXfersPending)
1943 {
1944 if (fCallHandler)
1945 pdmBlkCacheReqComplete(pBlkCache, pReq);
1946 return true;
1947 }
1948
1949 LogFlowFunc(("pReq=%#p cXfersPending=%u\n", pReq, cXfersPending));
1950 return false;
1951}
1952
1953VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1954 PCRTSGBUF pSgBuf, size_t cbRead, void *pvUser)
1955{
1956 int rc = VINF_SUCCESS;
1957 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1958 PPDMBLKCACHEENTRY pEntry;
1959 PPDMBLKCACHEREQ pReq;
1960
1961 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbRead=%u pvUser=%#p\n",
1962 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbRead, pvUser));
1963
1964 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
1965 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
1966
1967 RTSGBUF SgBuf;
1968 RTSgBufClone(&SgBuf, pSgBuf);
1969
1970 /* Allocate new request structure. */
1971 pReq = pdmBlkCacheReqAlloc(pvUser);
1972 if (RT_UNLIKELY(!pReq))
1973 return VERR_NO_MEMORY;
1974
1975 /* Increment data transfer counter to keep the request valid while we access it. */
1976 ASMAtomicIncU32(&pReq->cXfersPending);
1977
1978 while (cbRead)
1979 {
1980 size_t cbToRead;
1981
1982 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1983
1984 /*
1985 * If there is no entry we try to create a new one eviciting unused pages
1986 * if the cache is full. If this is not possible we will pass the request through
1987 * and skip the caching (all entries may be still in progress so they can't
1988 * be evicted)
1989 * If we have an entry it can be in one of the LRU lists where the entry
1990 * contains data (recently used or frequently used LRU) so we can just read
1991 * the data we need and put the entry at the head of the frequently used LRU list.
1992 * In case the entry is in one of the ghost lists it doesn't contain any data.
1993 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1994 */
1995 if (pEntry)
1996 {
1997 uint64_t offDiff = off - pEntry->Core.Key;
1998
1999 AssertMsg(off >= pEntry->Core.Key,
2000 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2001 off, pEntry->Core.Key));
2002
2003 AssertPtr(pEntry->pList);
2004
2005 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
2006
2007 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
2008 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
2009 off, cbToRead));
2010
2011 cbRead -= cbToRead;
2012
2013 if (!cbRead)
2014 STAM_COUNTER_INC(&pCache->cHits);
2015 else
2016 STAM_COUNTER_INC(&pCache->cPartialHits);
2017
2018 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
2019
2020 /* Ghost lists contain no data. */
2021 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2022 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2023 {
2024 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2025 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2026 PDMBLKCACHE_ENTRY_IS_DIRTY))
2027 {
2028 /* Entry didn't completed yet. Append to the list */
2029 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2030 &SgBuf, offDiff, cbToRead,
2031 false /* fWrite */);
2032 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2033 }
2034 else
2035 {
2036 /* Read as much as we can from the entry. */
2037 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2038 }
2039
2040 /* Move this entry to the top position */
2041 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2042 {
2043 pdmBlkCacheLockEnter(pCache);
2044 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2045 pdmBlkCacheLockLeave(pCache);
2046 }
2047 /* Release the entry */
2048 pdmBlkCacheEntryRelease(pEntry);
2049 }
2050 else
2051 {
2052 uint8_t *pbBuffer = NULL;
2053
2054 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2055
2056 pdmBlkCacheLockEnter(pCache);
2057 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2058 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2059
2060 /* Move the entry to Am and fetch it to the cache. */
2061 if (fEnough)
2062 {
2063 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2064 pdmBlkCacheAdd(pCache, pEntry->cbData);
2065 pdmBlkCacheLockLeave(pCache);
2066
2067 if (pbBuffer)
2068 pEntry->pbData = pbBuffer;
2069 else
2070 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2071 AssertPtr(pEntry->pbData);
2072
2073 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2074 &SgBuf, offDiff, cbToRead,
2075 false /* fWrite */);
2076 pdmBlkCacheEntryReadFromMedium(pEntry);
2077 /* Release the entry */
2078 pdmBlkCacheEntryRelease(pEntry);
2079 }
2080 else
2081 {
2082 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2083 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2084 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2085 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2086 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2087
2088 pdmBlkCacheLockLeave(pCache);
2089
2090 RTMemFree(pEntry);
2091
2092 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2093 &SgBuf, off, cbToRead,
2094 PDMBLKCACHEXFERDIR_READ);
2095 }
2096 }
2097 }
2098 else
2099 {
2100#ifdef VBOX_WITH_IO_READ_CACHE
2101 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2102 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2103 off, cbRead,
2104 &cbToRead);
2105
2106 cbRead -= cbToRead;
2107
2108 if (pEntryNew)
2109 {
2110 if (!cbRead)
2111 STAM_COUNTER_INC(&pCache->cMisses);
2112 else
2113 STAM_COUNTER_INC(&pCache->cPartialHits);
2114
2115 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2116 &SgBuf,
2117 off - pEntryNew->Core.Key,
2118 cbToRead,
2119 false /* fWrite */);
2120 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2121 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2122 }
2123 else
2124 {
2125 /*
2126 * There is not enough free space in the cache.
2127 * Pass the request directly to the I/O manager.
2128 */
2129 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2130
2131 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2132 &SgBuf, off, cbToRead,
2133 PDMBLKCACHEXFERDIR_READ);
2134 }
2135#else
2136 /* Clip read size if necessary. */
2137 PPDMBLKCACHEENTRY pEntryAbove;
2138 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2139
2140 if (pEntryAbove)
2141 {
2142 if (off + cbRead > pEntryAbove->Core.Key)
2143 cbToRead = pEntryAbove->Core.Key - off;
2144 else
2145 cbToRead = cbRead;
2146
2147 pdmBlkCacheEntryRelease(pEntryAbove);
2148 }
2149 else
2150 cbToRead = cbRead;
2151
2152 cbRead -= cbToRead;
2153 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2154 &SgBuf, off, cbToRead,
2155 PDMBLKCACHEXFERDIR_READ);
2156#endif
2157 }
2158 off += cbToRead;
2159 }
2160
2161 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2162 rc = VINF_AIO_TASK_PENDING;
2163 else
2164 {
2165 rc = pReq->rcReq;
2166 RTMemFree(pReq);
2167 }
2168
2169 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2170
2171 return rc;
2172}
2173
2174VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off, PCRTSGBUF pSgBuf, size_t cbWrite, void *pvUser)
2175{
2176 int rc = VINF_SUCCESS;
2177 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2178 PPDMBLKCACHEENTRY pEntry;
2179 PPDMBLKCACHEREQ pReq;
2180
2181 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2182 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbWrite, pvUser));
2183
2184 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2185 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2186
2187 RTSGBUF SgBuf;
2188 RTSgBufClone(&SgBuf, pSgBuf);
2189
2190 /* Allocate new request structure. */
2191 pReq = pdmBlkCacheReqAlloc(pvUser);
2192 if (RT_UNLIKELY(!pReq))
2193 return VERR_NO_MEMORY;
2194
2195 /* Increment data transfer counter to keep the request valid while we access it. */
2196 ASMAtomicIncU32(&pReq->cXfersPending);
2197
2198 while (cbWrite)
2199 {
2200 size_t cbToWrite;
2201
2202 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2203 if (pEntry)
2204 {
2205 /* Write the data into the entry and mark it as dirty */
2206 AssertPtr(pEntry->pList);
2207
2208 uint64_t offDiff = off - pEntry->Core.Key;
2209 AssertMsg(off >= pEntry->Core.Key, ("Overflow in calculation off=%llu OffsetAligned=%llu\n", off, pEntry->Core.Key));
2210
2211 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2212 cbWrite -= cbToWrite;
2213
2214 if (!cbWrite)
2215 STAM_COUNTER_INC(&pCache->cHits);
2216 else
2217 STAM_COUNTER_INC(&pCache->cPartialHits);
2218
2219 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2220
2221 /* Ghost lists contain no data. */
2222 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2223 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2224 {
2225 /* Check if the entry is dirty. */
2226 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2227 PDMBLKCACHE_ENTRY_IS_DIRTY,
2228 0))
2229 {
2230 /* If it is already dirty but not in progress just update the data. */
2231 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2232 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2233 else
2234 {
2235 /* The data isn't written to the file yet */
2236 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2237 &SgBuf, offDiff, cbToWrite,
2238 true /* fWrite */);
2239 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2240 }
2241
2242 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2243 }
2244 else /* Dirty bit not set */
2245 {
2246 /*
2247 * Check if a read is in progress for this entry.
2248 * We have to defer processing in that case.
2249 */
2250 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2251 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2252 0))
2253 {
2254 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2255 &SgBuf, offDiff, cbToWrite,
2256 true /* fWrite */);
2257 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2258 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2259 }
2260 else /* I/O in progress flag not set */
2261 {
2262 /* Write as much as we can into the entry and update the file. */
2263 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2264
2265 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2266 if (fCommit)
2267 pdmBlkCacheCommitDirtyEntries(pCache);
2268 }
2269 } /* Dirty bit not set */
2270
2271 /* Move this entry to the top position */
2272 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2273 {
2274 pdmBlkCacheLockEnter(pCache);
2275 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2276 pdmBlkCacheLockLeave(pCache);
2277 }
2278
2279 pdmBlkCacheEntryRelease(pEntry);
2280 }
2281 else /* Entry is on the ghost list */
2282 {
2283 uint8_t *pbBuffer = NULL;
2284
2285 pdmBlkCacheLockEnter(pCache);
2286 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2287 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2288
2289 if (fEnough)
2290 {
2291 /* Move the entry to Am and fetch it to the cache. */
2292 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2293 pdmBlkCacheAdd(pCache, pEntry->cbData);
2294 pdmBlkCacheLockLeave(pCache);
2295
2296 if (pbBuffer)
2297 pEntry->pbData = pbBuffer;
2298 else
2299 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2300 AssertPtr(pEntry->pbData);
2301
2302 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2303 &SgBuf, offDiff, cbToWrite,
2304 true /* fWrite */);
2305 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2306 pdmBlkCacheEntryReadFromMedium(pEntry);
2307
2308 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2309 pdmBlkCacheEntryRelease(pEntry);
2310 }
2311 else
2312 {
2313 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2314 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2315 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2316 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2317 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2318
2319 pdmBlkCacheLockLeave(pCache);
2320
2321 RTMemFree(pEntry);
2322 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2323 &SgBuf, off, cbToWrite,
2324 PDMBLKCACHEXFERDIR_WRITE);
2325 }
2326 }
2327 }
2328 else /* No entry found */
2329 {
2330 /*
2331 * No entry found. Try to create a new cache entry to store the data in and if that fails
2332 * write directly to the file.
2333 */
2334 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2335 off, cbWrite,
2336 &cbToWrite);
2337
2338 cbWrite -= cbToWrite;
2339
2340 if (pEntryNew)
2341 {
2342 uint64_t offDiff = off - pEntryNew->Core.Key;
2343
2344 STAM_COUNTER_INC(&pCache->cHits);
2345
2346 /*
2347 * Check if it is possible to just write the data without waiting
2348 * for it to get fetched first.
2349 */
2350 if (!offDiff && pEntryNew->cbData == cbToWrite)
2351 {
2352 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2353
2354 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2355 if (fCommit)
2356 pdmBlkCacheCommitDirtyEntries(pCache);
2357 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2358 }
2359 else
2360 {
2361 /* Defer the write and fetch the data from the endpoint. */
2362 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2363 &SgBuf, offDiff, cbToWrite,
2364 true /* fWrite */);
2365 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2366 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2367 }
2368
2369 pdmBlkCacheEntryRelease(pEntryNew);
2370 }
2371 else
2372 {
2373 /*
2374 * There is not enough free space in the cache.
2375 * Pass the request directly to the I/O manager.
2376 */
2377 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2378
2379 STAM_COUNTER_INC(&pCache->cMisses);
2380
2381 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2382 &SgBuf, off, cbToWrite,
2383 PDMBLKCACHEXFERDIR_WRITE);
2384 }
2385 }
2386
2387 off += cbToWrite;
2388 }
2389
2390 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2391 rc = VINF_AIO_TASK_PENDING;
2392 else
2393 {
2394 rc = pReq->rcReq;
2395 RTMemFree(pReq);
2396 }
2397
2398 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2399
2400 return rc;
2401}
2402
2403VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2404{
2405 int rc = VINF_SUCCESS;
2406 PPDMBLKCACHEREQ pReq;
2407
2408 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2409
2410 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2411 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2412
2413 /* Commit dirty entries in the cache. */
2414 pdmBlkCacheCommit(pBlkCache);
2415
2416 /* Allocate new request structure. */
2417 pReq = pdmBlkCacheReqAlloc(pvUser);
2418 if (RT_UNLIKELY(!pReq))
2419 return VERR_NO_MEMORY;
2420
2421 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2422 PDMBLKCACHEXFERDIR_FLUSH);
2423 AssertRC(rc);
2424
2425 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2426 return VINF_AIO_TASK_PENDING;
2427}
2428
2429VMMR3DECL(int) PDMR3BlkCacheDiscard(PPDMBLKCACHE pBlkCache, PCRTRANGE paRanges,
2430 unsigned cRanges, void *pvUser)
2431{
2432 int rc = VINF_SUCCESS;
2433 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2434 PPDMBLKCACHEENTRY pEntry;
2435 PPDMBLKCACHEREQ pReq;
2436
2437 LogFlowFunc((": pBlkCache=%#p{%s} paRanges=%#p cRanges=%u pvUser=%#p\n",
2438 pBlkCache, pBlkCache->pszId, paRanges, cRanges, pvUser));
2439
2440 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2441 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2442
2443 /* Allocate new request structure. */
2444 pReq = pdmBlkCacheReqAlloc(pvUser);
2445 if (RT_UNLIKELY(!pReq))
2446 return VERR_NO_MEMORY;
2447
2448 /* Increment data transfer counter to keep the request valid while we access it. */
2449 ASMAtomicIncU32(&pReq->cXfersPending);
2450
2451 for (unsigned i = 0; i < cRanges; i++)
2452 {
2453 uint64_t offCur = paRanges[i].offStart;
2454 size_t cbLeft = paRanges[i].cbRange;
2455
2456 while (cbLeft)
2457 {
2458 size_t cbThisDiscard = 0;
2459
2460 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, offCur);
2461
2462 if (pEntry)
2463 {
2464 /* Write the data into the entry and mark it as dirty */
2465 AssertPtr(pEntry->pList);
2466
2467 uint64_t offDiff = offCur - pEntry->Core.Key;
2468
2469 AssertMsg(offCur >= pEntry->Core.Key,
2470 ("Overflow in calculation offCur=%llu OffsetAligned=%llu\n",
2471 offCur, pEntry->Core.Key));
2472
2473 cbThisDiscard = RT_MIN(pEntry->cbData - offDiff, cbLeft);
2474
2475 /* Ghost lists contain no data. */
2476 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2477 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2478 {
2479 /* Check if the entry is dirty. */
2480 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2481 PDMBLKCACHE_ENTRY_IS_DIRTY,
2482 0))
2483 {
2484 /* If it is dirty but not yet in progress remove it. */
2485 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2486 {
2487 pdmBlkCacheLockEnter(pCache);
2488 pdmBlkCacheEntryRemoveFromList(pEntry);
2489
2490 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2491 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2492 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2493
2494 pdmBlkCacheLockLeave(pCache);
2495
2496 RTMemFree(pEntry);
2497 }
2498 else
2499 {
2500#if 0
2501 /* The data isn't written to the file yet */
2502 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2503 &SgBuf, offDiff, cbToWrite,
2504 true /* fWrite */);
2505 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2506#endif
2507 }
2508
2509 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2510 pdmBlkCacheEntryRelease(pEntry);
2511 }
2512 else /* Dirty bit not set */
2513 {
2514 /*
2515 * Check if a read is in progress for this entry.
2516 * We have to defer processing in that case.
2517 */
2518 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2519 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2520 0))
2521 {
2522#if 0
2523 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2524 &SgBuf, offDiff, cbToWrite,
2525 true /* fWrite */);
2526#endif
2527 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2528 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2529 pdmBlkCacheEntryRelease(pEntry);
2530 }
2531 else /* I/O in progress flag not set */
2532 {
2533 pdmBlkCacheLockEnter(pCache);
2534 pdmBlkCacheEntryRemoveFromList(pEntry);
2535
2536 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2537 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2538 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2539 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2540 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2541
2542 pdmBlkCacheLockLeave(pCache);
2543
2544 RTMemFree(pEntry);
2545 }
2546 } /* Dirty bit not set */
2547 }
2548 else /* Entry is on the ghost list just remove cache entry. */
2549 {
2550 pdmBlkCacheLockEnter(pCache);
2551 pdmBlkCacheEntryRemoveFromList(pEntry);
2552
2553 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2554 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2555 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2556 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2557 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2558
2559 pdmBlkCacheLockLeave(pCache);
2560
2561 RTMemFree(pEntry);
2562 }
2563 }
2564 /* else: no entry found. */
2565
2566 offCur += cbThisDiscard;
2567 cbLeft -= cbThisDiscard;
2568 }
2569 }
2570
2571 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2572 rc = VINF_AIO_TASK_PENDING;
2573 else
2574 {
2575 rc = pReq->rcReq;
2576 RTMemFree(pReq);
2577 }
2578
2579 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2580
2581 return rc;
2582}
2583
2584/**
2585 * Completes a task segment freeing all resources and completes the task handle
2586 * if everything was transferred.
2587 *
2588 * @returns Next task segment handle.
2589 * @param pBlkCache The endpoint block cache.
2590 * @param pWaiter Task segment to complete.
2591 * @param rc Status code to set.
2592 */
2593static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEWAITER pWaiter, int rc)
2594{
2595 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2596 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2597
2598 pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, true);
2599
2600 RTMemFree(pWaiter);
2601
2602 return pNext;
2603}
2604
2605static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2606{
2607 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2608 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2609
2610 /* Reference the entry now as we are clearing the I/O in progress flag
2611 * which protected the entry till now. */
2612 pdmBlkCacheEntryRef(pEntry);
2613
2614 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2615 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2616
2617 /* Process waiting segment list. The data in entry might have changed in-between. */
2618 bool fDirty = false;
2619 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2620 PPDMBLKCACHEWAITER pCurr = pComplete;
2621
2622 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2623 ("The list tail was not updated correctly\n"));
2624 pEntry->pWaitingTail = NULL;
2625 pEntry->pWaitingHead = NULL;
2626
2627 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2628 {
2629 /*
2630 * An error here is difficult to handle as the original request completed already.
2631 * The error is logged for now and the VM is paused.
2632 * If the user continues the entry is written again in the hope
2633 * the user fixed the problem and the next write succeeds.
2634 */
2635 if (RT_FAILURE(rcIoXfer))
2636 {
2637 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2638 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2639
2640 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2641 {
2642 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2643 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2644 "Make sure there is enough free space on the disk and that the disk is working properly. "
2645 "Operation can be resumed afterwards"),
2646 pBlkCache->pszId, rcIoXfer);
2647 AssertRC(rc);
2648 }
2649
2650 /* Mark the entry as dirty again to get it added to the list later on. */
2651 fDirty = true;
2652 }
2653
2654 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2655
2656 while (pCurr)
2657 {
2658 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2659
2660 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2661 fDirty = true;
2662 pCurr = pCurr->pNext;
2663 }
2664 }
2665 else
2666 {
2667 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2668 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2669 ("Invalid flags set\n"));
2670
2671 while (pCurr)
2672 {
2673 if (pCurr->fWrite)
2674 {
2675 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2676 fDirty = true;
2677 }
2678 else
2679 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2680
2681 pCurr = pCurr->pNext;
2682 }
2683 }
2684
2685 bool fCommit = false;
2686 if (fDirty)
2687 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2688
2689 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2690
2691 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2692 pdmBlkCacheEntryRelease(pEntry);
2693
2694 if (fCommit)
2695 pdmBlkCacheCommitDirtyEntries(pCache);
2696
2697 /* Complete waiters now. */
2698 while (pComplete)
2699 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2700}
2701
2702VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2703{
2704 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2705
2706 if (hIoXfer->fIoCache)
2707 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2708 else
2709 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, rcIoXfer, true);
2710
2711 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
2712 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool) completed (%u now active)",
2713 hIoXfer, hIoXfer->fIoCache, pBlkCache->cIoXfersActive);
2714 RTMemFree(hIoXfer);
2715}
2716
2717/**
2718 * Callback for the AVL do with all routine. Waits for a cachen entry to finish any pending I/O.
2719 *
2720 * @returns IPRT status code.
2721 * @param pNode The node to destroy.
2722 * @param pvUser Opaque user data.
2723 */
2724static DECLCALLBACK(int) pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser)
2725{
2726 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
2727 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
2728 NOREF(pvUser);
2729
2730 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
2731 {
2732 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
2733 pdmBlkCacheEntryRef(pEntry);
2734 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2735
2736 RTThreadSleep(1);
2737
2738 /* Re-enter all locks and drop the reference. */
2739 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2740 pdmBlkCacheEntryRelease(pEntry);
2741 }
2742
2743 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
2744 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
2745
2746 return VINF_SUCCESS;
2747}
2748
2749VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache)
2750{
2751 int rc = VINF_SUCCESS;
2752 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2753
2754 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2755
2756 if (!ASMAtomicReadBool(&pBlkCache->pCache->fIoErrorVmSuspended))
2757 pdmBlkCacheCommit(pBlkCache); /* Can issue new I/O requests. */
2758 ASMAtomicXchgBool(&pBlkCache->fSuspended, true);
2759
2760 /* Wait for all I/O to complete. */
2761 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2762 rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL);
2763 AssertRC(rc);
2764 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2765
2766 return rc;
2767}
2768
2769VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache)
2770{
2771 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2772
2773 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2774
2775 ASMAtomicXchgBool(&pBlkCache->fSuspended, false);
2776
2777 return VINF_SUCCESS;
2778}
2779
2780VMMR3DECL(int) PDMR3BlkCacheClear(PPDMBLKCACHE pBlkCache)
2781{
2782 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2783
2784 /*
2785 * Commit all dirty entries now (they are waited on for completion during the
2786 * destruction of the AVL tree below).
2787 * The exception is if the VM was paused because of an I/O error before.
2788 */
2789 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
2790 pdmBlkCacheCommit(pBlkCache);
2791
2792 /* Make sure nobody is accessing the cache while we delete the tree. */
2793 pdmBlkCacheLockEnter(pCache);
2794 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2795 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
2796 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2797
2798 pdmBlkCacheLockLeave(pCache);
2799 return VINF_SUCCESS;
2800}
2801
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette