VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMBlkCache.cpp@ 80239

Last change on this file since 80239 was 80191, checked in by vboxsync, 5 years ago

VMM/r3: Refactored VMCPU enumeration in preparation that aCpus will be replaced with a pointer array. Removed two raw-mode offset members from the CPUM and CPUMCPU sub-structures. bugref:9217 bugref:9517

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 100.2 KB
Line 
1/* $Id: PDMBlkCache.cpp 80191 2019-08-08 00:36:57Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
22
23/*********************************************************************************************************************************
24* Header Files *
25*********************************************************************************************************************************/
26#define VBOX_BUGREF_9217_PART_I
27#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
28#include "PDMInternal.h"
29#include <iprt/asm.h>
30#include <iprt/mem.h>
31#include <iprt/path.h>
32#include <iprt/string.h>
33#include <iprt/trace.h>
34#include <VBox/log.h>
35#include <VBox/vmm/stam.h>
36#include <VBox/vmm/uvm.h>
37#include <VBox/vmm/vm.h>
38
39#include "PDMBlkCacheInternal.h"
40
41
42/*********************************************************************************************************************************
43* Defined Constants And Macros *
44*********************************************************************************************************************************/
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given
 *  global cache (strict builds only).  The argument is parenthesized so that
 *  arbitrary pointer expressions can be passed. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while (0)

/** Asserts that the calling thread is the write owner of the per endpoint
 *  RW semaphore (strict builds only). */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while (0)

/** Asserts that the calling thread is a read owner of the per endpoint
 *  RW semaphore (strict builds only). */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while (0)

#else
/* Non-strict builds: the ownership checks compile to nothing. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while (0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while (0)
#endif
72
73#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1
74
/* Define VBOX_BLKCACHE_TRACING to enable tracing in the block cache code when investigating issues. */
76/*#define VBOX_BLKCACHE_TRACING 1*/
77
78
79/*********************************************************************************************************************************
80* Internal Functions *
81*********************************************************************************************************************************/
82
83static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
84 uint64_t off, size_t cbData, uint8_t *pbBuffer);
85static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
86
87
88/**
89 * Add message to the VM trace buffer.
90 *
91 * @returns nothing.
92 * @param pBlkCache The block cache.
93 * @param pszFmt The format string.
94 * @param ... Additional parameters for the string formatter.
95 */
96DECLINLINE(void) pdmBlkCacheR3TraceMsgF(PPDMBLKCACHE pBlkCache, const char *pszFmt, ...)
97{
98#if defined(VBOX_BLKCACHE_TRACING)
99 va_list va;
100 va_start(va, pszFmt);
101 RTTraceBufAddMsgV(pBlkCache->pCache->pVM->CTX_SUFF(hTraceBuf), pszFmt, va);
102 va_end(va);
103#else
104 RT_NOREF2(pBlkCache, pszFmt);
105#endif
106}
107
108/**
109 * Decrement the reference counter of the given cache entry.
110 *
111 * @returns nothing.
112 * @param pEntry The entry to release.
113 */
114DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
115{
116 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
117 ASMAtomicDecU32(&pEntry->cRefs);
118}
119
120/**
121 * Increment the reference counter of the given cache entry.
122 *
123 * @returns nothing.
124 * @param pEntry The entry to reference.
125 */
126DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
127{
128 ASMAtomicIncU32(&pEntry->cRefs);
129}
130
#ifdef VBOX_STRICT
/**
 * Sanity checks the byte accounting of the global cache (strict builds only).
 *
 * @returns nothing.
 * @param   pCache      The global cache instance to check.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    /* The total must stay within the configured limit. */
    AssertMsg(pCache->cbCached <= pCache->cbMax,
              ("Current amount of cached data exceeds maximum\n"));

    /* The two lists holding real data must add up to the total. */
    AssertMsg(   pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached
              == pCache->cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The paged out list has its own limit. */
    AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
              ("Paged out list exceeds maximum\n"));
}
#endif
146
147DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
148{
149 RTCritSectEnter(&pCache->CritSect);
150#ifdef VBOX_STRICT
151 pdmBlkCacheValidate(pCache);
152#endif
153}
154
155DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
156{
157#ifdef VBOX_STRICT
158 pdmBlkCacheValidate(pCache);
159#endif
160 RTCritSectLeave(&pCache->CritSect);
161}
162
163DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
164{
165 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
166 pCache->cbCached -= cbAmount;
167}
168
169DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
170{
171 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
172 pCache->cbCached += cbAmount;
173}
174
175DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
176{
177 pList->cbCached += cbAmount;
178}
179
180DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
181{
182 pList->cbCached -= cbAmount;
183}
184
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Walks an LRU list and asserts that it is well formed.
 *
 * Checks that no entry is linked twice, that a forbidden entry is absent and
 * that the last entry reached is the recorded tail.
 *
 * @returns nothing
 * @param   pList       The LRU list to check.
 * @param   pNotInList  Element which is not allowed to occur in the list.
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pNode = pList->pHead; pNode; pNode = pNode->pNext)
    {
        /* Compare the node against every successor to spot duplicates and cycles. */
        for (PPDMBLKCACHEENTRY pOther = pNode->pNext; pOther; pOther = pOther->pNext)
            AssertMsg(pNode != pOther,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pNode, pList));

        AssertMsg(pNode != pNotInList, ("Not allowed entry %#p is in list\n", pNode));

        /* The node without a successor has to be the tail. */
        if (!pNode->pNext)
            AssertMsg(pNode == pList->pTail, ("End of list reached but last element is not list tail\n"));
    }
}
#endif
219
220/**
221 * Unlinks a cache entry from the LRU list it is assigned to.
222 *
223 * @returns nothing.
224 * @param pEntry The entry to unlink.
225 */
226static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
227{
228 PPDMBLKLRULIST pList = pEntry->pList;
229 PPDMBLKCACHEENTRY pPrev, pNext;
230
231 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
232
233 AssertPtr(pList);
234
235#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
236 pdmBlkCacheCheckList(pList, NULL);
237#endif
238
239 pPrev = pEntry->pPrev;
240 pNext = pEntry->pNext;
241
242 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
243 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
244
245 if (pPrev)
246 pPrev->pNext = pNext;
247 else
248 {
249 pList->pHead = pNext;
250
251 if (pNext)
252 pNext->pPrev = NULL;
253 }
254
255 if (pNext)
256 pNext->pPrev = pPrev;
257 else
258 {
259 pList->pTail = pPrev;
260
261 if (pPrev)
262 pPrev->pNext = NULL;
263 }
264
265 pEntry->pList = NULL;
266 pEntry->pPrev = NULL;
267 pEntry->pNext = NULL;
268 pdmBlkCacheListSub(pList, pEntry->cbData);
269#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
270 pdmBlkCacheCheckList(pList, pEntry);
271#endif
272}
273
274/**
275 * Adds a cache entry to the given LRU list unlinking it from the currently
276 * assigned list if needed.
277 *
278 * @returns nothing.
279 * @param pList List to the add entry to.
280 * @param pEntry Entry to add.
281 */
282static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
283{
284 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
285#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
286 pdmBlkCacheCheckList(pList, NULL);
287#endif
288
289 /* Remove from old list if needed */
290 if (pEntry->pList)
291 pdmBlkCacheEntryRemoveFromList(pEntry);
292
293 pEntry->pNext = pList->pHead;
294 if (pList->pHead)
295 pList->pHead->pPrev = pEntry;
296 else
297 {
298 Assert(!pList->pTail);
299 pList->pTail = pEntry;
300 }
301
302 pEntry->pPrev = NULL;
303 pList->pHead = pEntry;
304 pdmBlkCacheListAdd(pList, pEntry->cbData);
305 pEntry->pList = pList;
306#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
307 pdmBlkCacheCheckList(pList, NULL);
308#endif
309}
310
311/**
312 * Destroys a LRU list freeing all entries.
313 *
314 * @returns nothing
315 * @param pList Pointer to the LRU list to destroy.
316 *
317 * @note The caller must own the critical section of the cache.
318 */
319static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
320{
321 while (pList->pHead)
322 {
323 PPDMBLKCACHEENTRY pEntry = pList->pHead;
324
325 pList->pHead = pEntry->pNext;
326
327 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
328 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
329
330 RTMemPageFree(pEntry->pbData, pEntry->cbData);
331 RTMemFree(pEntry);
332 }
333}
334
335/**
336 * Tries to remove the given amount of bytes from a given list in the cache
337 * moving the entries to one of the given ghosts lists
338 *
339 * @returns Amount of data which could be freed.
340 * @param pCache Pointer to the global cache data.
341 * @param cbData The amount of the data to free.
342 * @param pListSrc The source list to evict data from.
343 * @param pGhostListDst Where the ghost list removed entries should be
344 * moved to, NULL if the entry should be freed.
345 * @param fReuseBuffer Flag whether a buffer should be reused if it has
346 * the same size
347 * @param ppbBuffer Where to store the address of the buffer if an
348 * entry with the same size was found and
349 * fReuseBuffer is true.
350 *
351 * @note This function may return fewer bytes than requested because entries
352 * may be marked as non evictable if they are used for I/O at the
353 * moment.
354 */
355static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
356 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
357 bool fReuseBuffer, uint8_t **ppbBuffer)
358{
359 size_t cbEvicted = 0;
360
361 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
362
363 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
364 AssertMsg( !pGhostListDst
365 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
366 ("Destination list must be NULL or the recently used but paged out list\n"));
367
368 if (fReuseBuffer)
369 {
370 AssertPtr(ppbBuffer);
371 *ppbBuffer = NULL;
372 }
373
374 /* Start deleting from the tail. */
375 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
376
377 while ((cbEvicted < cbData) && pEntry)
378 {
379 PPDMBLKCACHEENTRY pCurr = pEntry;
380
381 pEntry = pEntry->pPrev;
382
383 /* We can't evict pages which are currently in progress or dirty but not in progress */
384 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
385 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
386 {
387 /* Ok eviction candidate. Grab the endpoint semaphore and check again
388 * because somebody else might have raced us. */
389 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
390 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
391
392 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
393 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
394 {
395 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
396
397 if (fReuseBuffer && pCurr->cbData == cbData)
398 {
399 STAM_COUNTER_INC(&pCache->StatBuffersReused);
400 *ppbBuffer = pCurr->pbData;
401 }
402 else if (pCurr->pbData)
403 RTMemPageFree(pCurr->pbData, pCurr->cbData);
404
405 pCurr->pbData = NULL;
406 cbEvicted += pCurr->cbData;
407
408 pdmBlkCacheEntryRemoveFromList(pCurr);
409 pdmBlkCacheSub(pCache, pCurr->cbData);
410
411 if (pGhostListDst)
412 {
413 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
414
415 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
416
417 /* We have to remove the last entries from the paged out list. */
418 while ( pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax
419 && pGhostEntFree)
420 {
421 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
422 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
423
424 pGhostEntFree = pGhostEntFree->pPrev;
425
426 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
427
428 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
429 {
430 pdmBlkCacheEntryRemoveFromList(pFree);
431
432 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
433 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
434 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
435
436 RTMemFree(pFree);
437 }
438
439 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
440 }
441
442 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
443 {
444 /* Couldn't remove enough entries. Delete */
445 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
446 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
447 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
448
449 RTMemFree(pCurr);
450 }
451 else
452 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
453 }
454 else
455 {
456 /* Delete the entry from the AVL tree it is assigned to. */
457 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
458 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
459 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
460
461 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
462 RTMemFree(pCurr);
463 }
464 }
465
466 }
467 else
468 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
469 }
470
471 return cbEvicted;
472}
473
474static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
475{
476 size_t cbRemoved = 0;
477
478 if ((pCache->cbCached + cbData) < pCache->cbMax)
479 return true;
480 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
481 {
482 /* Try to evict as many bytes as possible from A1in */
483 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
484 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
485
486 /*
487 * If it was not possible to remove enough entries
488 * try the frequently accessed cache.
489 */
490 if (cbRemoved < cbData)
491 {
492 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
493
494 /*
495 * If we removed something we can't pass the reuse buffer flag anymore because
496 * we don't need to evict that much data
497 */
498 if (!cbRemoved)
499 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
500 NULL, fReuseBuffer, ppbBuffer);
501 else
502 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
503 NULL, false, NULL);
504 }
505 }
506 else
507 {
508 /* We have to remove entries from frequently access list. */
509 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
510 NULL, fReuseBuffer, ppbBuffer);
511 }
512
513 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
514 return (cbRemoved >= cbData);
515}
516
517DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbXfer, PPDMBLKCACHEIOXFER pIoXfer)
518{
519 int rc = VINF_SUCCESS;
520
521 LogFlowFunc(("%s: Enqueuing hIoXfer=%#p enmXferDir=%d\n",
522 __FUNCTION__, pIoXfer, pIoXfer->enmXferDir));
523
524 ASMAtomicIncU32(&pBlkCache->cIoXfersActive);
525 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool , %d) queued (%u now active)",
526 pIoXfer, pIoXfer->fIoCache, pIoXfer->enmXferDir, pBlkCache->cIoXfersActive);
527
528 switch (pBlkCache->enmType)
529 {
530 case PDMBLKCACHETYPE_DEV:
531 {
532 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
533 pIoXfer->enmXferDir,
534 off, cbXfer,
535 &pIoXfer->SgBuf, pIoXfer);
536 break;
537 }
538 case PDMBLKCACHETYPE_DRV:
539 {
540 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
541 pIoXfer->enmXferDir,
542 off, cbXfer,
543 &pIoXfer->SgBuf, pIoXfer);
544 break;
545 }
546 case PDMBLKCACHETYPE_USB:
547 {
548 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
549 pIoXfer->enmXferDir,
550 off, cbXfer,
551 &pIoXfer->SgBuf, pIoXfer);
552 break;
553 }
554 case PDMBLKCACHETYPE_INTERNAL:
555 {
556 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
557 pIoXfer->enmXferDir,
558 off, cbXfer,
559 &pIoXfer->SgBuf, pIoXfer);
560 break;
561 }
562 default:
563 AssertMsgFailed(("Unknown block cache type!\n"));
564 }
565
566 if (RT_FAILURE(rc))
567 {
568 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: Queueing I/O req %#p failed %Rrc", pIoXfer, rc);
569 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
570 }
571
572 LogFlowFunc(("%s: returns rc=%Rrc\n", __FUNCTION__, rc));
573 return rc;
574}
575
576/**
577 * Initiates a read I/O task for the given entry.
578 *
579 * @returns VBox status code.
580 * @param pEntry The entry to fetch the data to.
581 */
582static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
583{
584 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
585 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
586
587 /* Make sure no one evicts the entry while it is accessed. */
588 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
589
590 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
591 if (RT_UNLIKELY(!pIoXfer))
592 return VERR_NO_MEMORY;
593
594 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
595
596 pIoXfer->fIoCache = true;
597 pIoXfer->pEntry = pEntry;
598 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
599 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
600 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
601 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
602
603 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
604}
605
606/**
607 * Initiates a write I/O task for the given entry.
608 *
609 * @returns nothing.
610 * @param pEntry The entry to read the data from.
611 */
612static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
613{
614 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
615 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
616
617 /* Make sure no one evicts the entry while it is accessed. */
618 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
619
620 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
621 if (RT_UNLIKELY(!pIoXfer))
622 return VERR_NO_MEMORY;
623
624 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
625
626 pIoXfer->fIoCache = true;
627 pIoXfer->pEntry = pEntry;
628 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
629 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
630 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
631 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
632
633 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
634}
635
636/**
637 * Passthrough a part of a request directly to the I/O manager handling the
638 * endpoint.
639 *
640 * @returns VBox status code.
641 * @param pBlkCache The endpoint cache.
642 * @param pReq The request.
643 * @param pSgBuf The scatter/gather buffer.
644 * @param offStart Offset to start transfer from.
645 * @param cbData Amount of data to transfer.
646 * @param enmXferDir The transfer type (read/write)
647 */
648static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
649 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
650 PDMBLKCACHEXFERDIR enmXferDir)
651{
652
653 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
654 if (RT_UNLIKELY(!pIoXfer))
655 return VERR_NO_MEMORY;
656
657 ASMAtomicIncU32(&pReq->cXfersPending);
658 pIoXfer->fIoCache = false;
659 pIoXfer->pReq = pReq;
660 pIoXfer->enmXferDir = enmXferDir;
661 if (pSgBuf)
662 {
663 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
664 RTSgBufAdvance(pSgBuf, cbData);
665 }
666
667 return pdmBlkCacheEnqueue(pBlkCache, offStart, cbData, pIoXfer);
668}
669
670/**
671 * Commit a single dirty entry to the endpoint
672 *
673 * @returns nothing
674 * @param pEntry The entry to commit.
675 */
676static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
677{
678 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
679 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
680 ("Invalid flags set for entry %#p\n", pEntry));
681
682 pdmBlkCacheEntryWriteToMedium(pEntry);
683}
684
685/**
686 * Commit all dirty entries for a single endpoint.
687 *
688 * @returns nothing.
689 * @param pBlkCache The endpoint cache to commit.
690 */
691static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
692{
693 uint32_t cbCommitted = 0;
694
695 /* Return if the cache was suspended. */
696 if (pBlkCache->fSuspended)
697 return;
698
699 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
700
701 /* The list is moved to a new header to reduce locking overhead. */
702 RTLISTANCHOR ListDirtyNotCommitted;
703
704 RTSpinlockAcquire(pBlkCache->LockList);
705 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
706 RTSpinlockRelease(pBlkCache->LockList);
707
708 if (!RTListIsEmpty(&ListDirtyNotCommitted))
709 {
710 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
711
712 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
713 {
714 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
715 NodeNotCommitted);
716 pdmBlkCacheEntryCommit(pEntry);
717 cbCommitted += pEntry->cbData;
718 RTListNodeRemove(&pEntry->NodeNotCommitted);
719 pEntry = pNext;
720 }
721
722 /* Commit the last endpoint */
723 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
724 pdmBlkCacheEntryCommit(pEntry);
725 cbCommitted += pEntry->cbData;
726 RTListNodeRemove(&pEntry->NodeNotCommitted);
727 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
728 ("Committed all entries but list is not empty\n"));
729 }
730
731 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
732 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
733 ("Number of committed bytes exceeds number of dirty bytes\n"));
734 uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
735
736 /* Reset the commit timer if we don't have any dirty bits. */
737 if ( !(cbDirtyOld - cbCommitted)
738 && pBlkCache->pCache->u32CommitTimeoutMs != 0)
739 TMTimerStop(pBlkCache->pCache->pTimerCommit);
740}
741
742/**
743 * Commit all dirty entries in the cache.
744 *
745 * @returns nothing.
746 * @param pCache The global cache instance.
747 */
748static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
749{
750 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
751
752 if (!fCommitInProgress)
753 {
754 pdmBlkCacheLockEnter(pCache);
755 Assert(!RTListIsEmpty(&pCache->ListUsers));
756
757 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
758 AssertPtr(pBlkCache);
759
760 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
761 {
762 pdmBlkCacheCommit(pBlkCache);
763
764 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
765 NodeCacheUser);
766 }
767
768 /* Commit the last endpoint */
769 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
770 pdmBlkCacheCommit(pBlkCache);
771
772 pdmBlkCacheLockLeave(pCache);
773 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
774 }
775}
776
777/**
778 * Adds the given entry as a dirty to the cache.
779 *
780 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
781 * @param pBlkCache The endpoint cache the entry belongs to.
782 * @param pEntry The entry to add.
783 */
784static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
785{
786 bool fDirtyBytesExceeded = false;
787 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
788
789 /* If the commit timer is disabled we commit right away. */
790 if (pCache->u32CommitTimeoutMs == 0)
791 {
792 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
793 pdmBlkCacheEntryCommit(pEntry);
794 }
795 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
796 {
797 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
798
799 RTSpinlockAcquire(pBlkCache->LockList);
800 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
801 RTSpinlockRelease(pBlkCache->LockList);
802
803 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
804
805 /* Prevent committing if the VM was suspended. */
806 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
807 fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold);
808 else if (!cbDirty && pCache->u32CommitTimeoutMs > 0)
809 {
810 /* Arm the commit timer. */
811 TMTimerSetMillies(pCache->pTimerCommit, pCache->u32CommitTimeoutMs);
812 }
813 }
814
815 return fDirtyBytesExceeded;
816}
817
818static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
819{
820 bool fFound = false;
821
822 PPDMBLKCACHE pBlkCache;
823 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
824 {
825 if (!RTStrCmp(pBlkCache->pszId, pcszId))
826 {
827 fFound = true;
828 break;
829 }
830 }
831
832 return fFound ? pBlkCache : NULL;
833}
834
835/**
836 * Commit timer callback.
837 */
838static DECLCALLBACK(void) pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
839{
840 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
841 NOREF(pVM); NOREF(pTimer);
842
843 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
844
845 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
846 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
847 pdmBlkCacheCommitDirtyEntries(pCache);
848
849 LogFlowFunc(("Entries committed, going to sleep\n"));
850}
851
852static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
853{
854 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
855
856 AssertPtr(pBlkCacheGlobal);
857
858 pdmBlkCacheLockEnter(pBlkCacheGlobal);
859
860 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
861
862 /* Go through the list and save all dirty entries. */
863 PPDMBLKCACHE pBlkCache;
864 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
865 {
866 uint32_t cEntries = 0;
867 PPDMBLKCACHEENTRY pEntry;
868
869 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
870 SSMR3PutU32(pSSM, (uint32_t)strlen(pBlkCache->pszId));
871 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
872
873 /* Count the number of entries to safe. */
874 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
875 {
876 cEntries++;
877 }
878
879 SSMR3PutU32(pSSM, cEntries);
880
881 /* Walk the list of all dirty entries and save them. */
882 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
883 {
884 /* A few sanity checks. */
885 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
886 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
887 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
888 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
889 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
890 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
891 ("Invalid list\n"));
892 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
893 ("Size and range do not match\n"));
894
895 /* Save */
896 SSMR3PutU64(pSSM, pEntry->Core.Key);
897 SSMR3PutU32(pSSM, pEntry->cbData);
898 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
899 }
900
901 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
902 }
903
904 pdmBlkCacheLockLeave(pBlkCacheGlobal);
905
906 /* Terminator */
907 return SSMR3PutU32(pSSM, UINT32_MAX);
908}
909
910static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
911{
912 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
913 uint32_t cRefs;
914
915 NOREF(uPass);
916 AssertPtr(pBlkCacheGlobal);
917
918 pdmBlkCacheLockEnter(pBlkCacheGlobal);
919
920 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
921 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
922
923 SSMR3GetU32(pSSM, &cRefs);
924
925 /*
926 * Fewer users in the saved state than in the current VM are allowed
927 * because that means that there are only new ones which don't have any saved state
928 * which can get lost.
929 * More saved state entries than registered cache users are only allowed if the
930 * missing users don't have any data saved in the cache.
931 */
932 int rc = VINF_SUCCESS;
933 char *pszId = NULL;
934
935 while ( cRefs > 0
936 && RT_SUCCESS(rc))
937 {
938 PPDMBLKCACHE pBlkCache = NULL;
939 uint32_t cbId = 0;
940
941 SSMR3GetU32(pSSM, &cbId);
942 Assert(cbId > 0);
943
944 cbId++; /* Include terminator */
945 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
946 if (!pszId)
947 {
948 rc = VERR_NO_MEMORY;
949 break;
950 }
951
952 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
953 AssertRC(rc);
954
955 /* Search for the block cache with the provided id. */
956 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
957
958 /* Get the entries */
959 uint32_t cEntries;
960 SSMR3GetU32(pSSM, &cEntries);
961
962 if (!pBlkCache && (cEntries > 0))
963 {
964 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
965 N_("The VM is missing a block device and there is data in the cache. Please make sure the source and target VMs have compatible storage configurations"));
966 break;
967 }
968
969 RTMemFree(pszId);
970 pszId = NULL;
971
972 while (cEntries > 0)
973 {
974 PPDMBLKCACHEENTRY pEntry;
975 uint64_t off;
976 uint32_t cbEntry;
977
978 SSMR3GetU64(pSSM, &off);
979 SSMR3GetU32(pSSM, &cbEntry);
980
981 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
982 if (!pEntry)
983 {
984 rc = VERR_NO_MEMORY;
985 break;
986 }
987
988 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
989 if (RT_FAILURE(rc))
990 {
991 RTMemFree(pEntry->pbData);
992 RTMemFree(pEntry);
993 break;
994 }
995
996 /* Insert into the tree. */
997 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
998 Assert(fInserted); NOREF(fInserted);
999
1000 /* Add to the dirty list. */
1001 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
1002 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
1003 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
1004 pdmBlkCacheEntryRelease(pEntry);
1005 cEntries--;
1006 }
1007
1008 cRefs--;
1009 }
1010
1011 if (pszId)
1012 RTMemFree(pszId);
1013
1014 if (cRefs && RT_SUCCESS(rc))
1015 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
1016 N_("Unexpected error while restoring state. Please make sure the source and target VMs have compatible storage configurations"));
1017
1018 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1019
1020 if (RT_SUCCESS(rc))
1021 {
1022 uint32_t u32 = 0;
1023 rc = SSMR3GetU32(pSSM, &u32);
1024 if (RT_SUCCESS(rc))
1025 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
1026 }
1027
1028 return rc;
1029}
1030
1031int pdmR3BlkCacheInit(PVM pVM)
1032{
1033 int rc = VINF_SUCCESS;
1034 PUVM pUVM = pVM->pUVM;
1035 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
1036
1037 LogFlowFunc((": pVM=%p\n", pVM));
1038
1039 VM_ASSERT_EMT(pVM);
1040
1041 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
1042 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
1043
1044 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
1045 if (!pBlkCacheGlobal)
1046 return VERR_NO_MEMORY;
1047
1048 RTListInit(&pBlkCacheGlobal->ListUsers);
1049 pBlkCacheGlobal->pVM = pVM;
1050 pBlkCacheGlobal->cRefs = 0;
1051 pBlkCacheGlobal->cbCached = 0;
1052 pBlkCacheGlobal->fCommitInProgress = false;
1053
1054 /* Initialize members */
1055 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
1056 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
1057 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
1058
1059 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
1060 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
1061 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
1062
1063 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
1064 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1065 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1066
1067 do
1068 {
1069 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1070 AssertLogRelRCBreak(rc);
1071 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1072
1073 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1074 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1075 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1076 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1077
1078 /** @todo r=aeichner: Experiment to find optimal default values */
1079 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1080 AssertLogRelRCBreak(rc);
1081 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1082 AssertLogRelRCBreak(rc);
1083 } while (0);
1084
1085 if (RT_SUCCESS(rc))
1086 {
1087 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1088 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1089 "/PDM/BlkCache/cbMax",
1090 STAMUNIT_BYTES,
1091 "Maximum cache size");
1092 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1093 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1094 "/PDM/BlkCache/cbCached",
1095 STAMUNIT_BYTES,
1096 "Currently used cache");
1097 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1098 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1099 "/PDM/BlkCache/cbCachedMruIn",
1100 STAMUNIT_BYTES,
1101 "Number of bytes cached in MRU list");
1102 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1103 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1104 "/PDM/BlkCache/cbCachedMruOut",
1105 STAMUNIT_BYTES,
1106 "Number of bytes cached in FRU list");
1107 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1108 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1109 "/PDM/BlkCache/cbCachedFru",
1110 STAMUNIT_BYTES,
1111 "Number of bytes cached in FRU ghost list");
1112
1113#ifdef VBOX_WITH_STATISTICS
1114 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1115 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1116 "/PDM/BlkCache/CacheHits",
1117 STAMUNIT_COUNT, "Number of hits in the cache");
1118 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1119 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1120 "/PDM/BlkCache/CachePartialHits",
1121 STAMUNIT_COUNT, "Number of partial hits in the cache");
1122 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1123 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1124 "/PDM/BlkCache/CacheMisses",
1125 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1126 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1127 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1128 "/PDM/BlkCache/CacheRead",
1129 STAMUNIT_BYTES, "Number of bytes read from the cache");
1130 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1131 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1132 "/PDM/BlkCache/CacheWritten",
1133 STAMUNIT_BYTES, "Number of bytes written to the cache");
1134 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1135 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1136 "/PDM/BlkCache/CacheTreeGet",
1137 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1138 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1139 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1140 "/PDM/BlkCache/CacheTreeInsert",
1141 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1142 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1143 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1144 "/PDM/BlkCache/CacheTreeRemove",
1145 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1146 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1147 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1148 "/PDM/BlkCache/CacheBuffersReused",
1149 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1150#endif
1151
1152 /* Initialize the critical section */
1153 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1154 }
1155
1156 if (RT_SUCCESS(rc))
1157 {
1158 /* Create the commit timer */
1159 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1160 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
1161 pdmBlkCacheCommitTimerCallback,
1162 pBlkCacheGlobal,
1163 "BlkCache-Commit",
1164 &pBlkCacheGlobal->pTimerCommit);
1165
1166 if (RT_SUCCESS(rc))
1167 {
1168 /* Register saved state handler. */
1169 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1170 NULL, NULL, NULL,
1171 NULL, pdmR3BlkCacheSaveExec, NULL,
1172 NULL, pdmR3BlkCacheLoadExec, NULL);
1173 if (RT_SUCCESS(rc))
1174 {
1175 LogRel(("BlkCache: Cache successfully initialized. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1176 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1177 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1178 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1179 return VINF_SUCCESS;
1180 }
1181 }
1182
1183 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1184 }
1185
1186 if (pBlkCacheGlobal)
1187 RTMemFree(pBlkCacheGlobal);
1188
1189 LogFlowFunc((": returns rc=%Rrc\n", rc));
1190 return rc;
1191}
1192
1193void pdmR3BlkCacheTerm(PVM pVM)
1194{
1195 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1196
1197 if (pBlkCacheGlobal)
1198 {
1199 /* Make sure no one else uses the cache now */
1200 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1201
1202 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1203 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1204 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1205 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1206
1207 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1208
1209 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1210 RTMemFree(pBlkCacheGlobal);
1211 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1212 }
1213}
1214
1215int pdmR3BlkCacheResume(PVM pVM)
1216{
1217 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1218
1219 LogFlowFunc(("pVM=%#p\n", pVM));
1220
1221 if ( pBlkCacheGlobal
1222 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1223 {
1224 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1225 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1226 }
1227
1228 return VINF_SUCCESS;
1229}
1230
1231static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1232{
1233 int rc = VINF_SUCCESS;
1234 PPDMBLKCACHE pBlkCache = NULL;
1235 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1236
1237 if (!pBlkCacheGlobal)
1238 return VERR_NOT_SUPPORTED;
1239
1240 /*
1241 * Check that no other user cache has the same id first,
1242 * Unique id's are necessary in case the state is saved.
1243 */
1244 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1245
1246 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1247
1248 if (!pBlkCache)
1249 {
1250 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1251
1252 if (pBlkCache)
1253 pBlkCache->pszId = RTStrDup(pcszId);
1254
1255 if ( pBlkCache
1256 && pBlkCache->pszId)
1257 {
1258 pBlkCache->fSuspended = false;
1259 pBlkCache->cIoXfersActive = 0;
1260 pBlkCache->pCache = pBlkCacheGlobal;
1261 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1262
1263 rc = RTSpinlockCreate(&pBlkCache->LockList, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "pdmR3BlkCacheRetain");
1264 if (RT_SUCCESS(rc))
1265 {
1266 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1267 if (RT_SUCCESS(rc))
1268 {
1269 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1270 if (pBlkCache->pTree)
1271 {
1272#ifdef VBOX_WITH_STATISTICS
1273 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1274 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1275 STAMUNIT_COUNT, "Number of deferred writes",
1276 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1277#endif
1278
1279 /* Add to the list of users. */
1280 pBlkCacheGlobal->cRefs++;
1281 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1282 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1283
1284 *ppBlkCache = pBlkCache;
1285 LogFlowFunc(("returns success\n"));
1286 return VINF_SUCCESS;
1287 }
1288
1289 rc = VERR_NO_MEMORY;
1290 RTSemRWDestroy(pBlkCache->SemRWEntries);
1291 }
1292
1293 RTSpinlockDestroy(pBlkCache->LockList);
1294 }
1295
1296 RTStrFree(pBlkCache->pszId);
1297 }
1298 else
1299 rc = VERR_NO_MEMORY;
1300
1301 if (pBlkCache)
1302 RTMemFree(pBlkCache);
1303 }
1304 else
1305 rc = VERR_ALREADY_EXISTS;
1306
1307 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1308
1309 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1310 return rc;
1311}
1312
1313VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1314 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1315 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1316 PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard,
1317 const char *pcszId)
1318{
1319 int rc = VINF_SUCCESS;
1320 PPDMBLKCACHE pBlkCache;
1321
1322 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1323 if (RT_SUCCESS(rc))
1324 {
1325 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1326 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1327 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1328 pBlkCache->u.Drv.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1329 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1330 *ppBlkCache = pBlkCache;
1331 }
1332
1333 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1334 return rc;
1335}
1336
1337VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1338 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1339 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1340 PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV pfnXferEnqueueDiscard,
1341 const char *pcszId)
1342{
1343 int rc = VINF_SUCCESS;
1344 PPDMBLKCACHE pBlkCache;
1345
1346 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1347 if (RT_SUCCESS(rc))
1348 {
1349 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1350 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1351 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1352 pBlkCache->u.Dev.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1353 pBlkCache->u.Dev.pDevIns = pDevIns;
1354 *ppBlkCache = pBlkCache;
1355 }
1356
1357 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1358 return rc;
1359
1360}
1361
1362VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1363 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1364 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1365 PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB pfnXferEnqueueDiscard,
1366 const char *pcszId)
1367{
1368 int rc = VINF_SUCCESS;
1369 PPDMBLKCACHE pBlkCache;
1370
1371 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1372 if (RT_SUCCESS(rc))
1373 {
1374 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1375 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1376 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1377 pBlkCache->u.Usb.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1378 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1379 *ppBlkCache = pBlkCache;
1380 }
1381
1382 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1383 return rc;
1384
1385}
1386
1387VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1388 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1389 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1390 PFNPDMBLKCACHEXFERENQUEUEDISCARDINT pfnXferEnqueueDiscard,
1391 const char *pcszId)
1392{
1393 int rc = VINF_SUCCESS;
1394 PPDMBLKCACHE pBlkCache;
1395
1396 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1397 if (RT_SUCCESS(rc))
1398 {
1399 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1400 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1401 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1402 pBlkCache->u.Int.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1403 pBlkCache->u.Int.pvUser = pvUser;
1404 *ppBlkCache = pBlkCache;
1405 }
1406
1407 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1408 return rc;
1409
1410}
1411
1412/**
1413 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1414 *
1415 * @returns IPRT status code.
1416 * @param pNode The node to destroy.
1417 * @param pvUser Opaque user data.
1418 */
1419static DECLCALLBACK(int) pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1420{
1421 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1422 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1423 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1424
1425 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1426 {
1427 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1428 pdmBlkCacheEntryRef(pEntry);
1429 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1430 pdmBlkCacheLockLeave(pCache);
1431
1432 RTThreadSleep(250);
1433
1434 /* Re-enter all locks */
1435 pdmBlkCacheLockEnter(pCache);
1436 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1437 pdmBlkCacheEntryRelease(pEntry);
1438 }
1439
1440 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1441 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1442
1443 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1444 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1445
1446 pdmBlkCacheEntryRemoveFromList(pEntry);
1447
1448 if (fUpdateCache)
1449 pdmBlkCacheSub(pCache, pEntry->cbData);
1450
1451 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1452 RTMemFree(pEntry);
1453
1454 return VINF_SUCCESS;
1455}
1456
1457/**
1458 * Destroys all cache resources used by the given endpoint.
1459 *
1460 * @returns nothing.
1461 * @param pBlkCache Block cache handle.
1462 */
1463VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1464{
1465 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1466
1467 /*
1468 * Commit all dirty entries now (they are waited on for completion during the
1469 * destruction of the AVL tree below).
1470 * The exception is if the VM was paused because of an I/O error before.
1471 */
1472 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1473 pdmBlkCacheCommit(pBlkCache);
1474
1475 /* Make sure nobody is accessing the cache while we delete the tree. */
1476 pdmBlkCacheLockEnter(pCache);
1477 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1478 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1479 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1480
1481 RTSpinlockDestroy(pBlkCache->LockList);
1482
1483 pCache->cRefs--;
1484 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1485
1486 pdmBlkCacheLockLeave(pCache);
1487
1488 RTMemFree(pBlkCache->pTree);
1489 pBlkCache->pTree = NULL;
1490 RTSemRWDestroy(pBlkCache->SemRWEntries);
1491
1492#ifdef VBOX_WITH_STATISTICS
1493 STAMR3DeregisterF(pCache->pVM->pUVM, "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1494#endif
1495
1496 RTStrFree(pBlkCache->pszId);
1497 RTMemFree(pBlkCache);
1498}
1499
1500VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1501{
1502 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1503
1504 /*
1505 * Validate input.
1506 */
1507 if (!pDevIns)
1508 return;
1509 VM_ASSERT_EMT(pVM);
1510
1511 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1512 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1513
1514 /* Return silently if not supported. */
1515 if (!pBlkCacheGlobal)
1516 return;
1517
1518 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1519
1520 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1521 {
1522 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1523 && pBlkCache->u.Dev.pDevIns == pDevIns)
1524 PDMR3BlkCacheRelease(pBlkCache);
1525 }
1526
1527 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1528}
1529
1530VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1531{
1532 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1533
1534 /*
1535 * Validate input.
1536 */
1537 if (!pDrvIns)
1538 return;
1539 VM_ASSERT_EMT(pVM);
1540
1541 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1542 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1543
1544 /* Return silently if not supported. */
1545 if (!pBlkCacheGlobal)
1546 return;
1547
1548 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1549
1550 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1551 {
1552 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1553 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1554 PDMR3BlkCacheRelease(pBlkCache);
1555 }
1556
1557 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1558}
1559
1560VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1561{
1562 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1563
1564 /*
1565 * Validate input.
1566 */
1567 if (!pUsbIns)
1568 return;
1569 VM_ASSERT_EMT(pVM);
1570
1571 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1572 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1573
1574 /* Return silently if not supported. */
1575 if (!pBlkCacheGlobal)
1576 return;
1577
1578 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1579
1580 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1581 {
1582 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1583 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1584 PDMR3BlkCacheRelease(pBlkCache);
1585 }
1586
1587 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1588}
1589
1590static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1591{
1592 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1593
1594 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1595 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1596 if (pEntry)
1597 pdmBlkCacheEntryRef(pEntry);
1598 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1599
1600 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1601
1602 return pEntry;
1603}
1604
1605/**
1606 * Return the best fit cache entries for the given offset.
1607 *
1608 * @returns nothing.
1609 * @param pBlkCache The endpoint cache.
1610 * @param off The offset.
1611 * @param ppEntryAbove Where to store the pointer to the best fit entry above
1612 * the given offset. NULL if not required.
1613 */
1614static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEENTRY *ppEntryAbove)
1615{
1616 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1617
1618 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1619 if (ppEntryAbove)
1620 {
1621 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1622 if (*ppEntryAbove)
1623 pdmBlkCacheEntryRef(*ppEntryAbove);
1624 }
1625
1626 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1627
1628 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1629}
1630
1631static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1632{
1633 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeInsert, Cache);
1634 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1635 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1636 AssertMsg(fInserted, ("Node was not inserted into tree\n")); NOREF(fInserted);
1637 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeInsert, Cache);
1638 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1639}
1640
1641/**
1642 * Allocates and initializes a new entry for the cache.
1643 * The entry has a reference count of 1.
1644 *
1645 * @returns Pointer to the new cache entry or NULL if out of memory.
1646 * @param pBlkCache The cache the entry belongs to.
1647 * @param off Start offset.
1648 * @param cbData Size of the cache entry.
1649 * @param pbBuffer Pointer to the buffer to use.
1650 * NULL if a new buffer should be allocated.
1651 * The buffer needs to have the same size of the entry.
1652 */
1653static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbData, uint8_t *pbBuffer)
1654{
1655 AssertReturn(cbData <= UINT32_MAX, NULL);
1656 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1657
1658 if (RT_UNLIKELY(!pEntryNew))
1659 return NULL;
1660
1661 pEntryNew->Core.Key = off;
1662 pEntryNew->Core.KeyLast = off + cbData - 1;
1663 pEntryNew->pBlkCache = pBlkCache;
1664 pEntryNew->fFlags = 0;
1665 pEntryNew->cRefs = 1; /* We are using it now. */
1666 pEntryNew->pList = NULL;
1667 pEntryNew->cbData = (uint32_t)cbData;
1668 pEntryNew->pWaitingHead = NULL;
1669 pEntryNew->pWaitingTail = NULL;
1670 if (pbBuffer)
1671 pEntryNew->pbData = pbBuffer;
1672 else
1673 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1674
1675 if (RT_UNLIKELY(!pEntryNew->pbData))
1676 {
1677 RTMemFree(pEntryNew);
1678 return NULL;
1679 }
1680
1681 return pEntryNew;
1682}
1683
1684/**
1685 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1686 * in exclusive mode.
1687 *
1688 * @returns true if the flag in fSet is set and the one in fClear is clear.
1689 * false otherwise.
1690 * The R/W semaphore is only held if true is returned.
1691 *
1692 * @param pBlkCache The endpoint cache instance data.
1693 * @param pEntry The entry to check the flags for.
1694 * @param fSet The flag which is tested to be set.
1695 * @param fClear The flag which is tested to be clear.
1696 */
1697DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1698 PPDMBLKCACHEENTRY pEntry,
1699 uint32_t fSet, uint32_t fClear)
1700{
1701 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1702 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1703
1704 if (fPassed)
1705 {
1706 /* Acquire the lock and check again because the completion callback might have raced us. */
1707 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1708
1709 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1710 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1711
1712 /* Drop the lock if we didn't passed the test. */
1713 if (!fPassed)
1714 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1715 }
1716
1717 return fPassed;
1718}
1719
1720/**
1721 * Adds a segment to the waiting list for a cache entry
1722 * which is currently in progress.
1723 *
1724 * @returns nothing.
1725 * @param pEntry The cache entry to add the segment to.
1726 * @param pWaiter The waiter entry to add.
1727 */
1728DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1729 PPDMBLKCACHEWAITER pWaiter)
1730{
1731 pWaiter->pNext = NULL;
1732
1733 if (pEntry->pWaitingHead)
1734 {
1735 AssertPtr(pEntry->pWaitingTail);
1736
1737 pEntry->pWaitingTail->pNext = pWaiter;
1738 pEntry->pWaitingTail = pWaiter;
1739 }
1740 else
1741 {
1742 Assert(!pEntry->pWaitingTail);
1743
1744 pEntry->pWaitingHead = pWaiter;
1745 pEntry->pWaitingTail = pWaiter;
1746 }
1747}
1748
1749/**
1750 * Add a buffer described by the I/O memory context
1751 * to the entry waiting for completion.
1752 *
1753 * @returns VBox status code.
1754 * @param pEntry The entry to add the buffer to.
1755 * @param pReq The request.
1756 * @param pSgBuf The scatter/gather buffer. Will be advanced by cbData.
1757 * @param offDiff Offset from the start of the buffer in the entry.
1758 * @param cbData Amount of data to wait for onthis entry.
1759 * @param fWrite Flag whether the task waits because it wants to write to
1760 * the cache entry.
1761 */
1762static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry, PPDMBLKCACHEREQ pReq,
1763 PRTSGBUF pSgBuf, uint64_t offDiff, size_t cbData, bool fWrite)
1764{
1765 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1766 if (!pWaiter)
1767 return VERR_NO_MEMORY;
1768
1769 ASMAtomicIncU32(&pReq->cXfersPending);
1770 pWaiter->pReq = pReq;
1771 pWaiter->offCacheEntry = offDiff;
1772 pWaiter->cbTransfer = cbData;
1773 pWaiter->fWrite = fWrite;
1774 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1775 RTSgBufAdvance(pSgBuf, cbData);
1776
1777 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1778
1779 return VINF_SUCCESS;
1780}
1781
1782/**
1783 * Calculate aligned offset and size for a new cache entry which do not
1784 * intersect with an already existing entry and the file end.
1785 *
1786 * @returns The number of bytes the entry can hold of the requested amount
1787 * of bytes.
1788 * @param pBlkCache The endpoint cache.
1789 * @param off The start offset.
1790 * @param cb The number of bytes the entry needs to hold at
1791 * least.
1792 * @param pcbEntry Where to store the number of bytes the entry can hold.
1793 * Can be less than given because of other entries.
1794 */
1795static uint32_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1796 uint64_t off, uint32_t cb,
1797 uint32_t *pcbEntry)
1798{
1799 /* Get the best fit entries around the offset */
1800 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1801 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1802
1803 /* Log the info */
1804 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1805 pEntryAbove ? "B" : "No b",
1806 off,
1807 pEntryAbove ? pEntryAbove->Core.Key : 0,
1808 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1809 pEntryAbove ? pEntryAbove->cbData : 0));
1810
1811 uint32_t cbNext;
1812 uint32_t cbInEntry;
1813 if ( pEntryAbove
1814 && off + cb > pEntryAbove->Core.Key)
1815 {
1816 cbInEntry = (uint32_t)(pEntryAbove->Core.Key - off);
1817 cbNext = (uint32_t)(pEntryAbove->Core.Key - off);
1818 }
1819 else
1820 {
1821 cbInEntry = cb;
1822 cbNext = cb;
1823 }
1824
1825 /* A few sanity checks */
1826 AssertMsg(!pEntryAbove || off + cbNext <= pEntryAbove->Core.Key,
1827 ("Aligned size intersects with another cache entry\n"));
1828 Assert(cbInEntry <= cbNext);
1829
1830 if (pEntryAbove)
1831 pdmBlkCacheEntryRelease(pEntryAbove);
1832
1833 LogFlow(("off=%llu cbNext=%u\n", off, cbNext));
1834
1835 *pcbEntry = cbNext;
1836
1837 return cbInEntry;
1838}
1839
1840/**
1841 * Create a new cache entry evicting data from the cache if required.
1842 *
1843 * @returns Pointer to the new cache entry or NULL
1844 * if not enough bytes could be evicted from the cache.
1845 * @param pBlkCache The endpoint cache.
1846 * @param off The offset.
1847 * @param cb Number of bytes the cache entry should have.
1848 * @param pcbData Where to store the number of bytes the new
1849 * entry can hold. May be lower than actually
1850 * requested due to another entry intersecting the
1851 * access range.
1852 */
1853static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cb, size_t *pcbData)
1854{
1855 uint32_t cbEntry = 0;
1856
1857 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, (uint32_t)cb, &cbEntry);
1858 AssertReturn(cb <= UINT32_MAX, NULL);
1859
1860 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1861 pdmBlkCacheLockEnter(pCache);
1862
1863 PPDMBLKCACHEENTRY pEntryNew = NULL;
1864 uint8_t *pbBuffer = NULL;
1865 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1866 if (fEnough)
1867 {
1868 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1869
1870 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, pbBuffer);
1871 if (RT_LIKELY(pEntryNew))
1872 {
1873 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1874 pdmBlkCacheAdd(pCache, cbEntry);
1875 pdmBlkCacheLockLeave(pCache);
1876
1877 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1878
1879 AssertMsg( (off >= pEntryNew->Core.Key)
1880 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1881 ("Overflow in calculation off=%llu\n", off));
1882 }
1883 else
1884 pdmBlkCacheLockLeave(pCache);
1885 }
1886 else
1887 pdmBlkCacheLockLeave(pCache);
1888
1889 return pEntryNew;
1890}
1891
1892static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(void *pvUser)
1893{
1894 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1895
1896 if (RT_LIKELY(pReq))
1897 {
1898 pReq->pvUser = pvUser;
1899 pReq->rcReq = VINF_SUCCESS;
1900 pReq->cXfersPending = 0;
1901 }
1902
1903 return pReq;
1904}
1905
1906static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1907{
1908 switch (pBlkCache->enmType)
1909 {
1910 case PDMBLKCACHETYPE_DEV:
1911 {
1912 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1913 pReq->pvUser, pReq->rcReq);
1914 break;
1915 }
1916 case PDMBLKCACHETYPE_DRV:
1917 {
1918 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1919 pReq->pvUser, pReq->rcReq);
1920 break;
1921 }
1922 case PDMBLKCACHETYPE_USB:
1923 {
1924 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1925 pReq->pvUser, pReq->rcReq);
1926 break;
1927 }
1928 case PDMBLKCACHETYPE_INTERNAL:
1929 {
1930 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1931 pReq->pvUser, pReq->rcReq);
1932 break;
1933 }
1934 default:
1935 AssertMsgFailed(("Unknown block cache type!\n"));
1936 }
1937
1938 RTMemFree(pReq);
1939}
1940
1941static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1942 int rcReq, bool fCallHandler)
1943{
1944 if (RT_FAILURE(rcReq))
1945 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1946
1947 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1948 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1949
1950 if (!cXfersPending)
1951 {
1952 if (fCallHandler)
1953 pdmBlkCacheReqComplete(pBlkCache, pReq);
1954 return true;
1955 }
1956
1957 LogFlowFunc(("pReq=%#p cXfersPending=%u\n", pReq, cXfersPending));
1958 return false;
1959}
1960
1961VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1962 PCRTSGBUF pSgBuf, size_t cbRead, void *pvUser)
1963{
1964 int rc = VINF_SUCCESS;
1965 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1966 PPDMBLKCACHEENTRY pEntry;
1967 PPDMBLKCACHEREQ pReq;
1968
1969 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbRead=%u pvUser=%#p\n",
1970 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbRead, pvUser));
1971
1972 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
1973 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
1974
1975 RTSGBUF SgBuf;
1976 RTSgBufClone(&SgBuf, pSgBuf);
1977
1978 /* Allocate new request structure. */
1979 pReq = pdmBlkCacheReqAlloc(pvUser);
1980 if (RT_UNLIKELY(!pReq))
1981 return VERR_NO_MEMORY;
1982
1983 /* Increment data transfer counter to keep the request valid while we access it. */
1984 ASMAtomicIncU32(&pReq->cXfersPending);
1985
1986 while (cbRead)
1987 {
1988 size_t cbToRead;
1989
1990 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1991
1992 /*
1993 * If there is no entry we try to create a new one eviciting unused pages
1994 * if the cache is full. If this is not possible we will pass the request through
1995 * and skip the caching (all entries may be still in progress so they can't
1996 * be evicted)
1997 * If we have an entry it can be in one of the LRU lists where the entry
1998 * contains data (recently used or frequently used LRU) so we can just read
1999 * the data we need and put the entry at the head of the frequently used LRU list.
2000 * In case the entry is in one of the ghost lists it doesn't contain any data.
2001 * We have to fetch it again evicting pages from either T1 or T2 to make room.
2002 */
2003 if (pEntry)
2004 {
2005 uint64_t offDiff = off - pEntry->Core.Key;
2006
2007 AssertMsg(off >= pEntry->Core.Key,
2008 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2009 off, pEntry->Core.Key));
2010
2011 AssertPtr(pEntry->pList);
2012
2013 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
2014
2015 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
2016 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
2017 off, cbToRead));
2018
2019 cbRead -= cbToRead;
2020
2021 if (!cbRead)
2022 STAM_COUNTER_INC(&pCache->cHits);
2023 else
2024 STAM_COUNTER_INC(&pCache->cPartialHits);
2025
2026 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
2027
2028 /* Ghost lists contain no data. */
2029 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2030 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2031 {
2032 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2033 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2034 PDMBLKCACHE_ENTRY_IS_DIRTY))
2035 {
2036 /* Entry didn't completed yet. Append to the list */
2037 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2038 &SgBuf, offDiff, cbToRead,
2039 false /* fWrite */);
2040 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2041 }
2042 else
2043 {
2044 /* Read as much as we can from the entry. */
2045 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2046 }
2047
2048 /* Move this entry to the top position */
2049 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2050 {
2051 pdmBlkCacheLockEnter(pCache);
2052 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2053 pdmBlkCacheLockLeave(pCache);
2054 }
2055 /* Release the entry */
2056 pdmBlkCacheEntryRelease(pEntry);
2057 }
2058 else
2059 {
2060 uint8_t *pbBuffer = NULL;
2061
2062 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2063
2064 pdmBlkCacheLockEnter(pCache);
2065 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2066 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2067
2068 /* Move the entry to Am and fetch it to the cache. */
2069 if (fEnough)
2070 {
2071 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2072 pdmBlkCacheAdd(pCache, pEntry->cbData);
2073 pdmBlkCacheLockLeave(pCache);
2074
2075 if (pbBuffer)
2076 pEntry->pbData = pbBuffer;
2077 else
2078 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2079 AssertPtr(pEntry->pbData);
2080
2081 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2082 &SgBuf, offDiff, cbToRead,
2083 false /* fWrite */);
2084 pdmBlkCacheEntryReadFromMedium(pEntry);
2085 /* Release the entry */
2086 pdmBlkCacheEntryRelease(pEntry);
2087 }
2088 else
2089 {
2090 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2091 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2092 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2093 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2094 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2095
2096 pdmBlkCacheLockLeave(pCache);
2097
2098 RTMemFree(pEntry);
2099
2100 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2101 &SgBuf, off, cbToRead,
2102 PDMBLKCACHEXFERDIR_READ);
2103 }
2104 }
2105 }
2106 else
2107 {
2108#ifdef VBOX_WITH_IO_READ_CACHE
2109 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2110 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2111 off, cbRead,
2112 &cbToRead);
2113
2114 cbRead -= cbToRead;
2115
2116 if (pEntryNew)
2117 {
2118 if (!cbRead)
2119 STAM_COUNTER_INC(&pCache->cMisses);
2120 else
2121 STAM_COUNTER_INC(&pCache->cPartialHits);
2122
2123 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2124 &SgBuf,
2125 off - pEntryNew->Core.Key,
2126 cbToRead,
2127 false /* fWrite */);
2128 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2129 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2130 }
2131 else
2132 {
2133 /*
2134 * There is not enough free space in the cache.
2135 * Pass the request directly to the I/O manager.
2136 */
2137 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2138
2139 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2140 &SgBuf, off, cbToRead,
2141 PDMBLKCACHEXFERDIR_READ);
2142 }
2143#else
2144 /* Clip read size if necessary. */
2145 PPDMBLKCACHEENTRY pEntryAbove;
2146 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2147
2148 if (pEntryAbove)
2149 {
2150 if (off + cbRead > pEntryAbove->Core.Key)
2151 cbToRead = pEntryAbove->Core.Key - off;
2152 else
2153 cbToRead = cbRead;
2154
2155 pdmBlkCacheEntryRelease(pEntryAbove);
2156 }
2157 else
2158 cbToRead = cbRead;
2159
2160 cbRead -= cbToRead;
2161 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2162 &SgBuf, off, cbToRead,
2163 PDMBLKCACHEXFERDIR_READ);
2164#endif
2165 }
2166 off += cbToRead;
2167 }
2168
2169 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2170 rc = VINF_AIO_TASK_PENDING;
2171 else
2172 {
2173 rc = pReq->rcReq;
2174 RTMemFree(pReq);
2175 }
2176
2177 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2178
2179 return rc;
2180}
2181
2182VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off, PCRTSGBUF pSgBuf, size_t cbWrite, void *pvUser)
2183{
2184 int rc = VINF_SUCCESS;
2185 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2186 PPDMBLKCACHEENTRY pEntry;
2187 PPDMBLKCACHEREQ pReq;
2188
2189 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2190 pBlkCache, pBlkCache->pszId, off, pSgBuf, cbWrite, pvUser));
2191
2192 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2193 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2194
2195 RTSGBUF SgBuf;
2196 RTSgBufClone(&SgBuf, pSgBuf);
2197
2198 /* Allocate new request structure. */
2199 pReq = pdmBlkCacheReqAlloc(pvUser);
2200 if (RT_UNLIKELY(!pReq))
2201 return VERR_NO_MEMORY;
2202
2203 /* Increment data transfer counter to keep the request valid while we access it. */
2204 ASMAtomicIncU32(&pReq->cXfersPending);
2205
2206 while (cbWrite)
2207 {
2208 size_t cbToWrite;
2209
2210 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2211 if (pEntry)
2212 {
2213 /* Write the data into the entry and mark it as dirty */
2214 AssertPtr(pEntry->pList);
2215
2216 uint64_t offDiff = off - pEntry->Core.Key;
2217 AssertMsg(off >= pEntry->Core.Key, ("Overflow in calculation off=%llu OffsetAligned=%llu\n", off, pEntry->Core.Key));
2218
2219 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2220 cbWrite -= cbToWrite;
2221
2222 if (!cbWrite)
2223 STAM_COUNTER_INC(&pCache->cHits);
2224 else
2225 STAM_COUNTER_INC(&pCache->cPartialHits);
2226
2227 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2228
2229 /* Ghost lists contain no data. */
2230 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2231 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2232 {
2233 /* Check if the entry is dirty. */
2234 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2235 PDMBLKCACHE_ENTRY_IS_DIRTY,
2236 0))
2237 {
2238 /* If it is already dirty but not in progress just update the data. */
2239 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2240 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2241 else
2242 {
2243 /* The data isn't written to the file yet */
2244 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2245 &SgBuf, offDiff, cbToWrite,
2246 true /* fWrite */);
2247 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2248 }
2249
2250 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2251 }
2252 else /* Dirty bit not set */
2253 {
2254 /*
2255 * Check if a read is in progress for this entry.
2256 * We have to defer processing in that case.
2257 */
2258 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2259 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2260 0))
2261 {
2262 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2263 &SgBuf, offDiff, cbToWrite,
2264 true /* fWrite */);
2265 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2266 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2267 }
2268 else /* I/O in progress flag not set */
2269 {
2270 /* Write as much as we can into the entry and update the file. */
2271 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2272
2273 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2274 if (fCommit)
2275 pdmBlkCacheCommitDirtyEntries(pCache);
2276 }
2277 } /* Dirty bit not set */
2278
2279 /* Move this entry to the top position */
2280 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2281 {
2282 pdmBlkCacheLockEnter(pCache);
2283 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2284 pdmBlkCacheLockLeave(pCache);
2285 }
2286
2287 pdmBlkCacheEntryRelease(pEntry);
2288 }
2289 else /* Entry is on the ghost list */
2290 {
2291 uint8_t *pbBuffer = NULL;
2292
2293 pdmBlkCacheLockEnter(pCache);
2294 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2295 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2296
2297 if (fEnough)
2298 {
2299 /* Move the entry to Am and fetch it to the cache. */
2300 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2301 pdmBlkCacheAdd(pCache, pEntry->cbData);
2302 pdmBlkCacheLockLeave(pCache);
2303
2304 if (pbBuffer)
2305 pEntry->pbData = pbBuffer;
2306 else
2307 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2308 AssertPtr(pEntry->pbData);
2309
2310 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2311 &SgBuf, offDiff, cbToWrite,
2312 true /* fWrite */);
2313 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2314 pdmBlkCacheEntryReadFromMedium(pEntry);
2315
2316 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2317 pdmBlkCacheEntryRelease(pEntry);
2318 }
2319 else
2320 {
2321 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2322 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2323 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2324 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2325 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2326
2327 pdmBlkCacheLockLeave(pCache);
2328
2329 RTMemFree(pEntry);
2330 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2331 &SgBuf, off, cbToWrite,
2332 PDMBLKCACHEXFERDIR_WRITE);
2333 }
2334 }
2335 }
2336 else /* No entry found */
2337 {
2338 /*
2339 * No entry found. Try to create a new cache entry to store the data in and if that fails
2340 * write directly to the file.
2341 */
2342 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2343 off, cbWrite,
2344 &cbToWrite);
2345
2346 cbWrite -= cbToWrite;
2347
2348 if (pEntryNew)
2349 {
2350 uint64_t offDiff = off - pEntryNew->Core.Key;
2351
2352 STAM_COUNTER_INC(&pCache->cHits);
2353
2354 /*
2355 * Check if it is possible to just write the data without waiting
2356 * for it to get fetched first.
2357 */
2358 if (!offDiff && pEntryNew->cbData == cbToWrite)
2359 {
2360 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2361
2362 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2363 if (fCommit)
2364 pdmBlkCacheCommitDirtyEntries(pCache);
2365 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2366 }
2367 else
2368 {
2369 /* Defer the write and fetch the data from the endpoint. */
2370 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2371 &SgBuf, offDiff, cbToWrite,
2372 true /* fWrite */);
2373 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2374 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2375 }
2376
2377 pdmBlkCacheEntryRelease(pEntryNew);
2378 }
2379 else
2380 {
2381 /*
2382 * There is not enough free space in the cache.
2383 * Pass the request directly to the I/O manager.
2384 */
2385 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2386
2387 STAM_COUNTER_INC(&pCache->cMisses);
2388
2389 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2390 &SgBuf, off, cbToWrite,
2391 PDMBLKCACHEXFERDIR_WRITE);
2392 }
2393 }
2394
2395 off += cbToWrite;
2396 }
2397
2398 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2399 rc = VINF_AIO_TASK_PENDING;
2400 else
2401 {
2402 rc = pReq->rcReq;
2403 RTMemFree(pReq);
2404 }
2405
2406 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2407
2408 return rc;
2409}
2410
2411VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2412{
2413 int rc = VINF_SUCCESS;
2414 PPDMBLKCACHEREQ pReq;
2415
2416 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2417
2418 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2419 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2420
2421 /* Commit dirty entries in the cache. */
2422 pdmBlkCacheCommit(pBlkCache);
2423
2424 /* Allocate new request structure. */
2425 pReq = pdmBlkCacheReqAlloc(pvUser);
2426 if (RT_UNLIKELY(!pReq))
2427 return VERR_NO_MEMORY;
2428
2429 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2430 PDMBLKCACHEXFERDIR_FLUSH);
2431 AssertRC(rc);
2432
2433 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2434 return VINF_AIO_TASK_PENDING;
2435}
2436
2437VMMR3DECL(int) PDMR3BlkCacheDiscard(PPDMBLKCACHE pBlkCache, PCRTRANGE paRanges,
2438 unsigned cRanges, void *pvUser)
2439{
2440 int rc = VINF_SUCCESS;
2441 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2442 PPDMBLKCACHEENTRY pEntry;
2443 PPDMBLKCACHEREQ pReq;
2444
2445 LogFlowFunc((": pBlkCache=%#p{%s} paRanges=%#p cRanges=%u pvUser=%#p\n",
2446 pBlkCache, pBlkCache->pszId, paRanges, cRanges, pvUser));
2447
2448 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2449 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2450
2451 /* Allocate new request structure. */
2452 pReq = pdmBlkCacheReqAlloc(pvUser);
2453 if (RT_UNLIKELY(!pReq))
2454 return VERR_NO_MEMORY;
2455
2456 /* Increment data transfer counter to keep the request valid while we access it. */
2457 ASMAtomicIncU32(&pReq->cXfersPending);
2458
2459 for (unsigned i = 0; i < cRanges; i++)
2460 {
2461 uint64_t offCur = paRanges[i].offStart;
2462 size_t cbLeft = paRanges[i].cbRange;
2463
2464 while (cbLeft)
2465 {
2466 size_t cbThisDiscard = 0;
2467
2468 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, offCur);
2469
2470 if (pEntry)
2471 {
2472 /* Write the data into the entry and mark it as dirty */
2473 AssertPtr(pEntry->pList);
2474
2475 uint64_t offDiff = offCur - pEntry->Core.Key;
2476
2477 AssertMsg(offCur >= pEntry->Core.Key,
2478 ("Overflow in calculation offCur=%llu OffsetAligned=%llu\n",
2479 offCur, pEntry->Core.Key));
2480
2481 cbThisDiscard = RT_MIN(pEntry->cbData - offDiff, cbLeft);
2482
2483 /* Ghost lists contain no data. */
2484 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2485 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2486 {
2487 /* Check if the entry is dirty. */
2488 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2489 PDMBLKCACHE_ENTRY_IS_DIRTY,
2490 0))
2491 {
2492 /* If it is dirty but not yet in progress remove it. */
2493 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2494 {
2495 pdmBlkCacheLockEnter(pCache);
2496 pdmBlkCacheEntryRemoveFromList(pEntry);
2497
2498 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2499 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2500 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2501
2502 pdmBlkCacheLockLeave(pCache);
2503
2504 RTMemFree(pEntry);
2505 }
2506 else
2507 {
2508#if 0
2509 /* The data isn't written to the file yet */
2510 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2511 &SgBuf, offDiff, cbToWrite,
2512 true /* fWrite */);
2513 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2514#endif
2515 }
2516
2517 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2518 pdmBlkCacheEntryRelease(pEntry);
2519 }
2520 else /* Dirty bit not set */
2521 {
2522 /*
2523 * Check if a read is in progress for this entry.
2524 * We have to defer processing in that case.
2525 */
2526 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2527 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2528 0))
2529 {
2530#if 0
2531 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2532 &SgBuf, offDiff, cbToWrite,
2533 true /* fWrite */);
2534#endif
2535 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2536 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2537 pdmBlkCacheEntryRelease(pEntry);
2538 }
2539 else /* I/O in progress flag not set */
2540 {
2541 pdmBlkCacheLockEnter(pCache);
2542 pdmBlkCacheEntryRemoveFromList(pEntry);
2543
2544 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2545 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2546 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2547 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2548 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2549
2550 pdmBlkCacheLockLeave(pCache);
2551
2552 RTMemFree(pEntry);
2553 }
2554 } /* Dirty bit not set */
2555 }
2556 else /* Entry is on the ghost list just remove cache entry. */
2557 {
2558 pdmBlkCacheLockEnter(pCache);
2559 pdmBlkCacheEntryRemoveFromList(pEntry);
2560
2561 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2562 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2563 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2564 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2565 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2566
2567 pdmBlkCacheLockLeave(pCache);
2568
2569 RTMemFree(pEntry);
2570 }
2571 }
2572 /* else: no entry found. */
2573
2574 offCur += cbThisDiscard;
2575 cbLeft -= cbThisDiscard;
2576 }
2577 }
2578
2579 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2580 rc = VINF_AIO_TASK_PENDING;
2581 else
2582 {
2583 rc = pReq->rcReq;
2584 RTMemFree(pReq);
2585 }
2586
2587 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2588
2589 return rc;
2590}
2591
2592/**
2593 * Completes a task segment freeing all resources and completes the task handle
2594 * if everything was transferred.
2595 *
2596 * @returns Next task segment handle.
2597 * @param pBlkCache The endpoint block cache.
2598 * @param pWaiter Task segment to complete.
2599 * @param rc Status code to set.
2600 */
2601static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEWAITER pWaiter, int rc)
2602{
2603 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2604 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2605
2606 pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, true);
2607
2608 RTMemFree(pWaiter);
2609
2610 return pNext;
2611}
2612
2613static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2614{
2615 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2616 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2617
2618 /* Reference the entry now as we are clearing the I/O in progress flag
2619 * which protected the entry till now. */
2620 pdmBlkCacheEntryRef(pEntry);
2621
2622 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2623 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2624
2625 /* Process waiting segment list. The data in entry might have changed in-between. */
2626 bool fDirty = false;
2627 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2628 PPDMBLKCACHEWAITER pCurr = pComplete;
2629
2630 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2631 ("The list tail was not updated correctly\n"));
2632 pEntry->pWaitingTail = NULL;
2633 pEntry->pWaitingHead = NULL;
2634
2635 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2636 {
2637 /*
2638 * An error here is difficult to handle as the original request completed already.
2639 * The error is logged for now and the VM is paused.
2640 * If the user continues the entry is written again in the hope
2641 * the user fixed the problem and the next write succeeds.
2642 */
2643 if (RT_FAILURE(rcIoXfer))
2644 {
2645 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2646 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2647
2648 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2649 {
2650 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2651 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2652 "Make sure there is enough free space on the disk and that the disk is working properly. "
2653 "Operation can be resumed afterwards"),
2654 pBlkCache->pszId, rcIoXfer);
2655 AssertRC(rc);
2656 }
2657
2658 /* Mark the entry as dirty again to get it added to the list later on. */
2659 fDirty = true;
2660 }
2661
2662 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2663
2664 while (pCurr)
2665 {
2666 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2667
2668 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2669 fDirty = true;
2670 pCurr = pCurr->pNext;
2671 }
2672 }
2673 else
2674 {
2675 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2676 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2677 ("Invalid flags set\n"));
2678
2679 while (pCurr)
2680 {
2681 if (pCurr->fWrite)
2682 {
2683 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2684 fDirty = true;
2685 }
2686 else
2687 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2688
2689 pCurr = pCurr->pNext;
2690 }
2691 }
2692
2693 bool fCommit = false;
2694 if (fDirty)
2695 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2696
2697 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2698
2699 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2700 pdmBlkCacheEntryRelease(pEntry);
2701
2702 if (fCommit)
2703 pdmBlkCacheCommitDirtyEntries(pCache);
2704
2705 /* Complete waiters now. */
2706 while (pComplete)
2707 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2708}
2709
2710VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2711{
2712 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2713
2714 if (hIoXfer->fIoCache)
2715 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2716 else
2717 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, rcIoXfer, true);
2718
2719 ASMAtomicDecU32(&pBlkCache->cIoXfersActive);
2720 pdmBlkCacheR3TraceMsgF(pBlkCache, "BlkCache: I/O req %#p (%RTbool) completed (%u now active)",
2721 hIoXfer, hIoXfer->fIoCache, pBlkCache->cIoXfersActive);
2722 RTMemFree(hIoXfer);
2723}
2724
2725/**
2726 * Callback for the AVL do with all routine. Waits for a cachen entry to finish any pending I/O.
2727 *
2728 * @returns IPRT status code.
2729 * @param pNode The node to destroy.
2730 * @param pvUser Opaque user data.
2731 */
2732static DECLCALLBACK(int) pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser)
2733{
2734 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
2735 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
2736 NOREF(pvUser);
2737
2738 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
2739 {
2740 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
2741 pdmBlkCacheEntryRef(pEntry);
2742 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2743
2744 RTThreadSleep(1);
2745
2746 /* Re-enter all locks and drop the reference. */
2747 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2748 pdmBlkCacheEntryRelease(pEntry);
2749 }
2750
2751 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
2752 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
2753
2754 return VINF_SUCCESS;
2755}
2756
2757VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache)
2758{
2759 int rc = VINF_SUCCESS;
2760 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2761
2762 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2763
2764 if (!ASMAtomicReadBool(&pBlkCache->pCache->fIoErrorVmSuspended))
2765 pdmBlkCacheCommit(pBlkCache); /* Can issue new I/O requests. */
2766 ASMAtomicXchgBool(&pBlkCache->fSuspended, true);
2767
2768 /* Wait for all I/O to complete. */
2769 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2770 rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL);
2771 AssertRC(rc);
2772 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2773
2774 return rc;
2775}
2776
2777VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache)
2778{
2779 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2780
2781 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2782
2783 ASMAtomicXchgBool(&pBlkCache->fSuspended, false);
2784
2785 return VINF_SUCCESS;
2786}
2787
2788VMMR3DECL(int) PDMR3BlkCacheClear(PPDMBLKCACHE pBlkCache)
2789{
2790 int rc = VINF_SUCCESS;
2791 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2792
2793 /*
2794 * Commit all dirty entries now (they are waited on for completion during the
2795 * destruction of the AVL tree below).
2796 * The exception is if the VM was paused because of an I/O error before.
2797 */
2798 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
2799 pdmBlkCacheCommit(pBlkCache);
2800
2801 /* Make sure nobody is accessing the cache while we delete the tree. */
2802 pdmBlkCacheLockEnter(pCache);
2803 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2804 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
2805 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2806
2807 pdmBlkCacheLockLeave(pCache);
2808 return rc;
2809}
2810
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette