VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMBlkCache.cpp@ 39944

Last change on this file since 39944 was 39515, checked in by vboxsync, 13 years ago

*: Use RTLISTANCHOR.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 98.6 KB
Line 
1/* $Id: PDMBlkCache.cpp 39515 2011-12-02 13:41:07Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
26#include "PDMInternal.h"
27#include <iprt/asm.h>
28#include <iprt/mem.h>
29#include <iprt/path.h>
30#include <iprt/string.h>
31#include <VBox/log.h>
32#include <VBox/vmm/stam.h>
33#include <VBox/vmm/uvm.h>
34#include <VBox/vmm/vm.h>
35
36#include "PDMBlkCacheInternal.h"
37
/*
 * Lock ownership assertions.
 *
 * In strict builds these macros assert that the calling thread holds the lock
 * in question; in all other builds they expand to an empty statement. The
 * macro parameters are parenthesized so that any pointer expression can be
 * passed safely.
 */
#ifdef VBOX_STRICT
/* Asserts that the caller owns the critical section of the given global cache. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while(0)

/* Asserts that the caller holds the per endpoint RW semaphore for writing. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while(0)

/* Asserts that the caller holds the per endpoint RW semaphore for reading. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while(0)

#else
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while(0)
#endif
65
66#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1
67
68/*******************************************************************************
69* Internal Functions *
70*******************************************************************************/
71
72static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
73 uint64_t off, size_t cbData, uint8_t *pbBuffer);
74static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
75
76/**
77 * Decrement the reference counter of the given cache entry.
78 *
79 * @returns nothing.
80 * @param pEntry The entry to release.
81 */
82DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
83{
84 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
85 ASMAtomicDecU32(&pEntry->cRefs);
86}
87
88/**
89 * Increment the reference counter of the given cache entry.
90 *
91 * @returns nothing.
92 * @param pEntry The entry to reference.
93 */
94DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
95{
96 ASMAtomicIncU32(&pEntry->cRefs);
97}
98
#ifdef VBOX_STRICT
/**
 * Sanity checks the byte accounting of the global cache (strict builds only).
 *
 * @returns nothing.
 * @param   pCache    The global cache instance to check.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    /* The cache must never hold more data than it is configured for. */
    AssertMsg(pCache->cbMax >= pCache->cbCached,
              ("Current amount of cached data exceeds maximum\n"));

    /* The recently used (in) and frequently used lists together account for all cached data. */
    AssertMsg(pCache->cbCached == pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The paged out list has its own limit. */
    AssertMsg(pCache->cbRecentlyUsedOutMax >= pCache->LruRecentlyUsedOut.cbCached,
              ("Paged out list exceeds maximum\n"));
}
#endif
114
115DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
116{
117 RTCritSectEnter(&pCache->CritSect);
118#ifdef VBOX_STRICT
119 pdmBlkCacheValidate(pCache);
120#endif
121}
122
123DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
124{
125#ifdef VBOX_STRICT
126 pdmBlkCacheValidate(pCache);
127#endif
128 RTCritSectLeave(&pCache->CritSect);
129}
130
131DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
132{
133 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
134 pCache->cbCached -= cbAmount;
135}
136
137DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
138{
139 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
140 pCache->cbCached += cbAmount;
141}
142
143DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
144{
145 pList->cbCached += cbAmount;
146}
147
148DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
149{
150 pList->cbCached -= cbAmount;
151}
152
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Verifies the structural integrity of an LRU list.
 *
 * Asserts that no entry is linked twice (which would also mean a cycle), that
 * the forbidden entry is absent and that the last linked entry is the tail
 * recorded in the list.
 *
 * @returns nothing
 * @param   pList         The LRU list to check.
 * @param   pNotInList    Entry which must not be part of the list.
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pIt = pList->pHead; pIt; pIt = pIt->pNext)
    {
        /* None of the successors may be the entry itself. */
        for (PPDMBLKCACHEENTRY pSucc = pIt->pNext; pSucc; pSucc = pSucc->pNext)
        {
            AssertMsg(pIt != pSucc,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pIt, pList));
        }

        AssertMsg(pIt != pNotInList, ("Not allowed entry %#p is in list\n", pIt));

        if (!pIt->pNext)
        {
            AssertMsg(pIt == pList->pTail, ("End of list reached but last element is not list tail\n"));
        }
    }
}
#endif
187
188/**
189 * Unlinks a cache entry from the LRU list it is assigned to.
190 *
191 * @returns nothing.
192 * @param pEntry The entry to unlink.
193 */
194static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
195{
196 PPDMBLKLRULIST pList = pEntry->pList;
197 PPDMBLKCACHEENTRY pPrev, pNext;
198
199 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
200
201 AssertPtr(pList);
202
203#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
204 pdmBlkCacheCheckList(pList, NULL);
205#endif
206
207 pPrev = pEntry->pPrev;
208 pNext = pEntry->pNext;
209
210 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
211 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
212
213 if (pPrev)
214 pPrev->pNext = pNext;
215 else
216 {
217 pList->pHead = pNext;
218
219 if (pNext)
220 pNext->pPrev = NULL;
221 }
222
223 if (pNext)
224 pNext->pPrev = pPrev;
225 else
226 {
227 pList->pTail = pPrev;
228
229 if (pPrev)
230 pPrev->pNext = NULL;
231 }
232
233 pEntry->pList = NULL;
234 pEntry->pPrev = NULL;
235 pEntry->pNext = NULL;
236 pdmBlkCacheListSub(pList, pEntry->cbData);
237#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
238 pdmBlkCacheCheckList(pList, pEntry);
239#endif
240}
241
242/**
243 * Adds a cache entry to the given LRU list unlinking it from the currently
244 * assigned list if needed.
245 *
246 * @returns nothing.
247 * @param pList List to the add entry to.
248 * @param pEntry Entry to add.
249 */
250static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
251{
252 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
253#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
254 pdmBlkCacheCheckList(pList, NULL);
255#endif
256
257 /* Remove from old list if needed */
258 if (pEntry->pList)
259 pdmBlkCacheEntryRemoveFromList(pEntry);
260
261 pEntry->pNext = pList->pHead;
262 if (pList->pHead)
263 pList->pHead->pPrev = pEntry;
264 else
265 {
266 Assert(!pList->pTail);
267 pList->pTail = pEntry;
268 }
269
270 pEntry->pPrev = NULL;
271 pList->pHead = pEntry;
272 pdmBlkCacheListAdd(pList, pEntry->cbData);
273 pEntry->pList = pList;
274#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
275 pdmBlkCacheCheckList(pList, NULL);
276#endif
277}
278
279/**
280 * Destroys a LRU list freeing all entries.
281 *
282 * @returns nothing
283 * @param pList Pointer to the LRU list to destroy.
284 *
285 * @note The caller must own the critical section of the cache.
286 */
287static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
288{
289 while (pList->pHead)
290 {
291 PPDMBLKCACHEENTRY pEntry = pList->pHead;
292
293 pList->pHead = pEntry->pNext;
294
295 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
296 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
297
298 RTMemPageFree(pEntry->pbData, pEntry->cbData);
299 RTMemFree(pEntry);
300 }
301}
302
303/**
304 * Tries to remove the given amount of bytes from a given list in the cache
305 * moving the entries to one of the given ghosts lists
306 *
307 * @returns Amount of data which could be freed.
308 * @param pCache Pointer to the global cache data.
309 * @param cbData The amount of the data to free.
310 * @param pListSrc The source list to evict data from.
311 * @param pGhostListSrc The ghost list removed entries should be moved to
312 * NULL if the entry should be freed.
313 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size
314 * @param ppbBuf Where to store the address of the buffer if an entry with the
315 * same size was found and fReuseBuffer is true.
316 *
317 * @note This function may return fewer bytes than requested because entries
318 * may be marked as non evictable if they are used for I/O at the
319 * moment.
320 */
321static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
322 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
323 bool fReuseBuffer, uint8_t **ppbBuffer)
324{
325 size_t cbEvicted = 0;
326
327 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
328
329 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
330 AssertMsg( !pGhostListDst
331 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
332 ("Destination list must be NULL or the recently used but paged out list\n"));
333
334 if (fReuseBuffer)
335 {
336 AssertPtr(ppbBuffer);
337 *ppbBuffer = NULL;
338 }
339
340 /* Start deleting from the tail. */
341 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
342
343 while ((cbEvicted < cbData) && pEntry)
344 {
345 PPDMBLKCACHEENTRY pCurr = pEntry;
346
347 pEntry = pEntry->pPrev;
348
349 /* We can't evict pages which are currently in progress or dirty but not in progress */
350 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
351 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
352 {
353 /* Ok eviction candidate. Grab the endpoint semaphore and check again
354 * because somebody else might have raced us. */
355 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
356 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
357
358 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
359 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
360 {
361 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
362
363 if (fReuseBuffer && pCurr->cbData == cbData)
364 {
365 STAM_COUNTER_INC(&pCache->StatBuffersReused);
366 *ppbBuffer = pCurr->pbData;
367 }
368 else if (pCurr->pbData)
369 RTMemPageFree(pCurr->pbData, pCurr->cbData);
370
371 pCurr->pbData = NULL;
372 cbEvicted += pCurr->cbData;
373
374 pdmBlkCacheEntryRemoveFromList(pCurr);
375 pdmBlkCacheSub(pCache, pCurr->cbData);
376
377 if (pGhostListDst)
378 {
379 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
380
381 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
382
383 /* We have to remove the last entries from the paged out list. */
384 while ( pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax
385 && pGhostEntFree)
386 {
387 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
388 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
389
390 pGhostEntFree = pGhostEntFree->pPrev;
391
392 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
393
394 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
395 {
396 pdmBlkCacheEntryRemoveFromList(pFree);
397
398 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
399 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
400 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
401
402 RTMemFree(pFree);
403 }
404
405 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
406 }
407
408 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
409 {
410 /* Couldn't remove enough entries. Delete */
411 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
412 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
413 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
414
415 RTMemFree(pCurr);
416 }
417 else
418 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
419 }
420 else
421 {
422 /* Delete the entry from the AVL tree it is assigned to. */
423 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
424 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
425 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
426
427 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
428 RTMemFree(pCurr);
429 }
430 }
431
432 }
433 else
434 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
435 }
436
437 return cbEvicted;
438}
439
440static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
441{
442 size_t cbRemoved = 0;
443
444 if ((pCache->cbCached + cbData) < pCache->cbMax)
445 return true;
446 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
447 {
448 /* Try to evict as many bytes as possible from A1in */
449 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
450 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
451
452 /*
453 * If it was not possible to remove enough entries
454 * try the frequently accessed cache.
455 */
456 if (cbRemoved < cbData)
457 {
458 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
459
460 /*
461 * If we removed something we can't pass the reuse buffer flag anymore because
462 * we don't need to evict that much data
463 */
464 if (!cbRemoved)
465 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
466 NULL, fReuseBuffer, ppbBuffer);
467 else
468 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
469 NULL, false, NULL);
470 }
471 }
472 else
473 {
474 /* We have to remove entries from frequently access list. */
475 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
476 NULL, fReuseBuffer, ppbBuffer);
477 }
478
479 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
480 return (cbRemoved >= cbData);
481}
482
483DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, size_t cbXfer, PPDMBLKCACHEIOXFER pIoXfer)
484{
485 int rc = VINF_SUCCESS;
486
487 LogFlowFunc(("%s: Enqueuing hIoXfer=%#p enmXferDir=%d\n",
488 __FUNCTION__, pIoXfer, pIoXfer->enmXferDir));
489
490 switch (pBlkCache->enmType)
491 {
492 case PDMBLKCACHETYPE_DEV:
493 {
494 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
495 pIoXfer->enmXferDir,
496 off, cbXfer,
497 &pIoXfer->SgBuf, pIoXfer);
498 break;
499 }
500 case PDMBLKCACHETYPE_DRV:
501 {
502 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
503 pIoXfer->enmXferDir,
504 off, cbXfer,
505 &pIoXfer->SgBuf, pIoXfer);
506 break;
507 }
508 case PDMBLKCACHETYPE_USB:
509 {
510 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
511 pIoXfer->enmXferDir,
512 off, cbXfer,
513 &pIoXfer->SgBuf, pIoXfer);
514 break;
515 }
516 case PDMBLKCACHETYPE_INTERNAL:
517 {
518 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
519 pIoXfer->enmXferDir,
520 off, cbXfer,
521 &pIoXfer->SgBuf, pIoXfer);
522 break;
523 }
524 default:
525 AssertMsgFailed(("Unknown block cache type!\n"));
526 }
527
528 LogFlowFunc(("%s: returns rc=%Rrc\n", __FUNCTION__, rc));
529 return rc;
530}
531
532/**
533 * Initiates a read I/O task for the given entry.
534 *
535 * @returns VBox status code.
536 * @param pEntry The entry to fetch the data to.
537 */
538static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
539{
540 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
541 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
542
543 /* Make sure no one evicts the entry while it is accessed. */
544 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
545
546 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
547 if (RT_UNLIKELY(!pIoXfer))
548 return VERR_NO_MEMORY;
549
550 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
551
552 pIoXfer->fIoCache = true;
553 pIoXfer->pEntry = pEntry;
554 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
555 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
556 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
557 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
558
559 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
560}
561
562/**
563 * Initiates a write I/O task for the given entry.
564 *
565 * @returns nothing.
566 * @param pEntry The entry to read the data from.
567 */
568static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
569{
570 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
571 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
572
573 /* Make sure no one evicts the entry while it is accessed. */
574 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
575
576 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
577 if (RT_UNLIKELY(!pIoXfer))
578 return VERR_NO_MEMORY;
579
580 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
581
582 pIoXfer->fIoCache = true;
583 pIoXfer->pEntry = pEntry;
584 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
585 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
586 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
587 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
588
589 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pEntry->cbData, pIoXfer);
590}
591
592/**
593 * Passthrough a part of a request directly to the I/O manager
594 * handling the endpoint.
595 *
596 * @returns VBox status code.
597 * @param pEndpoint The endpoint.
598 * @param pTask The task.
599 * @param pIoMemCtx The I/O memory context to use.
600 * @param offStart Offset to start transfer from.
601 * @param cbData Amount of data to transfer.
602 * @param enmTransferType The transfer type (read/write)
603 */
604static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
605 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
606 PDMBLKCACHEXFERDIR enmXferDir)
607{
608
609 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
610 if (RT_UNLIKELY(!pIoXfer))
611 return VERR_NO_MEMORY;
612
613 ASMAtomicIncU32(&pReq->cXfersPending);
614 pIoXfer->fIoCache = false;
615 pIoXfer->pReq = pReq;
616 pIoXfer->enmXferDir = enmXferDir;
617 if (pSgBuf)
618 {
619 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
620 RTSgBufAdvance(pSgBuf, cbData);
621 }
622
623 return pdmBlkCacheEnqueue(pBlkCache, offStart, cbData, pIoXfer);
624}
625
626/**
627 * Commit a single dirty entry to the endpoint
628 *
629 * @returns nothing
630 * @param pEntry The entry to commit.
631 */
632static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
633{
634 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
635 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
636 ("Invalid flags set for entry %#p\n", pEntry));
637
638 pdmBlkCacheEntryWriteToMedium(pEntry);
639}
640
641/**
642 * Commit all dirty entries for a single endpoint.
643 *
644 * @returns nothing.
645 * @param pBlkCache The endpoint cache to commit.
646 */
647static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
648{
649 uint32_t cbCommitted = 0;
650
651 /* Return if the cache was suspended. */
652 if (pBlkCache->fSuspended)
653 return;
654
655 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
656
657 /* The list is moved to a new header to reduce locking overhead. */
658 RTLISTANCHOR ListDirtyNotCommitted;
659 RTSPINLOCKTMP Tmp;
660
661 RTListInit(&ListDirtyNotCommitted);
662 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
663 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
664 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
665
666 if (!RTListIsEmpty(&ListDirtyNotCommitted))
667 {
668 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
669
670 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
671 {
672 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
673 NodeNotCommitted);
674 pdmBlkCacheEntryCommit(pEntry);
675 cbCommitted += pEntry->cbData;
676 RTListNodeRemove(&pEntry->NodeNotCommitted);
677 pEntry = pNext;
678 }
679
680 /* Commit the last endpoint */
681 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
682 pdmBlkCacheEntryCommit(pEntry);
683 cbCommitted += pEntry->cbData;
684 RTListNodeRemove(&pEntry->NodeNotCommitted);
685 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
686 ("Committed all entries but list is not empty\n"));
687 }
688
689 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
690 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
691 ("Number of committed bytes exceeds number of dirty bytes\n"));
692 uint32_t cbDirtyOld = ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
693
694 /* Reset the commit timer if we don't have any dirty bits. */
695 if ( !(cbDirtyOld - cbCommitted)
696 && pBlkCache->pCache->u32CommitTimeoutMs != 0)
697 TMTimerStop(pBlkCache->pCache->pTimerCommit);
698}
699
700/**
701 * Commit all dirty entries in the cache.
702 *
703 * @returns nothing.
704 * @param pCache The global cache instance.
705 */
706static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
707{
708 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
709
710 if (!fCommitInProgress)
711 {
712 pdmBlkCacheLockEnter(pCache);
713 Assert(!RTListIsEmpty(&pCache->ListUsers));
714
715 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
716 AssertPtr(pBlkCache);
717
718 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
719 {
720 pdmBlkCacheCommit(pBlkCache);
721
722 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
723 NodeCacheUser);
724 }
725
726 /* Commit the last endpoint */
727 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
728 pdmBlkCacheCommit(pBlkCache);
729
730 pdmBlkCacheLockLeave(pCache);
731 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
732 }
733}
734
735/**
736 * Adds the given entry as a dirty to the cache.
737 *
738 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
739 * @param pBlkCache The endpoint cache the entry belongs to.
740 * @param pEntry The entry to add.
741 */
742static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
743{
744 bool fDirtyBytesExceeded = false;
745 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
746
747 /* If the commit timer is disabled we commit right away. */
748 if (pCache->u32CommitTimeoutMs == 0)
749 {
750 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
751 pdmBlkCacheEntryCommit(pEntry);
752 }
753 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
754 {
755 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
756
757 RTSPINLOCKTMP Tmp;
758 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
759 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
760 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
761
762 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
763
764 /* Prevent committing if the VM was suspended. */
765 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
766 fDirtyBytesExceeded = (cbDirty + pEntry->cbData >= pCache->cbCommitDirtyThreshold);
767 else if (!cbDirty && pCache->u32CommitTimeoutMs > 0)
768 {
769 /* Arm the commit timer. */
770 TMTimerSetMillies(pCache->pTimerCommit, pCache->u32CommitTimeoutMs);
771 }
772 }
773
774 return fDirtyBytesExceeded;
775}
776
777static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
778{
779 bool fFound = false;
780 PPDMBLKCACHE pBlkCache = NULL;
781
782 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
783 {
784 if (!RTStrCmp(pBlkCache->pszId, pcszId))
785 {
786 fFound = true;
787 break;
788 }
789 }
790
791 return fFound ? pBlkCache : NULL;
792}
793
794/**
795 * Commit timer callback.
796 */
797static DECLCALLBACK(void) pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
798{
799 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
800 NOREF(pVM); NOREF(pTimer);
801
802 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
803
804 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
805 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
806 pdmBlkCacheCommitDirtyEntries(pCache);
807
808 LogFlowFunc(("Entries committed, going to sleep\n"));
809}
810
811static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
812{
813 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
814
815 AssertPtr(pBlkCacheGlobal);
816
817 pdmBlkCacheLockEnter(pBlkCacheGlobal);
818
819 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
820
821 /* Go through the list and save all dirty entries. */
822 PPDMBLKCACHE pBlkCache;
823 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
824 {
825 uint32_t cEntries = 0;
826 PPDMBLKCACHEENTRY pEntry;
827
828 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
829 SSMR3PutU32(pSSM, (uint32_t)strlen(pBlkCache->pszId));
830 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
831
832 /* Count the number of entries to safe. */
833 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
834 {
835 cEntries++;
836 }
837
838 SSMR3PutU32(pSSM, cEntries);
839
840 /* Walk the list of all dirty entries and save them. */
841 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
842 {
843 /* A few sanity checks. */
844 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
845 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
846 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
847 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
848 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
849 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
850 ("Invalid list\n"));
851 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
852 ("Size and range do not match\n"));
853
854 /* Save */
855 SSMR3PutU64(pSSM, pEntry->Core.Key);
856 SSMR3PutU32(pSSM, pEntry->cbData);
857 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
858 }
859
860 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
861 }
862
863 pdmBlkCacheLockLeave(pBlkCacheGlobal);
864
865 /* Terminator */
866 return SSMR3PutU32(pSSM, UINT32_MAX);
867}
868
869static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
870{
871 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
872 uint32_t cRefs;
873
874 NOREF(uPass);
875 AssertPtr(pBlkCacheGlobal);
876
877 pdmBlkCacheLockEnter(pBlkCacheGlobal);
878
879 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
880 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
881
882 SSMR3GetU32(pSSM, &cRefs);
883
884 /*
885 * Fewer users in the saved state than in the current VM are allowed
886 * because that means that there are only new ones which don't have any saved state
887 * which can get lost.
888 * More saved entries that current ones are not allowed because this could result in
889 * lost data.
890 */
891 int rc = VINF_SUCCESS;
892 if (cRefs <= pBlkCacheGlobal->cRefs)
893 {
894 char *pszId = NULL;
895
896 while ( cRefs > 0
897 && RT_SUCCESS(rc))
898 {
899 PPDMBLKCACHE pBlkCache = NULL;
900 uint32_t cbId = 0;
901
902 SSMR3GetU32(pSSM, &cbId);
903 Assert(cbId > 0);
904
905 cbId++; /* Include terminator */
906 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
907 if (!pszId)
908 {
909 rc = VERR_NO_MEMORY;
910 break;
911 }
912
913 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
914 AssertRC(rc);
915
916 /* Search for the block cache with the provided id. */
917 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
918 if (!pBlkCache)
919 {
920 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
921 N_("The VM is missing a block device. Please make sure the source and target VMs have compatible storage configurations"));
922 break;
923 }
924
925 RTStrFree(pszId);
926 pszId = NULL;
927
928 /* Get the entries */
929 uint32_t cEntries;
930 SSMR3GetU32(pSSM, &cEntries);
931
932 while (cEntries > 0)
933 {
934 PPDMBLKCACHEENTRY pEntry;
935 uint64_t off;
936 uint32_t cbEntry;
937
938 SSMR3GetU64(pSSM, &off);
939 SSMR3GetU32(pSSM, &cbEntry);
940
941 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
942 if (!pEntry)
943 {
944 rc = VERR_NO_MEMORY;
945 break;
946 }
947
948 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
949 if (RT_FAILURE(rc))
950 {
951 RTMemFree(pEntry->pbData);
952 RTMemFree(pEntry);
953 break;
954 }
955
956 /* Insert into the tree. */
957 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
958 Assert(fInserted); NOREF(fInserted);
959
960 /* Add to the dirty list. */
961 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
962 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
963 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
964 pdmBlkCacheEntryRelease(pEntry);
965 cEntries--;
966 }
967
968 cRefs--;
969 }
970
971 if (pszId)
972 RTStrFree(pszId);
973 }
974 else
975 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
976 N_("The VM is missing a block device. Please make sure the source and target VMs have compatible storage configurations"));
977
978 pdmBlkCacheLockLeave(pBlkCacheGlobal);
979
980 if (RT_SUCCESS(rc))
981 {
982 uint32_t u32 = 0;
983 rc = SSMR3GetU32(pSSM, &u32);
984 if (RT_SUCCESS(rc))
985 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
986 }
987
988 return rc;
989}
990
991int pdmR3BlkCacheInit(PVM pVM)
992{
993 int rc = VINF_SUCCESS;
994 PUVM pUVM = pVM->pUVM;
995 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
996
997 LogFlowFunc((": pVM=%p\n", pVM));
998
999 VM_ASSERT_EMT(pVM);
1000
1001 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
1002 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
1003
1004 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
1005 if (!pBlkCacheGlobal)
1006 return VERR_NO_MEMORY;
1007
1008 RTListInit(&pBlkCacheGlobal->ListUsers);
1009 pBlkCacheGlobal->pVM = pVM;
1010 pBlkCacheGlobal->cRefs = 0;
1011 pBlkCacheGlobal->cbCached = 0;
1012 pBlkCacheGlobal->fCommitInProgress = false;
1013
1014 /* Initialize members */
1015 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
1016 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
1017 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
1018
1019 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
1020 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
1021 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
1022
1023 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
1024 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1025 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1026
1027 do
1028 {
1029 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1030 AssertLogRelRCBreak(rc);
1031 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1032
1033 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1034 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1035 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1036 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1037
1038 /** @todo r=aeichner: Experiment to find optimal default values */
1039 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1040 AssertLogRelRCBreak(rc);
1041 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1042 AssertLogRelRCBreak(rc);
1043 } while (0);
1044
1045 if (RT_SUCCESS(rc))
1046 {
1047 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1048 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1049 "/PDM/BlkCache/cbMax",
1050 STAMUNIT_BYTES,
1051 "Maximum cache size");
1052 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1053 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1054 "/PDM/BlkCache/cbCached",
1055 STAMUNIT_BYTES,
1056 "Currently used cache");
1057 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1058 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1059 "/PDM/BlkCache/cbCachedMruIn",
1060 STAMUNIT_BYTES,
1061 "Number of bytes cached in MRU list");
1062 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1063 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1064 "/PDM/BlkCache/cbCachedMruOut",
1065 STAMUNIT_BYTES,
1066 "Number of bytes cached in FRU list");
1067 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1068 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1069 "/PDM/BlkCache/cbCachedFru",
1070 STAMUNIT_BYTES,
1071 "Number of bytes cached in FRU ghost list");
1072
1073#ifdef VBOX_WITH_STATISTICS
1074 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1075 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1076 "/PDM/BlkCache/CacheHits",
1077 STAMUNIT_COUNT, "Number of hits in the cache");
1078 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1079 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1080 "/PDM/BlkCache/CachePartialHits",
1081 STAMUNIT_COUNT, "Number of partial hits in the cache");
1082 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1083 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1084 "/PDM/BlkCache/CacheMisses",
1085 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1086 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1087 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1088 "/PDM/BlkCache/CacheRead",
1089 STAMUNIT_BYTES, "Number of bytes read from the cache");
1090 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1091 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1092 "/PDM/BlkCache/CacheWritten",
1093 STAMUNIT_BYTES, "Number of bytes written to the cache");
1094 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1095 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1096 "/PDM/BlkCache/CacheTreeGet",
1097 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1098 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1099 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1100 "/PDM/BlkCache/CacheTreeInsert",
1101 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1102 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1103 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1104 "/PDM/BlkCache/CacheTreeRemove",
1105 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1106 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1107 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1108 "/PDM/BlkCache/CacheBuffersReused",
1109 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1110#endif
1111
1112 /* Initialize the critical section */
1113 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1114 }
1115
1116 if (RT_SUCCESS(rc))
1117 {
1118 /* Create the commit timer */
1119 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1120 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
1121 pdmBlkCacheCommitTimerCallback,
1122 pBlkCacheGlobal,
1123 "BlkCache-Commit",
1124 &pBlkCacheGlobal->pTimerCommit);
1125
1126 if (RT_SUCCESS(rc))
1127 {
1128 /* Register saved state handler. */
1129 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1130 NULL, NULL, NULL,
1131 NULL, pdmR3BlkCacheSaveExec, NULL,
1132 NULL, pdmR3BlkCacheLoadExec, NULL);
1133 if (RT_SUCCESS(rc))
1134 {
1135 LogRel(("BlkCache: Cache successfully initialised. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1136 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1137 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1138 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1139 return VINF_SUCCESS;
1140 }
1141 }
1142
1143 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1144 }
1145
1146 if (pBlkCacheGlobal)
1147 RTMemFree(pBlkCacheGlobal);
1148
1149 LogFlowFunc((": returns rc=%Rrc\n", pVM, rc));
1150 return rc;
1151}
1152
1153void pdmR3BlkCacheTerm(PVM pVM)
1154{
1155 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1156
1157 if (pBlkCacheGlobal)
1158 {
1159 /* Make sure no one else uses the cache now */
1160 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1161
1162 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1163 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1164 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1165 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1166
1167 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1168
1169 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1170 RTMemFree(pBlkCacheGlobal);
1171 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1172 }
1173}
1174
1175int pdmR3BlkCacheResume(PVM pVM)
1176{
1177 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1178
1179 LogFlowFunc(("pVM=%#p\n", pVM));
1180
1181 if ( pBlkCacheGlobal
1182 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1183 {
1184 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1185 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1186 }
1187
1188 return VINF_SUCCESS;
1189}
1190
1191static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1192{
1193 int rc = VINF_SUCCESS;
1194 PPDMBLKCACHE pBlkCache = NULL;
1195 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1196
1197 if (!pBlkCacheGlobal)
1198 return VERR_NOT_SUPPORTED;
1199
1200 /*
1201 * Check that no other user cache has the same id first,
1202 * Unique id's are necessary in case the state is saved.
1203 */
1204 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1205
1206 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1207
1208 if (!pBlkCache)
1209 {
1210 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1211
1212 if (pBlkCache)
1213 pBlkCache->pszId = RTStrDup(pcszId);
1214
1215 if ( pBlkCache
1216 && pBlkCache->pszId)
1217 {
1218 pBlkCache->fSuspended = false;
1219 pBlkCache->pCache = pBlkCacheGlobal;
1220 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1221
1222 rc = RTSpinlockCreate(&pBlkCache->LockList);
1223 if (RT_SUCCESS(rc))
1224 {
1225 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1226 if (RT_SUCCESS(rc))
1227 {
1228 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1229 if (pBlkCache->pTree)
1230 {
1231#ifdef VBOX_WITH_STATISTICS
1232 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1233 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1234 STAMUNIT_COUNT, "Number of deferred writes",
1235 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1236#endif
1237
1238 /* Add to the list of users. */
1239 pBlkCacheGlobal->cRefs++;
1240 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1241 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1242
1243 *ppBlkCache = pBlkCache;
1244 LogFlowFunc(("returns success\n"));
1245 return VINF_SUCCESS;
1246 }
1247 else
1248 rc = VERR_NO_MEMORY;
1249
1250 RTSemRWDestroy(pBlkCache->SemRWEntries);
1251 }
1252
1253 RTSpinlockDestroy(pBlkCache->LockList);
1254 }
1255
1256 RTStrFree(pBlkCache->pszId);
1257 }
1258 else
1259 rc = VERR_NO_MEMORY;
1260
1261 if (pBlkCache)
1262 RTMemFree(pBlkCache);
1263 }
1264 else
1265 rc = VERR_ALREADY_EXISTS;
1266
1267 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1268
1269 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1270 return rc;
1271}
1272
1273VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1274 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1275 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1276 PFNPDMBLKCACHEXFERENQUEUEDISCARDDRV pfnXferEnqueueDiscard,
1277 const char *pcszId)
1278{
1279 int rc = VINF_SUCCESS;
1280 PPDMBLKCACHE pBlkCache;
1281
1282 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1283 if (RT_SUCCESS(rc))
1284 {
1285 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1286 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1287 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1288 pBlkCache->u.Drv.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1289 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1290 *ppBlkCache = pBlkCache;
1291 }
1292
1293 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1294 return rc;
1295}
1296
1297VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1298 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1299 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1300 PFNPDMBLKCACHEXFERENQUEUEDISCARDDEV pfnXferEnqueueDiscard,
1301 const char *pcszId)
1302{
1303 int rc = VINF_SUCCESS;
1304 PPDMBLKCACHE pBlkCache;
1305
1306 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1307 if (RT_SUCCESS(rc))
1308 {
1309 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1310 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1311 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1312 pBlkCache->u.Dev.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1313 pBlkCache->u.Dev.pDevIns = pDevIns;
1314 *ppBlkCache = pBlkCache;
1315 }
1316
1317 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1318 return rc;
1319
1320}
1321
1322VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1323 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1324 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1325 PFNPDMBLKCACHEXFERENQUEUEDISCARDUSB pfnXferEnqueueDiscard,
1326 const char *pcszId)
1327{
1328 int rc = VINF_SUCCESS;
1329 PPDMBLKCACHE pBlkCache;
1330
1331 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1332 if (RT_SUCCESS(rc))
1333 {
1334 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1335 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1336 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1337 pBlkCache->u.Usb.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1338 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1339 *ppBlkCache = pBlkCache;
1340 }
1341
1342 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1343 return rc;
1344
1345}
1346
1347VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1348 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1349 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1350 PFNPDMBLKCACHEXFERENQUEUEDISCARDINT pfnXferEnqueueDiscard,
1351 const char *pcszId)
1352{
1353 int rc = VINF_SUCCESS;
1354 PPDMBLKCACHE pBlkCache;
1355
1356 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1357 if (RT_SUCCESS(rc))
1358 {
1359 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1360 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1361 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1362 pBlkCache->u.Int.pfnXferEnqueueDiscard = pfnXferEnqueueDiscard;
1363 pBlkCache->u.Int.pvUser = pvUser;
1364 *ppBlkCache = pBlkCache;
1365 }
1366
1367 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1368 return rc;
1369
1370}
1371
1372/**
1373 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1374 *
1375 * @returns IPRT status code.
1376 * @param pNode The node to destroy.
1377 * @param pvUser Opaque user data.
1378 */
1379static int pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1380{
1381 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1382 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1383 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1384
1385 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1386 {
1387 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1388 pdmBlkCacheEntryRef(pEntry);
1389 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1390 pdmBlkCacheLockLeave(pCache);
1391
1392 RTThreadSleep(250);
1393
1394 /* Re-enter all locks */
1395 pdmBlkCacheLockEnter(pCache);
1396 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1397 pdmBlkCacheEntryRelease(pEntry);
1398 }
1399
1400 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1401 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1402
1403 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1404 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1405
1406 pdmBlkCacheEntryRemoveFromList(pEntry);
1407
1408 if (fUpdateCache)
1409 pdmBlkCacheSub(pCache, pEntry->cbData);
1410
1411 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1412 RTMemFree(pEntry);
1413
1414 return VINF_SUCCESS;
1415}
1416
1417/**
1418 * Destroys all cache resources used by the given endpoint.
1419 *
1420 * @returns nothing.
1421 * @param pEndpoint The endpoint to the destroy.
1422 */
1423VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1424{
1425 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1426
1427 /*
1428 * Commit all dirty entries now (they are waited on for completion during the
1429 * destruction of the AVL tree below).
1430 * The exception is if the VM was paused because of an I/O error before.
1431 */
1432 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1433 pdmBlkCacheCommit(pBlkCache);
1434
1435 /* Make sure nobody is accessing the cache while we delete the tree. */
1436 pdmBlkCacheLockEnter(pCache);
1437 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1438 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1439 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1440
1441 RTSpinlockDestroy(pBlkCache->LockList);
1442
1443 pCache->cRefs--;
1444 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1445
1446 pdmBlkCacheLockLeave(pCache);
1447
1448 RTSemRWDestroy(pBlkCache->SemRWEntries);
1449
1450#ifdef VBOX_WITH_STATISTICS
1451 STAMR3Deregister(pCache->pVM, &pBlkCache->StatWriteDeferred);
1452#endif
1453
1454 RTStrFree(pBlkCache->pszId);
1455 RTMemFree(pBlkCache);
1456}
1457
1458VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1459{
1460 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1461
1462 /*
1463 * Validate input.
1464 */
1465 if (!pDevIns)
1466 return;
1467 VM_ASSERT_EMT(pVM);
1468
1469 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1470 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1471
1472 /* Return silently if not supported. */
1473 if (!pBlkCacheGlobal)
1474 return;
1475
1476 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1477
1478 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1479 {
1480 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1481 && pBlkCache->u.Dev.pDevIns == pDevIns)
1482 PDMR3BlkCacheRelease(pBlkCache);
1483 }
1484
1485 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1486}
1487
1488VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1489{
1490 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1491
1492 /*
1493 * Validate input.
1494 */
1495 if (!pDrvIns)
1496 return;
1497 VM_ASSERT_EMT(pVM);
1498
1499 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1500 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1501
1502 /* Return silently if not supported. */
1503 if (!pBlkCacheGlobal)
1504 return;
1505
1506 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1507
1508 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1509 {
1510 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1511 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1512 PDMR3BlkCacheRelease(pBlkCache);
1513 }
1514
1515 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1516}
1517
1518VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1519{
1520 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1521
1522 /*
1523 * Validate input.
1524 */
1525 if (!pUsbIns)
1526 return;
1527 VM_ASSERT_EMT(pVM);
1528
1529 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1530 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1531
1532 /* Return silently if not supported. */
1533 if (!pBlkCacheGlobal)
1534 return;
1535
1536 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1537
1538 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1539 {
1540 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1541 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1542 PDMR3BlkCacheRelease(pBlkCache);
1543 }
1544
1545 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1546}
1547
1548static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1549{
1550 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1551
1552 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1553 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1554 if (pEntry)
1555 pdmBlkCacheEntryRef(pEntry);
1556 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1557
1558 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1559
1560 return pEntry;
1561}
1562
1563/**
1564 * Return the best fit cache entries for the given offset.
1565 *
1566 * @returns nothing.
1567 * @param pBlkCache The endpoint cache.
1568 * @param off The offset.
1569 * @param pEntryAbove Where to store the pointer to the best fit entry above the
1570 * the given offset. NULL if not required.
1571 */
1572static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off,
1573 PPDMBLKCACHEENTRY *ppEntryAbove)
1574{
1575 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeGet, Cache);
1576
1577 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1578 if (ppEntryAbove)
1579 {
1580 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1581 if (*ppEntryAbove)
1582 pdmBlkCacheEntryRef(*ppEntryAbove);
1583 }
1584
1585 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1586
1587 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeGet, Cache);
1588}
1589
1590static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1591{
1592 STAM_PROFILE_ADV_START(&pBlkCache->pCache->StatTreeInsert, Cache);
1593 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1594 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1595 AssertMsg(fInserted, ("Node was not inserted into tree\n")); NOREF(fInserted);
1596 STAM_PROFILE_ADV_STOP(&pBlkCache->pCache->StatTreeInsert, Cache);
1597 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1598}
1599
1600/**
1601 * Allocates and initializes a new entry for the cache.
1602 * The entry has a reference count of 1.
1603 *
1604 * @returns Pointer to the new cache entry or NULL if out of memory.
1605 * @param pBlkCache The cache the entry belongs to.
1606 * @param off Start offset.
1607 * @param cbData Size of the cache entry.
1608 * @param pbBuffer Pointer to the buffer to use.
1609 * NULL if a new buffer should be allocated.
1610 * The buffer needs to have the same size of the entry.
1611 */
1612static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
1613 uint64_t off, size_t cbData, uint8_t *pbBuffer)
1614{
1615 AssertReturn(cbData <= UINT32_MAX, NULL);
1616 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1617
1618 if (RT_UNLIKELY(!pEntryNew))
1619 return NULL;
1620
1621 pEntryNew->Core.Key = off;
1622 pEntryNew->Core.KeyLast = off + cbData - 1;
1623 pEntryNew->pBlkCache = pBlkCache;
1624 pEntryNew->fFlags = 0;
1625 pEntryNew->cRefs = 1; /* We are using it now. */
1626 pEntryNew->pList = NULL;
1627 pEntryNew->cbData = (uint32_t)cbData;
1628 pEntryNew->pWaitingHead = NULL;
1629 pEntryNew->pWaitingTail = NULL;
1630 if (pbBuffer)
1631 pEntryNew->pbData = pbBuffer;
1632 else
1633 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1634
1635 if (RT_UNLIKELY(!pEntryNew->pbData))
1636 {
1637 RTMemFree(pEntryNew);
1638 return NULL;
1639 }
1640
1641 return pEntryNew;
1642}
1643
1644/**
1645 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1646 * in exclusive mode.
1647 *
1648 * @returns true if the flag in fSet is set and the one in fClear is clear.
1649 * false otherwise.
1650 * The R/W semaphore is only held if true is returned.
1651 *
1652 * @param pBlkCache The endpoint cache instance data.
1653 * @param pEntry The entry to check the flags for.
1654 * @param fSet The flag which is tested to be set.
1655 * @param fClear The flag which is tested to be clear.
1656 */
1657DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1658 PPDMBLKCACHEENTRY pEntry,
1659 uint32_t fSet, uint32_t fClear)
1660{
1661 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1662 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1663
1664 if (fPassed)
1665 {
1666 /* Acquire the lock and check again because the completion callback might have raced us. */
1667 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1668
1669 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1670 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1671
1672 /* Drop the lock if we didn't passed the test. */
1673 if (!fPassed)
1674 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1675 }
1676
1677 return fPassed;
1678}
1679
1680/**
1681 * Adds a segment to the waiting list for a cache entry
1682 * which is currently in progress.
1683 *
1684 * @returns nothing.
1685 * @param pEntry The cache entry to add the segment to.
1686 * @param pSeg The segment to add.
1687 */
1688DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1689 PPDMBLKCACHEWAITER pWaiter)
1690{
1691 pWaiter->pNext = NULL;
1692
1693 if (pEntry->pWaitingHead)
1694 {
1695 AssertPtr(pEntry->pWaitingTail);
1696
1697 pEntry->pWaitingTail->pNext = pWaiter;
1698 pEntry->pWaitingTail = pWaiter;
1699 }
1700 else
1701 {
1702 Assert(!pEntry->pWaitingTail);
1703
1704 pEntry->pWaitingHead = pWaiter;
1705 pEntry->pWaitingTail = pWaiter;
1706 }
1707}
1708
1709/**
1710 * Add a buffer described by the I/O memory context
1711 * to the entry waiting for completion.
1712 *
1713 * @returns VBox status code.
1714 * @param pEntry The entry to add the buffer to.
1715 * @param pTask Task associated with the buffer.
1716 * @param pIoMemCtx The memory context to use.
1717 * @param offDiff Offset from the start of the buffer
1718 * in the entry.
1719 * @param cbData Amount of data to wait for onthis entry.
1720 * @param fWrite Flag whether the task waits because it wants to write
1721 * to the cache entry.
1722 */
1723static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry,
1724 PPDMBLKCACHEREQ pReq,
1725 PRTSGBUF pSgBuf, uint64_t offDiff,
1726 size_t cbData, bool fWrite)
1727{
1728 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1729 if (!pWaiter)
1730 return VERR_NO_MEMORY;
1731
1732 ASMAtomicIncU32(&pReq->cXfersPending);
1733 pWaiter->pReq = pReq;
1734 pWaiter->offCacheEntry = offDiff;
1735 pWaiter->cbTransfer = cbData;
1736 pWaiter->fWrite = fWrite;
1737 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1738 RTSgBufAdvance(pSgBuf, cbData);
1739
1740 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1741
1742 return VINF_SUCCESS;
1743}
1744
1745/**
1746 * Calculate aligned offset and size for a new cache entry which do not
1747 * intersect with an already existing entry and the file end.
1748 *
1749 * @returns The number of bytes the entry can hold of the requested amount
1750 * of bytes.
1751 * @param pEndpoint The endpoint.
1752 * @param pBlkCache The endpoint cache.
1753 * @param off The start offset.
1754 * @param cb The number of bytes the entry needs to hold at
1755 * least.
1756 * @param pcbEntry Where to store the number of bytes the entry can hold.
1757 * Can be less than given because of other entries.
1758 */
1759static uint32_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1760 uint64_t off, uint32_t cb,
1761 uint32_t *pcbEntry)
1762{
1763 /* Get the best fit entries around the offset */
1764 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1765 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1766
1767 /* Log the info */
1768 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1769 pEntryAbove ? "B" : "No b",
1770 off,
1771 pEntryAbove ? pEntryAbove->Core.Key : 0,
1772 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1773 pEntryAbove ? pEntryAbove->cbData : 0));
1774
1775 uint32_t cbNext;
1776 uint32_t cbInEntry;
1777 if ( pEntryAbove
1778 && off + cb > pEntryAbove->Core.Key)
1779 {
1780 cbInEntry = (uint32_t)(pEntryAbove->Core.Key - off);
1781 cbNext = (uint32_t)(pEntryAbove->Core.Key - off);
1782 }
1783 else
1784 {
1785 cbInEntry = cb;
1786 cbNext = cb;
1787 }
1788
1789 /* A few sanity checks */
1790 AssertMsg(!pEntryAbove || off + cbNext <= pEntryAbove->Core.Key,
1791 ("Aligned size intersects with another cache entry\n"));
1792 Assert(cbInEntry <= cbNext);
1793
1794 if (pEntryAbove)
1795 pdmBlkCacheEntryRelease(pEntryAbove);
1796
1797 LogFlow(("off=%llu cbNext=%u\n", off, cbNext));
1798
1799 *pcbEntry = cbNext;
1800
1801 return cbInEntry;
1802}
1803
1804/**
1805 * Create a new cache entry evicting data from the cache if required.
1806 *
1807 * @returns Pointer to the new cache entry or NULL
1808 * if not enough bytes could be evicted from the cache.
1809 * @param pEndpoint The endpoint.
1810 * @param pBlkCache The endpoint cache.
1811 * @param off The offset.
1812 * @param cb Number of bytes the cache entry should have.
1813 * @param pcbData Where to store the number of bytes the new
1814 * entry can hold. May be lower than actually requested
1815 * due to another entry intersecting the access range.
1816 */
1817static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache,
1818 uint64_t off, size_t cb,
1819 size_t *pcbData)
1820{
1821 uint32_t cbEntry = 0;
1822
1823 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, (uint32_t)cb, &cbEntry);
1824 AssertReturn(cb <= UINT32_MAX, NULL);
1825
1826 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1827 pdmBlkCacheLockEnter(pCache);
1828
1829 PPDMBLKCACHEENTRY pEntryNew = NULL;
1830 uint8_t *pbBuffer = NULL;
1831 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1832 if (fEnough)
1833 {
1834 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1835
1836 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, pbBuffer);
1837 if (RT_LIKELY(pEntryNew))
1838 {
1839 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1840 pdmBlkCacheAdd(pCache, cbEntry);
1841 pdmBlkCacheLockLeave(pCache);
1842
1843 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1844
1845 AssertMsg( (off >= pEntryNew->Core.Key)
1846 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1847 ("Overflow in calculation off=%llu\n", off));
1848 }
1849 else
1850 pdmBlkCacheLockLeave(pCache);
1851 }
1852 else
1853 pdmBlkCacheLockLeave(pCache);
1854
1855 return pEntryNew;
1856}
1857
1858static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(void *pvUser)
1859{
1860 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1861
1862 if (RT_LIKELY(pReq))
1863 {
1864 pReq->pvUser = pvUser;
1865 pReq->rcReq = VINF_SUCCESS;
1866 pReq->cXfersPending = 0;
1867 }
1868
1869 return pReq;
1870}
1871
1872static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1873{
1874 switch (pBlkCache->enmType)
1875 {
1876 case PDMBLKCACHETYPE_DEV:
1877 {
1878 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1879 pReq->pvUser, pReq->rcReq);
1880 break;
1881 }
1882 case PDMBLKCACHETYPE_DRV:
1883 {
1884 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1885 pReq->pvUser, pReq->rcReq);
1886 break;
1887 }
1888 case PDMBLKCACHETYPE_USB:
1889 {
1890 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1891 pReq->pvUser, pReq->rcReq);
1892 break;
1893 }
1894 case PDMBLKCACHETYPE_INTERNAL:
1895 {
1896 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1897 pReq->pvUser, pReq->rcReq);
1898 break;
1899 }
1900 default:
1901 AssertMsgFailed(("Unknown block cache type!\n"));
1902 }
1903
1904 RTMemFree(pReq);
1905}
1906
1907static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1908 int rcReq, bool fCallHandler)
1909{
1910 if (RT_FAILURE(rcReq))
1911 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1912
1913 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1914 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1915
1916 if (!cXfersPending)
1917 {
1918 if (fCallHandler)
1919 pdmBlkCacheReqComplete(pBlkCache, pReq);
1920 else
1921 RTMemFree(pReq);
1922 return true;
1923 }
1924
1925 LogFlowFunc(("pReq=%#p cXfersPending=%u\n", pReq, cXfersPending));
1926 return false;
1927}
1928
1929VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1930 PCRTSGBUF pcSgBuf, size_t cbRead, void *pvUser)
1931{
1932 int rc = VINF_SUCCESS;
1933 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1934 PPDMBLKCACHEENTRY pEntry;
1935 PPDMBLKCACHEREQ pReq;
1936
1937 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbRead=%u pvUser=%#p\n",
1938 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbRead, pvUser));
1939
1940 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
1941 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
1942
1943 RTSGBUF SgBuf;
1944 RTSgBufClone(&SgBuf, pcSgBuf);
1945
1946 /* Allocate new request structure. */
1947 pReq = pdmBlkCacheReqAlloc(pvUser);
1948 if (RT_UNLIKELY(!pReq))
1949 return VERR_NO_MEMORY;
1950
1951 /* Increment data transfer counter to keep the request valid while we access it. */
1952 ASMAtomicIncU32(&pReq->cXfersPending);
1953
1954 while (cbRead)
1955 {
1956 size_t cbToRead;
1957
1958 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1959
1960 /*
1961 * If there is no entry we try to create a new one eviciting unused pages
1962 * if the cache is full. If this is not possible we will pass the request through
1963 * and skip the caching (all entries may be still in progress so they can't
1964 * be evicted)
1965 * If we have an entry it can be in one of the LRU lists where the entry
1966 * contains data (recently used or frequently used LRU) so we can just read
1967 * the data we need and put the entry at the head of the frequently used LRU list.
1968 * In case the entry is in one of the ghost lists it doesn't contain any data.
1969 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1970 */
1971 if (pEntry)
1972 {
1973 uint64_t offDiff = off - pEntry->Core.Key;
1974
1975 AssertMsg(off >= pEntry->Core.Key,
1976 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1977 off, pEntry->Core.Key));
1978
1979 AssertPtr(pEntry->pList);
1980
1981 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
1982
1983 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1984 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
1985 off, cbToRead));
1986
1987 cbRead -= cbToRead;
1988
1989 if (!cbRead)
1990 STAM_COUNTER_INC(&pCache->cHits);
1991 else
1992 STAM_COUNTER_INC(&pCache->cPartialHits);
1993
1994 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1995
1996 /* Ghost lists contain no data. */
1997 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1998 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1999 {
2000 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2001 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2002 PDMBLKCACHE_ENTRY_IS_DIRTY))
2003 {
2004 /* Entry didn't completed yet. Append to the list */
2005 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2006 &SgBuf, offDiff, cbToRead,
2007 false /* fWrite */);
2008 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2009 }
2010 else
2011 {
2012 /* Read as much as we can from the entry. */
2013 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2014 }
2015
2016 /* Move this entry to the top position */
2017 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2018 {
2019 pdmBlkCacheLockEnter(pCache);
2020 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2021 pdmBlkCacheLockLeave(pCache);
2022 }
2023 /* Release the entry */
2024 pdmBlkCacheEntryRelease(pEntry);
2025 }
2026 else
2027 {
2028 uint8_t *pbBuffer = NULL;
2029
2030 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2031
2032 pdmBlkCacheLockEnter(pCache);
2033 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2034 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2035
2036 /* Move the entry to Am and fetch it to the cache. */
2037 if (fEnough)
2038 {
2039 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2040 pdmBlkCacheAdd(pCache, pEntry->cbData);
2041 pdmBlkCacheLockLeave(pCache);
2042
2043 if (pbBuffer)
2044 pEntry->pbData = pbBuffer;
2045 else
2046 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2047 AssertPtr(pEntry->pbData);
2048
2049 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2050 &SgBuf, offDiff, cbToRead,
2051 false /* fWrite */);
2052 pdmBlkCacheEntryReadFromMedium(pEntry);
2053 /* Release the entry */
2054 pdmBlkCacheEntryRelease(pEntry);
2055 }
2056 else
2057 {
2058 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2059 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2060 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2061 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2062 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2063
2064 pdmBlkCacheLockLeave(pCache);
2065
2066 RTMemFree(pEntry);
2067
2068 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2069 &SgBuf, off, cbToRead,
2070 PDMBLKCACHEXFERDIR_READ);
2071 }
2072 }
2073 }
2074 else
2075 {
2076#ifdef VBOX_WITH_IO_READ_CACHE
2077 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2078 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2079 off, cbRead,
2080 &cbToRead);
2081
2082 cbRead -= cbToRead;
2083
2084 if (pEntryNew)
2085 {
2086 if (!cbRead)
2087 STAM_COUNTER_INC(&pCache->cMisses);
2088 else
2089 STAM_COUNTER_INC(&pCache->cPartialHits);
2090
2091 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2092 &SgBuf,
2093 off - pEntryNew->Core.Key,
2094 cbToRead,
2095 false /* fWrite */);
2096 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2097 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2098 }
2099 else
2100 {
2101 /*
2102 * There is not enough free space in the cache.
2103 * Pass the request directly to the I/O manager.
2104 */
2105 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2106
2107 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2108 &SgBuf, off, cbToRead,
2109 PDMBLKCACHEXFERDIR_READ);
2110 }
2111#else
2112 /* Clip read size if necessary. */
2113 PPDMBLKCACHEENTRY pEntryAbove;
2114 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2115
2116 if (pEntryAbove)
2117 {
2118 if (off + cbRead > pEntryAbove->Core.Key)
2119 cbToRead = pEntryAbove->Core.Key - off;
2120 else
2121 cbToRead = cbRead;
2122
2123 pdmBlkCacheEntryRelease(pEntryAbove);
2124 }
2125 else
2126 cbToRead = cbRead;
2127
2128 cbRead -= cbToRead;
2129 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2130 &SgBuf, off, cbToRead,
2131 PDMBLKCACHEXFERDIR_READ);
2132#endif
2133 }
2134 off += cbToRead;
2135 }
2136
2137 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2138 rc = VINF_AIO_TASK_PENDING;
2139
2140 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2141
2142 return rc;
2143}
2144
2145VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off,
2146 PCRTSGBUF pcSgBuf, size_t cbWrite, void *pvUser)
2147{
2148 int rc = VINF_SUCCESS;
2149 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2150 PPDMBLKCACHEENTRY pEntry;
2151 PPDMBLKCACHEREQ pReq;
2152
2153 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2154 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbWrite, pvUser));
2155
2156 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2157 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2158
2159 RTSGBUF SgBuf;
2160 RTSgBufClone(&SgBuf, pcSgBuf);
2161
2162 /* Allocate new request structure. */
2163 pReq = pdmBlkCacheReqAlloc(pvUser);
2164 if (RT_UNLIKELY(!pReq))
2165 return VERR_NO_MEMORY;
2166
2167 /* Increment data transfer counter to keep the request valid while we access it. */
2168 ASMAtomicIncU32(&pReq->cXfersPending);
2169
2170 while (cbWrite)
2171 {
2172 size_t cbToWrite;
2173
2174 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2175 if (pEntry)
2176 {
2177 /* Write the data into the entry and mark it as dirty */
2178 AssertPtr(pEntry->pList);
2179
2180 uint64_t offDiff = off - pEntry->Core.Key;
2181
2182 AssertMsg(off >= pEntry->Core.Key,
2183 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2184 off, pEntry->Core.Key));
2185
2186 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2187 cbWrite -= cbToWrite;
2188
2189 if (!cbWrite)
2190 STAM_COUNTER_INC(&pCache->cHits);
2191 else
2192 STAM_COUNTER_INC(&pCache->cPartialHits);
2193
2194 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2195
2196 /* Ghost lists contain no data. */
2197 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2198 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2199 {
2200 /* Check if the entry is dirty. */
2201 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2202 PDMBLKCACHE_ENTRY_IS_DIRTY,
2203 0))
2204 {
2205 /* If it is already dirty but not in progress just update the data. */
2206 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2207 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2208 else
2209 {
2210 /* The data isn't written to the file yet */
2211 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2212 &SgBuf, offDiff, cbToWrite,
2213 true /* fWrite */);
2214 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2215 }
2216
2217 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2218 }
2219 else /* Dirty bit not set */
2220 {
2221 /*
2222 * Check if a read is in progress for this entry.
2223 * We have to defer processing in that case.
2224 */
2225 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2226 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2227 0))
2228 {
2229 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2230 &SgBuf, offDiff, cbToWrite,
2231 true /* fWrite */);
2232 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2233 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2234 }
2235 else /* I/O in progress flag not set */
2236 {
2237 /* Write as much as we can into the entry and update the file. */
2238 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2239
2240 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2241 if (fCommit)
2242 pdmBlkCacheCommitDirtyEntries(pCache);
2243 }
2244 } /* Dirty bit not set */
2245
2246 /* Move this entry to the top position */
2247 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2248 {
2249 pdmBlkCacheLockEnter(pCache);
2250 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2251 pdmBlkCacheLockLeave(pCache);
2252 }
2253
2254 pdmBlkCacheEntryRelease(pEntry);
2255 }
2256 else /* Entry is on the ghost list */
2257 {
2258 uint8_t *pbBuffer = NULL;
2259
2260 pdmBlkCacheLockEnter(pCache);
2261 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2262 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2263
2264 if (fEnough)
2265 {
2266 /* Move the entry to Am and fetch it to the cache. */
2267 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2268 pdmBlkCacheAdd(pCache, pEntry->cbData);
2269 pdmBlkCacheLockLeave(pCache);
2270
2271 if (pbBuffer)
2272 pEntry->pbData = pbBuffer;
2273 else
2274 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2275 AssertPtr(pEntry->pbData);
2276
2277 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2278 &SgBuf, offDiff, cbToWrite,
2279 true /* fWrite */);
2280 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2281 pdmBlkCacheEntryReadFromMedium(pEntry);
2282
2283 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2284 pdmBlkCacheEntryRelease(pEntry);
2285 }
2286 else
2287 {
2288 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2289 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2290 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2291 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2292 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2293
2294 pdmBlkCacheLockLeave(pCache);
2295
2296 RTMemFree(pEntry);
2297 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2298 &SgBuf, off, cbToWrite,
2299 PDMBLKCACHEXFERDIR_WRITE);
2300 }
2301 }
2302 }
2303 else /* No entry found */
2304 {
2305 /*
2306 * No entry found. Try to create a new cache entry to store the data in and if that fails
2307 * write directly to the file.
2308 */
2309 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2310 off, cbWrite,
2311 &cbToWrite);
2312
2313 cbWrite -= cbToWrite;
2314
2315 if (pEntryNew)
2316 {
2317 uint64_t offDiff = off - pEntryNew->Core.Key;
2318
2319 STAM_COUNTER_INC(&pCache->cHits);
2320
2321 /*
2322 * Check if it is possible to just write the data without waiting
2323 * for it to get fetched first.
2324 */
2325 if (!offDiff && pEntryNew->cbData == cbToWrite)
2326 {
2327 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2328
2329 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2330 if (fCommit)
2331 pdmBlkCacheCommitDirtyEntries(pCache);
2332 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2333 }
2334 else
2335 {
2336 /* Defer the write and fetch the data from the endpoint. */
2337 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2338 &SgBuf, offDiff, cbToWrite,
2339 true /* fWrite */);
2340 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2341 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2342 }
2343
2344 pdmBlkCacheEntryRelease(pEntryNew);
2345 }
2346 else
2347 {
2348 /*
2349 * There is not enough free space in the cache.
2350 * Pass the request directly to the I/O manager.
2351 */
2352 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2353
2354 STAM_COUNTER_INC(&pCache->cMisses);
2355
2356 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2357 &SgBuf, off, cbToWrite,
2358 PDMBLKCACHEXFERDIR_WRITE);
2359 }
2360 }
2361
2362 off += cbToWrite;
2363 }
2364
2365 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2366 rc = VINF_AIO_TASK_PENDING;
2367
2368 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2369
2370 return rc;
2371}
2372
2373VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2374{
2375 int rc = VINF_SUCCESS;
2376 PPDMBLKCACHEREQ pReq;
2377
2378 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2379
2380 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2381 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2382
2383 /* Commit dirty entries in the cache. */
2384 pdmBlkCacheCommit(pBlkCache);
2385
2386 /* Allocate new request structure. */
2387 pReq = pdmBlkCacheReqAlloc(pvUser);
2388 if (RT_UNLIKELY(!pReq))
2389 return VERR_NO_MEMORY;
2390
2391 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2392 PDMBLKCACHEXFERDIR_FLUSH);
2393 AssertRC(rc);
2394
2395 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2396 return VINF_AIO_TASK_PENDING;
2397}
2398
2399VMMR3DECL(int) PDMR3BlkCacheDiscard(PPDMBLKCACHE pBlkCache, PCRTRANGE paRanges,
2400 unsigned cRanges, void *pvUser)
2401{
2402 int rc = VINF_SUCCESS;
2403 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2404 PPDMBLKCACHEENTRY pEntry;
2405 PPDMBLKCACHEREQ pReq;
2406
2407 LogFlowFunc((": pBlkCache=%#p{%s} paRanges=%#p cRanges=%u pvUser=%#p\n",
2408 pBlkCache, pBlkCache->pszId, paRanges, cRanges, pvUser));
2409
2410 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2411 AssertReturn(!pBlkCache->fSuspended, VERR_INVALID_STATE);
2412
2413 /* Allocate new request structure. */
2414 pReq = pdmBlkCacheReqAlloc(pvUser);
2415 if (RT_UNLIKELY(!pReq))
2416 return VERR_NO_MEMORY;
2417
2418 /* Increment data transfer counter to keep the request valid while we access it. */
2419 ASMAtomicIncU32(&pReq->cXfersPending);
2420
2421 for (unsigned i = 0; i < cRanges; i++)
2422 {
2423 uint64_t offCur = paRanges[i].offStart;
2424 size_t cbLeft = paRanges[i].cbRange;
2425
2426 while (cbLeft)
2427 {
2428 size_t cbThisDiscard = 0;
2429
2430 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, offCur);
2431
2432 if (pEntry)
2433 {
2434 /* Write the data into the entry and mark it as dirty */
2435 AssertPtr(pEntry->pList);
2436
2437 uint64_t offDiff = offCur - pEntry->Core.Key;
2438
2439 AssertMsg(offCur >= pEntry->Core.Key,
2440 ("Overflow in calculation offCur=%llu OffsetAligned=%llu\n",
2441 offCur, pEntry->Core.Key));
2442
2443 cbThisDiscard = RT_MIN(pEntry->cbData - offDiff, cbLeft);
2444
2445 /* Ghost lists contain no data. */
2446 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2447 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2448 {
2449 /* Check if the entry is dirty. */
2450 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2451 PDMBLKCACHE_ENTRY_IS_DIRTY,
2452 0))
2453 {
2454 /* If it is dirty but not yet in progress remove it. */
2455 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2456 {
2457 pdmBlkCacheLockEnter(pCache);
2458 pdmBlkCacheEntryRemoveFromList(pEntry);
2459
2460 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2461 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2462 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2463
2464 pdmBlkCacheLockLeave(pCache);
2465
2466 RTMemFree(pEntry);
2467 }
2468 else
2469 {
2470#if 0
2471 /* The data isn't written to the file yet */
2472 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2473 &SgBuf, offDiff, cbToWrite,
2474 true /* fWrite */);
2475 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2476#endif
2477 }
2478
2479 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2480 pdmBlkCacheEntryRelease(pEntry);
2481 }
2482 else /* Dirty bit not set */
2483 {
2484 /*
2485 * Check if a read is in progress for this entry.
2486 * We have to defer processing in that case.
2487 */
2488 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2489 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2490 0))
2491 {
2492#if 0
2493 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2494 &SgBuf, offDiff, cbToWrite,
2495 true /* fWrite */);
2496#endif
2497 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2498 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2499 pdmBlkCacheEntryRelease(pEntry);
2500 }
2501 else /* I/O in progress flag not set */
2502 {
2503 pdmBlkCacheLockEnter(pCache);
2504 pdmBlkCacheEntryRemoveFromList(pEntry);
2505
2506 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2507 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2508 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2509 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2510 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2511
2512 pdmBlkCacheLockLeave(pCache);
2513
2514 RTMemFree(pEntry);
2515 }
2516 } /* Dirty bit not set */
2517 }
2518 else /* Entry is on the ghost list just remove cache entry. */
2519 {
2520 pdmBlkCacheLockEnter(pCache);
2521 pdmBlkCacheEntryRemoveFromList(pEntry);
2522
2523 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2524 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2525 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2526 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2527 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2528
2529 pdmBlkCacheLockLeave(pCache);
2530
2531 RTMemFree(pEntry);
2532 }
2533 }
2534 /* else: no entry found. */
2535
2536 offCur += cbThisDiscard;
2537 cbLeft -= cbThisDiscard;
2538 }
2539 }
2540
2541 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, false))
2542 rc = VINF_AIO_TASK_PENDING;
2543
2544 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2545
2546 return rc;
2547}
2548
2549/**
2550 * Completes a task segment freeing all resources and completes the task handle
2551 * if everything was transferred.
2552 *
2553 * @returns Next task segment handle.
2554 * @param pTaskSeg Task segment to complete.
2555 * @param rc Status code to set.
2556 */
2557static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache,
2558 PPDMBLKCACHEWAITER pWaiter,
2559 int rc)
2560{
2561 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2562 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2563
2564 pdmBlkCacheReqUpdate(pBlkCache, pReq, rc, true);
2565
2566 RTMemFree(pWaiter);
2567
2568 return pNext;
2569}
2570
2571static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2572{
2573 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2574 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2575
2576 /* Reference the entry now as we are clearing the I/O in progress flag
2577 * which protected the entry till now. */
2578 pdmBlkCacheEntryRef(pEntry);
2579
2580 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2581 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2582
2583 /* Process waiting segment list. The data in entry might have changed in-between. */
2584 bool fDirty = false;
2585 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2586 PPDMBLKCACHEWAITER pCurr = pComplete;
2587
2588 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2589 ("The list tail was not updated correctly\n"));
2590 pEntry->pWaitingTail = NULL;
2591 pEntry->pWaitingHead = NULL;
2592
2593 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2594 {
2595 /*
2596 * An error here is difficult to handle as the original request completed already.
2597 * The error is logged for now and the VM is paused.
2598 * If the user continues the entry is written again in the hope
2599 * the user fixed the problem and the next write succeeds.
2600 */
2601 if (RT_FAILURE(rcIoXfer))
2602 {
2603 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2604 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2605
2606 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2607 {
2608 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2609 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2610 "Make sure there is enough free space on the disk and that the disk is working properly. "
2611 "Operation can be resumed afterwards"),
2612 pBlkCache->pszId, rcIoXfer);
2613 AssertRC(rc);
2614 }
2615
2616 /* Mark the entry as dirty again to get it added to the list later on. */
2617 fDirty = true;
2618 }
2619
2620 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2621
2622 while (pCurr)
2623 {
2624 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2625
2626 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2627 fDirty = true;
2628 pCurr = pCurr->pNext;
2629 }
2630 }
2631 else
2632 {
2633 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2634 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2635 ("Invalid flags set\n"));
2636
2637 while (pCurr)
2638 {
2639 if (pCurr->fWrite)
2640 {
2641 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2642 fDirty = true;
2643 }
2644 else
2645 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2646
2647 pCurr = pCurr->pNext;
2648 }
2649 }
2650
2651 bool fCommit = false;
2652 if (fDirty)
2653 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2654
2655 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2656
2657 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2658 pdmBlkCacheEntryRelease(pEntry);
2659
2660 if (fCommit)
2661 pdmBlkCacheCommitDirtyEntries(pCache);
2662
2663 /* Complete waiters now. */
2664 while (pComplete)
2665 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2666}
2667
2668VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2669{
2670 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2671
2672 if (hIoXfer->fIoCache)
2673 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2674 else
2675 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, rcIoXfer, true);
2676 RTMemFree(hIoXfer);
2677}
2678
2679/**
2680 * Callback for the AVL do with all routine. Waits for a cachen entry to finish any pending I/O.
2681 *
2682 * @returns IPRT status code.
2683 * @param pNode The node to destroy.
2684 * @param pvUser Opaque user data.
2685 */
2686static int pdmBlkCacheEntryQuiesce(PAVLRU64NODECORE pNode, void *pvUser)
2687{
2688 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
2689 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
2690 NOREF(pvUser);
2691
2692 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
2693 {
2694 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
2695 pdmBlkCacheEntryRef(pEntry);
2696 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2697
2698 RTThreadSleep(1);
2699
2700 /* Re-enter all locks and drop the reference. */
2701 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2702 pdmBlkCacheEntryRelease(pEntry);
2703 }
2704
2705 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
2706 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
2707
2708 return VINF_SUCCESS;
2709}
2710
2711VMMR3DECL(int) PDMR3BlkCacheSuspend(PPDMBLKCACHE pBlkCache)
2712{
2713 int rc = VINF_SUCCESS;
2714 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2715
2716 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2717
2718 if (!ASMAtomicReadBool(&pBlkCache->pCache->fIoErrorVmSuspended))
2719 pdmBlkCacheCommit(pBlkCache); /* Can issue new I/O requests. */
2720 ASMAtomicXchgBool(&pBlkCache->fSuspended, true);
2721
2722 /* Wait for all I/O to complete. */
2723 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2724 rc = RTAvlrU64DoWithAll(pBlkCache->pTree, true, pdmBlkCacheEntryQuiesce, NULL);
2725 AssertRC(rc);
2726 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2727
2728 return rc;
2729}
2730
2731VMMR3DECL(int) PDMR3BlkCacheResume(PPDMBLKCACHE pBlkCache)
2732{
2733 LogFlowFunc(("pBlkCache=%#p\n", pBlkCache));
2734
2735 AssertPtrReturn(pBlkCache, VERR_INVALID_POINTER);
2736
2737 ASMAtomicXchgBool(&pBlkCache->fSuspended, false);
2738
2739 return VINF_SUCCESS;
2740}
2741
2742VMMR3DECL(int) PDMR3BlkCacheClear(PPDMBLKCACHE pBlkCache)
2743{
2744 int rc = VINF_SUCCESS;
2745 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2746
2747 /*
2748 * Commit all dirty entries now (they are waited on for completion during the
2749 * destruction of the AVL tree below).
2750 * The exception is if the VM was paused because of an I/O error before.
2751 */
2752 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
2753 pdmBlkCacheCommit(pBlkCache);
2754
2755 /* Make sure nobody is accessing the cache while we delete the tree. */
2756 pdmBlkCacheLockEnter(pCache);
2757 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2758 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
2759 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2760
2761 pdmBlkCacheLockLeave(pCache);
2762 return rc;
2763}
2764
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette