VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMBlkCache.cpp@ 34904

Last change on this file since 34904 was 34658, checked in by vboxsync, 14 years ago

BlkCache: Build fix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 89.6 KB
Line 
1/* $Id: PDMBlkCache.cpp 34658 2010-12-02 20:31:53Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
26#include "PDMInternal.h"
27#include <iprt/asm.h>
28#include <iprt/mem.h>
29#include <iprt/path.h>
30#include <iprt/string.h>
31#include <VBox/log.h>
32#include <VBox/stam.h>
33#include <VBox/uvm.h>
34#include <VBox/vm.h>
35
36#include "PDMBlkCacheInternal.h"
37
#ifdef VBOX_STRICT
/** Asserts that the calling thread owns the critical section of the given cache.
 * The macro argument is parenthesized so any pointer expression can be passed. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while(0)

/** Asserts that the calling thread is the write owner of the per endpoint RW semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while(0)

/** Asserts that the calling thread is a read owner of the per endpoint RW semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while(0)

#else
/* Non-strict builds: the ownership checks compile to nothing. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while(0)
#endif

/** Saved state unit version; the load routine rejects any other version. */
#define PDM_BLK_CACHE_SAVED_STATE_VERSION 1
67
68/*******************************************************************************
69* Internal Functions *
70*******************************************************************************/
71
72static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
73 uint64_t off, size_t cbData, uint8_t *pbBuffer);
74static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry);
75
76/**
77 * Decrement the reference counter of the given cache entry.
78 *
79 * @returns nothing.
80 * @param pEntry The entry to release.
81 */
82DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
83{
84 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
85 ASMAtomicDecU32(&pEntry->cRefs);
86}
87
88/**
89 * Increment the reference counter of the given cache entry.
90 *
91 * @returns nothing.
92 * @param pEntry The entry to reference.
93 */
94DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
95{
96 ASMAtomicIncU32(&pEntry->cRefs);
97}
98
#ifdef DEBUG
/**
 * Asserts the size accounting invariants of the global cache (debug builds only).
 *
 * @returns nothing.
 * @param   pCache    The global cache instance to check.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    /* The cache must never hold more data than its configured maximum. */
    AssertMsg(pCache->cbMax >= pCache->cbCached,
              ("Current amount of cached data exceeds maximum\n"));

    /* The recently-used-in and frequently-used lists together account for all cached bytes. */
    AssertMsg(pCache->cbCached == pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The paged out (ghost) list has its own upper bound. */
    AssertMsg(pCache->cbRecentlyUsedOutMax >= pCache->LruRecentlyUsedOut.cbCached,
              ("Paged out list exceeds maximum\n"));
}
#endif
114
115DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
116{
117 RTCritSectEnter(&pCache->CritSect);
118#ifdef DEBUG
119 pdmBlkCacheValidate(pCache);
120#endif
121}
122
123DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
124{
125#ifdef DEBUG
126 pdmBlkCacheValidate(pCache);
127#endif
128 RTCritSectLeave(&pCache->CritSect);
129}
130
131DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
132{
133 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
134 pCache->cbCached -= cbAmount;
135}
136
137DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
138{
139 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
140 pCache->cbCached += cbAmount;
141}
142
143DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
144{
145 pList->cbCached += cbAmount;
146}
147
148DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
149{
150 pList->cbCached -= cbAmount;
151}
152
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Sanity checks a LRU list (only built with PDMACFILECACHE_WITH_LRULIST_CHECKS).
 *
 * Asserts that no entry is reachable from itself (duplicate or cycle), that
 * the forbidden entry is absent and that the last element is the list tail.
 *
 * @returns nothing
 * @param   pList         The LRU list to check.
 * @param   pNotInList    Element which is not allowed to occur in the list.
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pIt = pList->pHead; pIt; pIt = pIt->pNext)
    {
        /* Nothing following this entry may be the entry itself. */
        for (PPDMBLKCACHEENTRY pOther = pIt->pNext; pOther; pOther = pOther->pNext)
        {
            AssertMsg(pIt != pOther,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                       pIt, pList));
        }

        AssertMsg(pIt != pNotInList, ("Not allowed entry %#p is in list\n", pIt));

        /* The element without a successor has to be the recorded tail. */
        if (!pIt->pNext)
        {
            AssertMsg(pIt == pList->pTail, ("End of list reached but last element is not list tail\n"));
        }
    }
}
#endif
187
188/**
189 * Unlinks a cache entry from the LRU list it is assigned to.
190 *
191 * @returns nothing.
192 * @param pEntry The entry to unlink.
193 */
194static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
195{
196 PPDMBLKLRULIST pList = pEntry->pList;
197 PPDMBLKCACHEENTRY pPrev, pNext;
198
199 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
200
201 AssertPtr(pList);
202
203#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
204 pdmBlkCacheCheckList(pList, NULL);
205#endif
206
207 pPrev = pEntry->pPrev;
208 pNext = pEntry->pNext;
209
210 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
211 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
212
213 if (pPrev)
214 pPrev->pNext = pNext;
215 else
216 {
217 pList->pHead = pNext;
218
219 if (pNext)
220 pNext->pPrev = NULL;
221 }
222
223 if (pNext)
224 pNext->pPrev = pPrev;
225 else
226 {
227 pList->pTail = pPrev;
228
229 if (pPrev)
230 pPrev->pNext = NULL;
231 }
232
233 pEntry->pList = NULL;
234 pEntry->pPrev = NULL;
235 pEntry->pNext = NULL;
236 pdmBlkCacheListSub(pList, pEntry->cbData);
237#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
238 pdmBlkCacheCheckList(pList, pEntry);
239#endif
240}
241
242/**
243 * Adds a cache entry to the given LRU list unlinking it from the currently
244 * assigned list if needed.
245 *
246 * @returns nothing.
247 * @param pList List to the add entry to.
248 * @param pEntry Entry to add.
249 */
250static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
251{
252 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
253#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
254 pdmBlkCacheCheckList(pList, NULL);
255#endif
256
257 /* Remove from old list if needed */
258 if (pEntry->pList)
259 pdmBlkCacheEntryRemoveFromList(pEntry);
260
261 pEntry->pNext = pList->pHead;
262 if (pList->pHead)
263 pList->pHead->pPrev = pEntry;
264 else
265 {
266 Assert(!pList->pTail);
267 pList->pTail = pEntry;
268 }
269
270 pEntry->pPrev = NULL;
271 pList->pHead = pEntry;
272 pdmBlkCacheListAdd(pList, pEntry->cbData);
273 pEntry->pList = pList;
274#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
275 pdmBlkCacheCheckList(pList, NULL);
276#endif
277}
278
279/**
280 * Destroys a LRU list freeing all entries.
281 *
282 * @returns nothing
283 * @param pList Pointer to the LRU list to destroy.
284 *
285 * @note The caller must own the critical section of the cache.
286 */
287static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
288{
289 while (pList->pHead)
290 {
291 PPDMBLKCACHEENTRY pEntry = pList->pHead;
292
293 pList->pHead = pEntry->pNext;
294
295 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
296 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
297
298 RTMemPageFree(pEntry->pbData, pEntry->cbData);
299 RTMemFree(pEntry);
300 }
301}
302
303/**
304 * Tries to remove the given amount of bytes from a given list in the cache
305 * moving the entries to one of the given ghosts lists
306 *
307 * @returns Amount of data which could be freed.
308 * @param pCache Pointer to the global cache data.
309 * @param cbData The amount of the data to free.
310 * @param pListSrc The source list to evict data from.
311 * @param pGhostListSrc The ghost list removed entries should be moved to
312 * NULL if the entry should be freed.
313 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size
314 * @param ppbBuf Where to store the address of the buffer if an entry with the
315 * same size was found and fReuseBuffer is true.
316 *
317 * @note This function may return fewer bytes than requested because entries
318 * may be marked as non evictable if they are used for I/O at the
319 * moment.
320 */
321static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
322 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
323 bool fReuseBuffer, uint8_t **ppbBuffer)
324{
325 size_t cbEvicted = 0;
326
327 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
328
329 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
330 AssertMsg( !pGhostListDst
331 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
332 ("Destination list must be NULL or the recently used but paged out list\n"));
333
334 if (fReuseBuffer)
335 {
336 AssertPtr(ppbBuffer);
337 *ppbBuffer = NULL;
338 }
339
340 /* Start deleting from the tail. */
341 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
342
343 while ((cbEvicted < cbData) && pEntry)
344 {
345 PPDMBLKCACHEENTRY pCurr = pEntry;
346
347 pEntry = pEntry->pPrev;
348
349 /* We can't evict pages which are currently in progress or dirty but not in progress */
350 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
351 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
352 {
353 /* Ok eviction candidate. Grab the endpoint semaphore and check again
354 * because somebody else might have raced us. */
355 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
356 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
357
358 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
359 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
360 {
361 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
362
363 if (fReuseBuffer && (pCurr->cbData == cbData))
364 {
365 STAM_COUNTER_INC(&pCache->StatBuffersReused);
366 *ppbBuffer = pCurr->pbData;
367 }
368 else if (pCurr->pbData)
369 RTMemPageFree(pCurr->pbData, pCurr->cbData);
370
371 pCurr->pbData = NULL;
372 cbEvicted += pCurr->cbData;
373
374 pdmBlkCacheEntryRemoveFromList(pCurr);
375 pdmBlkCacheSub(pCache, pCurr->cbData);
376
377 if (pGhostListDst)
378 {
379 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
380
381 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
382
383 /* We have to remove the last entries from the paged out list. */
384 while ( ((pGhostListDst->cbCached + pCurr->cbData) > pCache->cbRecentlyUsedOutMax)
385 && pGhostEntFree)
386 {
387 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
388 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
389
390 pGhostEntFree = pGhostEntFree->pPrev;
391
392 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
393
394 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
395 {
396 pdmBlkCacheEntryRemoveFromList(pFree);
397
398 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
399 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
400 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
401
402 RTMemFree(pFree);
403 }
404
405 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
406 }
407
408 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
409 {
410 /* Couldn't remove enough entries. Delete */
411 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
412 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
413 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
414
415 RTMemFree(pCurr);
416 }
417 else
418 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
419 }
420 else
421 {
422 /* Delete the entry from the AVL tree it is assigned to. */
423 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
424 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
425 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
426
427 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
428 RTMemFree(pCurr);
429 }
430 }
431
432 }
433 else
434 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
435 }
436
437 return cbEvicted;
438}
439
440static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
441{
442 size_t cbRemoved = 0;
443
444 if ((pCache->cbCached + cbData) < pCache->cbMax)
445 return true;
446 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
447 {
448 /* Try to evict as many bytes as possible from A1in */
449 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
450 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
451
452 /*
453 * If it was not possible to remove enough entries
454 * try the frequently accessed cache.
455 */
456 if (cbRemoved < cbData)
457 {
458 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
459
460 /*
461 * If we removed something we can't pass the reuse buffer flag anymore because
462 * we don't need to evict that much data
463 */
464 if (!cbRemoved)
465 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
466 NULL, fReuseBuffer, ppbBuffer);
467 else
468 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
469 NULL, false, NULL);
470 }
471 }
472 else
473 {
474 /* We have to remove entries from frequently access list. */
475 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
476 NULL, fReuseBuffer, ppbBuffer);
477 }
478
479 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
480 return (cbRemoved >= cbData);
481}
482
483DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEIOXFER pIoXfer)
484{
485 int rc = VINF_SUCCESS;
486
487 switch (pBlkCache->enmType)
488 {
489 case PDMBLKCACHETYPE_DEV:
490 {
491 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
492 pIoXfer->enmXferDir,
493 off, pIoXfer->cbXfer,
494 &pIoXfer->SgBuf, pIoXfer);
495 break;
496 }
497 case PDMBLKCACHETYPE_DRV:
498 {
499 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
500 pIoXfer->enmXferDir,
501 off, pIoXfer->cbXfer,
502 &pIoXfer->SgBuf, pIoXfer);
503 break;
504 }
505 case PDMBLKCACHETYPE_USB:
506 {
507 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
508 pIoXfer->enmXferDir,
509 off, pIoXfer->cbXfer,
510 &pIoXfer->SgBuf, pIoXfer);
511 break;
512 }
513 case PDMBLKCACHETYPE_INTERNAL:
514 {
515 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
516 pIoXfer->enmXferDir,
517 off, pIoXfer->cbXfer,
518 &pIoXfer->SgBuf, pIoXfer);
519 break;
520 }
521 default:
522 AssertMsgFailed(("Unknown block cache type!\n"));
523 }
524
525 return rc;
526}
527
528/**
529 * Initiates a read I/O task for the given entry.
530 *
531 * @returns VBox status code.
532 * @param pEntry The entry to fetch the data to.
533 */
534static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
535{
536 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
537 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
538
539 /* Make sure no one evicts the entry while it is accessed. */
540 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
541
542 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
543 if (RT_UNLIKELY(!pIoXfer))
544 return VERR_NO_MEMORY;
545
546 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
547
548 pIoXfer->fIoCache = true;
549 pIoXfer->pEntry = pEntry;
550 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
551 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
552 pIoXfer->cbXfer = pEntry->cbData;
553 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
554 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
555
556 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
557}
558
559/**
560 * Initiates a write I/O task for the given entry.
561 *
562 * @returns nothing.
563 * @param pEntry The entry to read the data from.
564 */
565static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
566{
567 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
568 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
569
570 /* Make sure no one evicts the entry while it is accessed. */
571 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
572
573 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
574 if (RT_UNLIKELY(!pIoXfer))
575 return VERR_NO_MEMORY;
576
577 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
578
579 pIoXfer->fIoCache = true;
580 pIoXfer->pEntry = pEntry;
581 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
582 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
583 pIoXfer->cbXfer = pEntry->cbData;
584 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
585 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
586
587 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
588}
589
590/**
591 * Passthrough a part of a request directly to the I/O manager
592 * handling the endpoint.
593 *
594 * @returns VBox status code.
595 * @param pEndpoint The endpoint.
596 * @param pTask The task.
597 * @param pIoMemCtx The I/O memory context to use.
598 * @param offStart Offset to start transfer from.
599 * @param cbData Amount of data to transfer.
600 * @param enmTransferType The transfer type (read/write)
601 */
602static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
603 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
604 PDMBLKCACHEXFERDIR enmXferDir)
605{
606
607 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
608 if (RT_UNLIKELY(!pIoXfer))
609 return VERR_NO_MEMORY;
610
611 ASMAtomicIncU32(&pReq->cXfersPending);
612 pIoXfer->fIoCache = false;
613 pIoXfer->pReq = pReq;
614 pIoXfer->cbXfer = cbData;
615 pIoXfer->enmXferDir = enmXferDir;
616 if (pSgBuf)
617 {
618 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
619 RTSgBufAdvance(pSgBuf, cbData);
620 }
621
622 return pdmBlkCacheEnqueue(pBlkCache, offStart, pIoXfer);
623}
624
625/**
626 * Commit a single dirty entry to the endpoint
627 *
628 * @returns nothing
629 * @param pEntry The entry to commit.
630 */
631static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
632{
633 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
634 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
635 ("Invalid flags set for entry %#p\n", pEntry));
636
637 pdmBlkCacheEntryWriteToMedium(pEntry);
638}
639
640/**
641 * Commit all dirty entries for a single endpoint.
642 *
643 * @returns nothing.
644 * @param pBlkCache The endpoint cache to commit.
645 */
646static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
647{
648 uint32_t cbCommitted = 0;
649 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
650
651 /* The list is moved to a new header to reduce locking overhead. */
652 RTLISTNODE ListDirtyNotCommitted;
653 RTSPINLOCKTMP Tmp;
654
655 RTListInit(&ListDirtyNotCommitted);
656 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
657 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
658 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
659
660 if (!RTListIsEmpty(&ListDirtyNotCommitted))
661 {
662 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
663
664 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
665 {
666 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
667 NodeNotCommitted);
668 pdmBlkCacheEntryCommit(pEntry);
669 cbCommitted += pEntry->cbData;
670 RTListNodeRemove(&pEntry->NodeNotCommitted);
671 pEntry = pNext;
672 }
673
674 /* Commit the last endpoint */
675 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
676 pdmBlkCacheEntryCommit(pEntry);
677 RTListNodeRemove(&pEntry->NodeNotCommitted);
678 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
679 ("Committed all entries but list is not empty\n"));
680 }
681
682 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
683 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
684 ("Number of committed bytes exceeds number of dirty bytes\n"));
685 ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
686}
687
688/**
689 * Commit all dirty entries in the cache.
690 *
691 * @returns nothing.
692 * @param pCache The global cache instance.
693 */
694static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
695{
696 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
697
698 if (!fCommitInProgress)
699 {
700 pdmBlkCacheLockEnter(pCache);
701 Assert(!RTListIsEmpty(&pCache->ListUsers));
702
703 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
704 AssertPtr(pBlkCache);
705
706 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
707 {
708 pdmBlkCacheCommit(pBlkCache);
709
710 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
711 NodeCacheUser);
712 }
713
714 /* Commit the last endpoint */
715 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
716 pdmBlkCacheCommit(pBlkCache);
717
718 pdmBlkCacheLockLeave(pCache);
719 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
720 }
721}
722
723/**
724 * Adds the given entry as a dirty to the cache.
725 *
726 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
727 * @param pBlkCache The endpoint cache the entry belongs to.
728 * @param pEntry The entry to add.
729 */
730static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
731{
732 bool fDirtyBytesExceeded = false;
733 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
734
735 /* If the commit timer is disabled we commit right away. */
736 if (pCache->u32CommitTimeoutMs == 0)
737 {
738 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
739 pdmBlkCacheEntryCommit(pEntry);
740 }
741 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
742 {
743 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
744
745 RTSPINLOCKTMP Tmp;
746 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
747 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
748 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
749
750 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
751
752 /* Prevent committing if the VM was suspended. */
753 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
754 fDirtyBytesExceeded = (cbDirty >= pCache->cbCommitDirtyThreshold);
755 }
756
757 return fDirtyBytesExceeded;
758}
759
760static PPDMBLKCACHE pdmR3BlkCacheFindById(PPDMBLKCACHEGLOBAL pBlkCacheGlobal, const char *pcszId)
761{
762 bool fFound = false;
763 PPDMBLKCACHE pBlkCache = NULL;
764
765 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
766 {
767 if (!RTStrCmp(pBlkCache->pszId, pcszId))
768 {
769 fFound = true;
770 break;
771 }
772 }
773
774 return fFound ? pBlkCache : NULL;
775}
776
777/**
778 * Commit timer callback.
779 */
780static void pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
781{
782 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
783
784 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
785
786 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
787 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
788 pdmBlkCacheCommitDirtyEntries(pCache);
789
790 TMTimerSetMillies(pTimer, pCache->u32CommitTimeoutMs);
791 LogFlowFunc(("Entries committed, going to sleep\n"));
792}
793
794static DECLCALLBACK(int) pdmR3BlkCacheSaveExec(PVM pVM, PSSMHANDLE pSSM)
795{
796 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
797
798 AssertPtr(pBlkCacheGlobal);
799
800 pdmBlkCacheLockEnter(pBlkCacheGlobal);
801
802 SSMR3PutU32(pSSM, pBlkCacheGlobal->cRefs);
803
804 /* Go through the list and save all dirty entries. */
805 PPDMBLKCACHE pBlkCache;
806 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
807 {
808 uint32_t cEntries = 0;
809 PPDMBLKCACHEENTRY pEntry;
810
811 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
812 SSMR3PutU32(pSSM, strlen(pBlkCache->pszId));
813 SSMR3PutStrZ(pSSM, pBlkCache->pszId);
814
815 /* Count the number of entries to safe. */
816 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
817 {
818 cEntries++;
819 }
820
821 SSMR3PutU32(pSSM, cEntries);
822
823 /* Walk the list of all dirty entries and save them. */
824 RTListForEach(&pBlkCache->ListDirtyNotCommitted, pEntry, PDMBLKCACHEENTRY, NodeNotCommitted)
825 {
826 /* A few sanity checks. */
827 AssertMsg(!pEntry->cRefs, ("The entry is still referenced\n"));
828 AssertMsg(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY, ("Entry is not dirty\n"));
829 AssertMsg(!(pEntry->fFlags & ~PDMBLKCACHE_ENTRY_IS_DIRTY), ("Invalid flags set\n"));
830 AssertMsg(!pEntry->pWaitingHead && !pEntry->pWaitingTail, ("There are waiting requests\n"));
831 AssertMsg( pEntry->pList == &pBlkCacheGlobal->LruRecentlyUsedIn
832 || pEntry->pList == &pBlkCacheGlobal->LruFrequentlyUsed,
833 ("Invalid list\n"));
834 AssertMsg(pEntry->cbData == pEntry->Core.KeyLast - pEntry->Core.Key + 1,
835 ("Size and range do not match\n"));
836
837 /* Save */
838 SSMR3PutU64(pSSM, pEntry->Core.Key);
839 SSMR3PutU32(pSSM, pEntry->cbData);
840 SSMR3PutMem(pSSM, pEntry->pbData, pEntry->cbData);
841 }
842
843 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
844 }
845
846 pdmBlkCacheLockLeave(pBlkCacheGlobal);
847
848 /* Terminator */
849 return SSMR3PutU32(pSSM, UINT32_MAX);
850}
851
852static DECLCALLBACK(int) pdmR3BlkCacheLoadExec(PVM pVM, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass)
853{
854 int rc = VINF_SUCCESS;
855 uint32_t cRefs;
856 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
857
858 AssertPtr(pBlkCacheGlobal);
859
860 pdmBlkCacheLockEnter(pBlkCacheGlobal);
861
862 if (uVersion != PDM_BLK_CACHE_SAVED_STATE_VERSION)
863 return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION;
864
865 SSMR3GetU32(pSSM, &cRefs);
866
867 if (cRefs == pBlkCacheGlobal->cRefs)
868 {
869 char *pszId = NULL;
870
871 while ( cRefs > 0
872 && RT_SUCCESS(rc))
873 {
874 PPDMBLKCACHE pBlkCache = NULL;
875 uint32_t cbId = 0;
876
877 SSMR3GetU32(pSSM, &cbId);
878 Assert(cbId > 0);
879
880 cbId++; /* Include terminator */
881 pszId = (char *)RTMemAllocZ(cbId * sizeof(char));
882 if (!pszId)
883 {
884 rc = VERR_NO_MEMORY;
885 break;
886 }
887
888 rc = SSMR3GetStrZ(pSSM, pszId, cbId);
889 AssertRC(rc);
890
891 /* Search for the block cache with the provided id. */
892 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pszId);
893 if (!pBlkCache)
894 {
895 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
896 N_("The VM is missing a block device. Please make sure the source and target VMs have compatible storage configurations"));
897 break;
898 }
899
900 RTStrFree(pszId);
901 pszId = NULL;
902
903 /* Get the entries */
904 uint32_t cEntries;
905 SSMR3GetU32(pSSM, &cEntries);
906
907 while (cEntries > 0)
908 {
909 PPDMBLKCACHEENTRY pEntry;
910 uint64_t off;
911 uint32_t cbEntry;
912
913 SSMR3GetU64(pSSM, &off);
914 SSMR3GetU32(pSSM, &cbEntry);
915
916 pEntry = pdmBlkCacheEntryAlloc(pBlkCache, off, cbEntry, NULL);
917 if (!pEntry)
918 {
919 rc = VERR_NO_MEMORY;
920 break;
921 }
922
923 rc = SSMR3GetMem(pSSM, pEntry->pbData, cbEntry);
924 if (RT_FAILURE(rc))
925 {
926 RTMemFree(pEntry->pbData);
927 RTMemFree(pEntry);
928 break;
929 }
930
931 /* Insert into the tree. */
932 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
933 Assert(fInserted);
934
935 /* Add to the dirty list. */
936 pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
937 pdmBlkCacheEntryAddToList(&pBlkCacheGlobal->LruRecentlyUsedIn, pEntry);
938 pdmBlkCacheAdd(pBlkCacheGlobal, cbEntry);
939 pdmBlkCacheEntryRelease(pEntry);
940 cEntries--;
941 }
942
943 cRefs--;
944 }
945
946 if (pszId)
947 RTStrFree(pszId);
948 }
949 else
950 rc = SSMR3SetCfgError(pSSM, RT_SRC_POS,
951 N_("The VM is missing a block device. Please make sure the source and target VMs have compatible storage configurations"));
952
953 pdmBlkCacheLockLeave(pBlkCacheGlobal);
954
955 if (RT_SUCCESS(rc))
956 {
957 uint32_t u32 = 0;
958 rc = SSMR3GetU32(pSSM, &u32);
959 if (RT_SUCCESS(rc))
960 AssertMsgReturn(u32 == UINT32_MAX, ("%#x\n", u32), VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
961 }
962
963 return rc;
964}
965
966int pdmR3BlkCacheInit(PVM pVM)
967{
968 int rc = VINF_SUCCESS;
969 PUVM pUVM = pVM->pUVM;
970 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
971
972 LogFlowFunc((": pVM=%p\n", pVM));
973
974 VM_ASSERT_EMT(pVM);
975
976 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
977 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
978
979 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
980 if (!pBlkCacheGlobal)
981 return VERR_NO_MEMORY;
982
983 RTListInit(&pBlkCacheGlobal->ListUsers);
984 pBlkCacheGlobal->pVM = pVM;
985 pBlkCacheGlobal->cRefs = 0;
986 pBlkCacheGlobal->cbCached = 0;
987 pBlkCacheGlobal->fCommitInProgress = false;
988
989 /* Initialize members */
990 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
991 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
992 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
993
994 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
995 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
996 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
997
998 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
999 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
1000 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
1001
1002 do
1003 {
1004 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
1005 AssertLogRelRCBreak(rc);
1006 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
1007
1008 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
1009 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
1010 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
1011 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
1012
1013 /** @todo r=aeichner: Experiment to find optimal default values */
1014 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
1015 AssertLogRelRCBreak(rc);
1016 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
1017 AssertLogRelRCBreak(rc);
1018 } while (0);
1019
1020 if (RT_SUCCESS(rc))
1021 {
1022 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
1023 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1024 "/PDM/BlkCache/cbMax",
1025 STAMUNIT_BYTES,
1026 "Maximum cache size");
1027 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
1028 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1029 "/PDM/BlkCache/cbCached",
1030 STAMUNIT_BYTES,
1031 "Currently used cache");
1032 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
1033 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1034 "/PDM/BlkCache/cbCachedMruIn",
1035 STAMUNIT_BYTES,
1036 "Number of bytes cached in MRU list");
1037 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
1038 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1039 "/PDM/BlkCache/cbCachedMruOut",
1040 STAMUNIT_BYTES,
1041 "Number of bytes cached in FRU list");
1042 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
1043 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
1044 "/PDM/BlkCache/cbCachedFru",
1045 STAMUNIT_BYTES,
1046 "Number of bytes cached in FRU ghost list");
1047
1048#ifdef VBOX_WITH_STATISTICS
1049 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
1050 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1051 "/PDM/BlkCache/CacheHits",
1052 STAMUNIT_COUNT, "Number of hits in the cache");
1053 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
1054 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1055 "/PDM/BlkCache/CachePartialHits",
1056 STAMUNIT_COUNT, "Number of partial hits in the cache");
1057 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
1058 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1059 "/PDM/BlkCache/CacheMisses",
1060 STAMUNIT_COUNT, "Number of misses when accessing the cache");
1061 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
1062 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1063 "/PDM/BlkCache/CacheRead",
1064 STAMUNIT_BYTES, "Number of bytes read from the cache");
1065 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
1066 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1067 "/PDM/BlkCache/CacheWritten",
1068 STAMUNIT_BYTES, "Number of bytes written to the cache");
1069 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
1070 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1071 "/PDM/BlkCache/CacheTreeGet",
1072 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
1073 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
1074 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1075 "/PDM/BlkCache/CacheTreeInsert",
1076 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
1077 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
1078 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1079 "/PDM/BlkCache/CacheTreeRemove",
1080 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
1081 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
1082 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1083 "/PDM/BlkCache/CacheBuffersReused",
1084 STAMUNIT_COUNT, "Number of times a buffer could be reused");
1085#endif
1086
1087 /* Initialize the critical section */
1088 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
1089 }
1090
1091 if (RT_SUCCESS(rc))
1092 {
1093 /* Create the commit timer */
1094 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1095 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
1096 pdmBlkCacheCommitTimerCallback,
1097 pBlkCacheGlobal,
1098 "BlkCache-Commit",
1099 &pBlkCacheGlobal->pTimerCommit);
1100
1101 if (RT_SUCCESS(rc))
1102 {
1103 /* Register saved state handler. */
1104 rc = SSMR3RegisterInternal(pVM, "pdmblkcache", 0, PDM_BLK_CACHE_SAVED_STATE_VERSION, pBlkCacheGlobal->cbMax,
1105 NULL, NULL, NULL,
1106 NULL, pdmR3BlkCacheSaveExec, NULL,
1107 NULL, pdmR3BlkCacheLoadExec, NULL);
1108 if (RT_SUCCESS(rc))
1109 {
1110 LogRel(("BlkCache: Cache successfully initialised. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
1111 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
1112 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
1113 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
1114 return VINF_SUCCESS;
1115 }
1116 }
1117
1118 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1119 }
1120
1121 if (pBlkCacheGlobal)
1122 RTMemFree(pBlkCacheGlobal);
1123
1124 LogFlowFunc((": returns rc=%Rrc\n", pVM, rc));
1125 return rc;
1126}
1127
1128void pdmR3BlkCacheTerm(PVM pVM)
1129{
1130 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1131
1132 if (pBlkCacheGlobal)
1133 {
1134 /* Make sure no one else uses the cache now */
1135 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1136
1137 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
1138 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
1139 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
1140 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
1141
1142 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1143
1144 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
1145 RTMemFree(pBlkCacheGlobal);
1146 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
1147 }
1148}
1149
1150int pdmR3BlkCacheResume(PVM pVM)
1151{
1152 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1153
1154 LogFlowFunc(("pVM=%#p\n", pVM));
1155
1156 if ( pBlkCacheGlobal
1157 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
1158 {
1159 /* The VM was suspended because of an I/O error, commit all dirty entries. */
1160 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
1161 }
1162
1163 return VINF_SUCCESS;
1164}
1165
1166static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
1167{
1168 int rc = VINF_SUCCESS;
1169 PPDMBLKCACHE pBlkCache = NULL;
1170 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1171
1172 if (!pBlkCacheGlobal)
1173 return VERR_NOT_SUPPORTED;
1174
1175 /*
1176 * Check that no other user cache has the same id first,
1177 * Unique id's are necessary in case the state is saved.
1178 */
1179 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1180
1181 pBlkCache = pdmR3BlkCacheFindById(pBlkCacheGlobal, pcszId);
1182
1183 if (!pBlkCache)
1184 {
1185 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
1186
1187 if (pBlkCache)
1188 pBlkCache->pszId = RTStrDup(pcszId);
1189
1190 if ( pBlkCache
1191 && pBlkCache->pszId)
1192 {
1193 pBlkCache->pCache = pBlkCacheGlobal;
1194 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1195
1196 rc = RTSpinlockCreate(&pBlkCache->LockList);
1197 if (RT_SUCCESS(rc))
1198 {
1199 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1200 if (RT_SUCCESS(rc))
1201 {
1202 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1203 if (pBlkCache->pTree)
1204 {
1205 /* Arm the timer if this is the first endpoint. */
1206 if ( !pBlkCacheGlobal->cRefs
1207 && pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1208 rc = TMTimerSetMillies(pBlkCacheGlobal->pTimerCommit, pBlkCacheGlobal->u32CommitTimeoutMs);
1209
1210 if (RT_SUCCESS(rc))
1211 {
1212#ifdef VBOX_WITH_STATISTICS
1213 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1214 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1215 STAMUNIT_COUNT, "Number of deferred writes",
1216 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1217#endif
1218
1219 /* Add to the list of users. */
1220 pBlkCacheGlobal->cRefs++;
1221 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1222 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1223
1224 *ppBlkCache = pBlkCache;
1225 LogFlowFunc(("returns success\n"));
1226 return VINF_SUCCESS;
1227 }
1228 }
1229 else
1230 rc = VERR_NO_MEMORY;
1231
1232 RTSemRWDestroy(pBlkCache->SemRWEntries);
1233 }
1234
1235 RTSpinlockDestroy(pBlkCache->LockList);
1236 }
1237
1238 RTStrFree(pBlkCache->pszId);
1239 }
1240 else
1241 rc = VERR_NO_MEMORY;
1242
1243 if (pBlkCache)
1244 RTMemFree(pBlkCache);
1245 }
1246 else
1247 rc = VERR_ALREADY_EXISTS;
1248
1249 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1250
1251 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1252 return rc;
1253}
1254
1255VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1256 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1257 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1258 const char *pcszId)
1259{
1260 int rc = VINF_SUCCESS;
1261 PPDMBLKCACHE pBlkCache;
1262
1263 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1264 if (RT_SUCCESS(rc))
1265 {
1266 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1267 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1268 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1269 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1270 *ppBlkCache = pBlkCache;
1271 }
1272
1273 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1274 return rc;
1275}
1276
1277VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1278 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1279 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1280 const char *pcszId)
1281{
1282 int rc = VINF_SUCCESS;
1283 PPDMBLKCACHE pBlkCache;
1284
1285 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1286 if (RT_SUCCESS(rc))
1287 {
1288 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1289 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1290 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1291 pBlkCache->u.Dev.pDevIns = pDevIns;
1292 *ppBlkCache = pBlkCache;
1293 }
1294
1295 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1296 return rc;
1297
1298}
1299
1300VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1301 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1302 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1303 const char *pcszId)
1304{
1305 int rc = VINF_SUCCESS;
1306 PPDMBLKCACHE pBlkCache;
1307
1308 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1309 if (RT_SUCCESS(rc))
1310 {
1311 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1312 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1313 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1314 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1315 *ppBlkCache = pBlkCache;
1316 }
1317
1318 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1319 return rc;
1320
1321}
1322
1323VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1324 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1325 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1326 const char *pcszId)
1327{
1328 int rc = VINF_SUCCESS;
1329 PPDMBLKCACHE pBlkCache;
1330
1331 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1332 if (RT_SUCCESS(rc))
1333 {
1334 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1335 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1336 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1337 pBlkCache->u.Int.pvUser = pvUser;
1338 *ppBlkCache = pBlkCache;
1339 }
1340
1341 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1342 return rc;
1343
1344}
1345
1346/**
1347 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1348 *
1349 * @returns IPRT status code.
1350 * @param pNode The node to destroy.
1351 * @param pvUser Opaque user data.
1352 */
1353static int pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1354{
1355 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1356 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1357 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1358
1359 while (ASMAtomicReadU32(&pEntry->fFlags) & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS)
1360 {
1361 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1362 pdmBlkCacheEntryRef(pEntry);
1363 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1364 pdmBlkCacheLockLeave(pCache);
1365
1366 RTThreadSleep(250);
1367
1368 /* Re-enter all locks */
1369 pdmBlkCacheLockEnter(pCache);
1370 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1371 pdmBlkCacheEntryRelease(pEntry);
1372 }
1373
1374 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
1375 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1376
1377 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1378 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1379
1380 pdmBlkCacheEntryRemoveFromList(pEntry);
1381
1382 if (fUpdateCache)
1383 pdmBlkCacheSub(pCache, pEntry->cbData);
1384
1385 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1386 RTMemFree(pEntry);
1387
1388 return VINF_SUCCESS;
1389}
1390
1391/**
1392 * Destroys all cache resources used by the given endpoint.
1393 *
1394 * @returns nothing.
1395 * @param pEndpoint The endpoint to the destroy.
1396 */
1397VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1398{
1399 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1400
1401 /*
1402 * Commit all dirty entries now (they are waited on for completion during the
1403 * destruction of the AVL tree below).
1404 * The exception is if the VM was paused because of an I/O error before.
1405 */
1406 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1407 pdmBlkCacheCommit(pBlkCache);
1408
1409 /* Make sure nobody is accessing the cache while we delete the tree. */
1410 pdmBlkCacheLockEnter(pCache);
1411 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1412 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1413 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1414
1415 RTSpinlockDestroy(pBlkCache->LockList);
1416
1417 pCache->cRefs--;
1418 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1419
1420 if ( !pCache->cRefs
1421 && pCache->u32CommitTimeoutMs > 0)
1422 TMTimerStop(pCache->pTimerCommit);
1423
1424 pdmBlkCacheLockLeave(pCache);
1425
1426 RTSemRWDestroy(pBlkCache->SemRWEntries);
1427
1428#ifdef VBOX_WITH_STATISTICS
1429 STAMR3Deregister(pCache->pVM, &pBlkCache->StatWriteDeferred);
1430#endif
1431
1432 RTStrFree(pBlkCache->pszId);
1433 RTMemFree(pBlkCache);
1434}
1435
1436VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1437{
1438 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1439
1440 /*
1441 * Validate input.
1442 */
1443 if (!pDevIns)
1444 return;
1445 VM_ASSERT_EMT(pVM);
1446
1447 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1448 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1449
1450 /* Return silently if not supported. */
1451 if (!pBlkCacheGlobal)
1452 return;
1453
1454 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1455
1456 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1457 {
1458 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1459 && pBlkCache->u.Dev.pDevIns == pDevIns)
1460 PDMR3BlkCacheRelease(pBlkCache);
1461 }
1462
1463 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1464}
1465
1466VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1467{
1468 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1469
1470 /*
1471 * Validate input.
1472 */
1473 if (!pDrvIns)
1474 return;
1475 VM_ASSERT_EMT(pVM);
1476
1477 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1478 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1479
1480 /* Return silently if not supported. */
1481 if (!pBlkCacheGlobal)
1482 return;
1483
1484 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1485
1486 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1487 {
1488 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1489 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1490 PDMR3BlkCacheRelease(pBlkCache);
1491 }
1492
1493 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1494}
1495
1496VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1497{
1498 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1499
1500 /*
1501 * Validate input.
1502 */
1503 if (!pUsbIns)
1504 return;
1505 VM_ASSERT_EMT(pVM);
1506
1507 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1508 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1509
1510 /* Return silently if not supported. */
1511 if (!pBlkCacheGlobal)
1512 return;
1513
1514 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1515
1516 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1517 {
1518 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1519 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1520 PDMR3BlkCacheRelease(pBlkCache);
1521 }
1522
1523 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1524}
1525
1526static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1527{
1528 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1529 PPDMBLKCACHEENTRY pEntry = NULL;
1530
1531 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1532
1533 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1534 pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1535 if (pEntry)
1536 pdmBlkCacheEntryRef(pEntry);
1537 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1538
1539 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1540
1541 return pEntry;
1542}
1543
1544/**
1545 * Return the best fit cache entries for the given offset.
1546 *
1547 * @returns nothing.
1548 * @param pBlkCache The endpoint cache.
1549 * @param off The offset.
1550 * @param pEntryAbove Where to store the pointer to the best fit entry above the
1551 * the given offset. NULL if not required.
1552 */
1553static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off,
1554 PPDMBLKCACHEENTRY *ppEntryAbove)
1555{
1556 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1557
1558 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1559
1560 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1561 if (ppEntryAbove)
1562 {
1563 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1564 if (*ppEntryAbove)
1565 pdmBlkCacheEntryRef(*ppEntryAbove);
1566 }
1567
1568 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1569
1570 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1571}
1572
1573static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1574{
1575 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1576
1577 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1578 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1579 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1580 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1581 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1582 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1583}
1584
1585/**
1586 * Allocates and initializes a new entry for the cache.
1587 * The entry has a reference count of 1.
1588 *
1589 * @returns Pointer to the new cache entry or NULL if out of memory.
1590 * @param pBlkCache The cache the entry belongs to.
1591 * @param off Start offset.
1592 * @param cbData Size of the cache entry.
1593 * @param pbBuffer Pointer to the buffer to use.
1594 * NULL if a new buffer should be allocated.
1595 * The buffer needs to have the same size of the entry.
1596 */
1597static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
1598 uint64_t off, size_t cbData, uint8_t *pbBuffer)
1599{
1600 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1601
1602 if (RT_UNLIKELY(!pEntryNew))
1603 return NULL;
1604
1605 pEntryNew->Core.Key = off;
1606 pEntryNew->Core.KeyLast = off + cbData - 1;
1607 pEntryNew->pBlkCache = pBlkCache;
1608 pEntryNew->fFlags = 0;
1609 pEntryNew->cRefs = 1; /* We are using it now. */
1610 pEntryNew->pList = NULL;
1611 pEntryNew->cbData = cbData;
1612 pEntryNew->pWaitingHead = NULL;
1613 pEntryNew->pWaitingTail = NULL;
1614 if (pbBuffer)
1615 pEntryNew->pbData = pbBuffer;
1616 else
1617 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1618
1619 if (RT_UNLIKELY(!pEntryNew->pbData))
1620 {
1621 RTMemFree(pEntryNew);
1622 return NULL;
1623 }
1624
1625 return pEntryNew;
1626}
1627
1628/**
1629 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1630 * in exclusive mode.
1631 *
1632 * @returns true if the flag in fSet is set and the one in fClear is clear.
1633 * false otherwise.
1634 * The R/W semaphore is only held if true is returned.
1635 *
1636 * @param pBlkCache The endpoint cache instance data.
1637 * @param pEntry The entry to check the flags for.
1638 * @param fSet The flag which is tested to be set.
1639 * @param fClear The flag which is tested to be clear.
1640 */
1641DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1642 PPDMBLKCACHEENTRY pEntry,
1643 uint32_t fSet, uint32_t fClear)
1644{
1645 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1646 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1647
1648 if (fPassed)
1649 {
1650 /* Acquire the lock and check again because the completion callback might have raced us. */
1651 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1652
1653 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1654 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1655
1656 /* Drop the lock if we didn't passed the test. */
1657 if (!fPassed)
1658 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1659 }
1660
1661 return fPassed;
1662}
1663
1664/**
1665 * Adds a segment to the waiting list for a cache entry
1666 * which is currently in progress.
1667 *
1668 * @returns nothing.
1669 * @param pEntry The cache entry to add the segment to.
1670 * @param pSeg The segment to add.
1671 */
1672DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1673 PPDMBLKCACHEWAITER pWaiter)
1674{
1675 pWaiter->pNext = NULL;
1676
1677 if (pEntry->pWaitingHead)
1678 {
1679 AssertPtr(pEntry->pWaitingTail);
1680
1681 pEntry->pWaitingTail->pNext = pWaiter;
1682 pEntry->pWaitingTail = pWaiter;
1683 }
1684 else
1685 {
1686 Assert(!pEntry->pWaitingTail);
1687
1688 pEntry->pWaitingHead = pWaiter;
1689 pEntry->pWaitingTail = pWaiter;
1690 }
1691}
1692
1693/**
1694 * Add a buffer described by the I/O memory context
1695 * to the entry waiting for completion.
1696 *
1697 * @returns VBox status code.
1698 * @param pEntry The entry to add the buffer to.
1699 * @param pTask Task associated with the buffer.
1700 * @param pIoMemCtx The memory context to use.
1701 * @param offDiff Offset from the start of the buffer
1702 * in the entry.
1703 * @param cbData Amount of data to wait for onthis entry.
1704 * @param fWrite Flag whether the task waits because it wants to write
1705 * to the cache entry.
1706 */
1707static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry,
1708 PPDMBLKCACHEREQ pReq,
1709 PRTSGBUF pSgBuf, uint64_t offDiff,
1710 size_t cbData, bool fWrite)
1711{
1712 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1713 if (!pWaiter)
1714 return VERR_NO_MEMORY;
1715
1716 ASMAtomicIncU32(&pReq->cXfersPending);
1717 pWaiter->pReq = pReq;
1718 pWaiter->offCacheEntry = offDiff;
1719 pWaiter->cbTransfer = cbData;
1720 pWaiter->fWrite = fWrite;
1721 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1722 RTSgBufAdvance(pSgBuf, cbData);
1723
1724 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1725
1726 return VINF_SUCCESS;
1727}
1728
1729/**
1730 * Calculate aligned offset and size for a new cache entry
1731 * which do not intersect with an already existing entry and the
1732 * file end.
1733 *
1734 * @returns The number of bytes the entry can hold of the requested amount
1735 * of byte.
1736 * @param pEndpoint The endpoint.
1737 * @param pBlkCache The endpoint cache.
1738 * @param off The start offset.
1739 * @param cb The number of bytes the entry needs to hold at least.
1740 * @param uAlignment Alignment of the boundary sizes.
1741 * @param poffAligned Where to store the aligned offset.
1742 * @param pcbAligned Where to store the aligned size of the entry.
1743 */
1744static size_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1745 uint64_t off, size_t cb,
1746 unsigned uAlignment,
1747 uint64_t *poffAligned, size_t *pcbAligned)
1748{
1749 size_t cbAligned;
1750 size_t cbInEntry = 0;
1751 uint64_t offAligned;
1752 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1753
1754 /* Get the best fit entries around the offset */
1755 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1756
1757 /* Log the info */
1758 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1759 pEntryAbove ? "B" : "No b",
1760 off,
1761 pEntryAbove ? pEntryAbove->Core.Key : 0,
1762 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1763 pEntryAbove ? pEntryAbove->cbData : 0));
1764
1765 offAligned = off;
1766
1767 if ( pEntryAbove
1768 && off + cb > pEntryAbove->Core.Key)
1769 {
1770 cbInEntry = pEntryAbove->Core.Key - off;
1771 cbAligned = pEntryAbove->Core.Key - offAligned;
1772 }
1773 else
1774 {
1775 cbAligned = cb;
1776 cbInEntry = cb;
1777 }
1778
1779 /* A few sanity checks */
1780 AssertMsg(!pEntryAbove || (offAligned + cbAligned) <= pEntryAbove->Core.Key,
1781 ("Aligned size intersects with another cache entry\n"));
1782 Assert(cbInEntry <= cbAligned);
1783
1784 if (pEntryAbove)
1785 pdmBlkCacheEntryRelease(pEntryAbove);
1786
1787 LogFlow(("offAligned=%llu cbAligned=%u\n", offAligned, cbAligned));
1788
1789 *poffAligned = offAligned;
1790 *pcbAligned = cbAligned;
1791
1792 return cbInEntry;
1793}
1794
1795/**
1796 * Create a new cache entry evicting data from the cache if required.
1797 *
1798 * @returns Pointer to the new cache entry or NULL
1799 * if not enough bytes could be evicted from the cache.
1800 * @param pEndpoint The endpoint.
1801 * @param pBlkCache The endpoint cache.
1802 * @param off The offset.
1803 * @param cb Number of bytes the cache entry should have.
1804 * @param uAlignment Alignment the size of the entry should have.
1805 * @param pcbData Where to store the number of bytes the new
1806 * entry can hold. May be lower than actually requested
1807 * due to another entry intersecting the access range.
1808 */
1809static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache,
1810 uint64_t off, size_t cb,
1811 unsigned uAlignment,
1812 size_t *pcbData)
1813{
1814 uint64_t offStart = 0;
1815 size_t cbEntry = 0;
1816 PPDMBLKCACHEENTRY pEntryNew = NULL;
1817 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1818 uint8_t *pbBuffer = NULL;
1819
1820 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, cb, uAlignment,
1821 &offStart, &cbEntry);
1822
1823 pdmBlkCacheLockEnter(pCache);
1824 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1825
1826 if (fEnough)
1827 {
1828 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1829
1830 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, offStart, cbEntry, pbBuffer);
1831 if (RT_LIKELY(pEntryNew))
1832 {
1833 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1834 pdmBlkCacheAdd(pCache, cbEntry);
1835 pdmBlkCacheLockLeave(pCache);
1836
1837 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1838
1839 AssertMsg( (off >= pEntryNew->Core.Key)
1840 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1841 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1842 off, pEntryNew->Core.Key));
1843 }
1844 else
1845 pdmBlkCacheLockLeave(pCache);
1846 }
1847 else
1848 pdmBlkCacheLockLeave(pCache);
1849
1850 return pEntryNew;
1851}
1852
1853static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(size_t cbXfer, void *pvUser)
1854{
1855 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1856
1857 if (RT_LIKELY(pReq))
1858 {
1859 pReq->pvUser = pvUser;
1860 pReq->cbXfer = cbXfer;
1861 pReq->rcReq = VINF_SUCCESS;
1862 pReq->cXfersPending = 0;
1863 }
1864
1865 return pReq;
1866}
1867
1868static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1869{
1870 switch (pBlkCache->enmType)
1871 {
1872 case PDMBLKCACHETYPE_DEV:
1873 {
1874 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1875 pReq->pvUser, pReq->rcReq);
1876 break;
1877 }
1878 case PDMBLKCACHETYPE_DRV:
1879 {
1880 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1881 pReq->pvUser, pReq->rcReq);
1882 break;
1883 }
1884 case PDMBLKCACHETYPE_USB:
1885 {
1886 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1887 pReq->pvUser, pReq->rcReq);
1888 break;
1889 }
1890 case PDMBLKCACHETYPE_INTERNAL:
1891 {
1892 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1893 pReq->pvUser, pReq->rcReq);
1894 break;
1895 }
1896 default:
1897 AssertMsgFailed(("Unknown block cache type!\n"));
1898 }
1899
1900 RTMemFree(pReq);
1901}
1902
1903static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1904 size_t cbComplete, int rcReq, bool fCallHandler)
1905{
1906 if (RT_FAILURE(rcReq))
1907 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1908
1909 AssertMsg(pReq->cbXfer >= cbComplete, ("Completed more than left\n"));
1910 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1911 uint32_t cbOld = ASMAtomicSubU32(&pReq->cbXfer, cbComplete);
1912 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1913
1914 if ( !(cbOld - cbComplete)
1915 && !cXfersPending)
1916 {
1917 if (fCallHandler)
1918 pdmBlkCacheReqComplete(pBlkCache, pReq);
1919 return true;
1920 }
1921
1922 LogFlowFunc(("pReq=%#p cXfersPending=%u cbXfer=%u\n", pReq, cXfersPending, (cbOld - cbComplete)));
1923 return false;
1924}
1925
1926VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1927 PCRTSGBUF pcSgBuf, size_t cbRead, void *pvUser)
1928{
1929 int rc = VINF_SUCCESS;
1930 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1931 PPDMBLKCACHEENTRY pEntry;
1932 PPDMBLKCACHEREQ pReq;
1933
1934 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbRead=%u pvUser=%#p\n",
1935 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbRead, pvUser));
1936
1937 RTSGBUF SgBuf;
1938 RTSgBufClone(&SgBuf, pcSgBuf);
1939
1940 /* Allocate new request structure. */
1941 pReq = pdmBlkCacheReqAlloc(cbRead, pvUser);
1942 if (RT_UNLIKELY(!pReq))
1943 return VERR_NO_MEMORY;
1944
1945 /* Increment data transfer counter to keep the request valid while we access it. */
1946 ASMAtomicIncU32(&pReq->cXfersPending);
1947
1948 while (cbRead)
1949 {
1950 size_t cbToRead;
1951
1952 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1953
1954 /*
1955 * If there is no entry we try to create a new one eviciting unused pages
1956 * if the cache is full. If this is not possible we will pass the request through
1957 * and skip the caching (all entries may be still in progress so they can't
1958 * be evicted)
1959 * If we have an entry it can be in one of the LRU lists where the entry
1960 * contains data (recently used or frequently used LRU) so we can just read
1961 * the data we need and put the entry at the head of the frequently used LRU list.
1962 * In case the entry is in one of the ghost lists it doesn't contain any data.
1963 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1964 */
1965 if (pEntry)
1966 {
1967 uint64_t offDiff = off - pEntry->Core.Key;
1968
1969 AssertMsg(off >= pEntry->Core.Key,
1970 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1971 off, pEntry->Core.Key));
1972
1973 AssertPtr(pEntry->pList);
1974
1975 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
1976
1977 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1978 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
1979 off, cbToRead));
1980
1981 cbRead -= cbToRead;
1982
1983 if (!cbRead)
1984 STAM_COUNTER_INC(&pCache->cHits);
1985 else
1986 STAM_COUNTER_INC(&pCache->cPartialHits);
1987
1988 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1989
1990 /* Ghost lists contain no data. */
1991 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1992 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1993 {
1994 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
1995 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
1996 PDMBLKCACHE_ENTRY_IS_DIRTY))
1997 {
1998 /* Entry didn't completed yet. Append to the list */
1999 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2000 &SgBuf, offDiff, cbToRead,
2001 false /* fWrite */);
2002 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2003 }
2004 else
2005 {
2006 /* Read as much as we can from the entry. */
2007 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
2008 ASMAtomicSubU32(&pReq->cbXfer, cbToRead);
2009 }
2010
2011 /* Move this entry to the top position */
2012 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2013 {
2014 pdmBlkCacheLockEnter(pCache);
2015 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2016 pdmBlkCacheLockLeave(pCache);
2017 }
2018 /* Release the entry */
2019 pdmBlkCacheEntryRelease(pEntry);
2020 }
2021 else
2022 {
2023 uint8_t *pbBuffer = NULL;
2024
2025 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
2026
2027 pdmBlkCacheLockEnter(pCache);
2028 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2029 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2030
2031 /* Move the entry to Am and fetch it to the cache. */
2032 if (fEnough)
2033 {
2034 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2035 pdmBlkCacheAdd(pCache, pEntry->cbData);
2036 pdmBlkCacheLockLeave(pCache);
2037
2038 if (pbBuffer)
2039 pEntry->pbData = pbBuffer;
2040 else
2041 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2042 AssertPtr(pEntry->pbData);
2043
2044 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2045 &SgBuf, offDiff, cbToRead,
2046 false /* fWrite */);
2047 pdmBlkCacheEntryReadFromMedium(pEntry);
2048 /* Release the entry */
2049 pdmBlkCacheEntryRelease(pEntry);
2050 }
2051 else
2052 {
2053 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2054 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2055 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2056 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2057 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2058
2059 pdmBlkCacheLockLeave(pCache);
2060
2061 RTMemFree(pEntry);
2062
2063 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2064 &SgBuf, off, cbToRead,
2065 PDMBLKCACHEXFERDIR_READ);
2066 }
2067 }
2068 }
2069 else
2070 {
2071#ifdef VBOX_WITH_IO_READ_CACHE
2072 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
2073 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2074 off, cbRead,
2075 PAGE_SIZE,
2076 &cbToRead);
2077
2078 cbRead -= cbToRead;
2079
2080 if (pEntryNew)
2081 {
2082 if (!cbRead)
2083 STAM_COUNTER_INC(&pCache->cMisses);
2084 else
2085 STAM_COUNTER_INC(&pCache->cPartialHits);
2086
2087 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2088 &SgBuf,
2089 off - pEntryNew->Core.Key,
2090 cbToRead,
2091 false /* fWrite */);
2092 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2093 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
2094 }
2095 else
2096 {
2097 /*
2098 * There is not enough free space in the cache.
2099 * Pass the request directly to the I/O manager.
2100 */
2101 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
2102
2103 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2104 &SgBuf, off, cbToRead,
2105 PDMBLKCACHEXFERDIR_READ);
2106 }
2107#else
2108 /* Clip read size if necessary. */
2109 PPDMBLKCACHEENTRY pEntryAbove;
2110 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
2111
2112 if (pEntryAbove)
2113 {
2114 if (off + cbRead > pEntryAbove->Core.Key)
2115 cbToRead = pEntryAbove->Core.Key - off;
2116 else
2117 cbToRead = cbRead;
2118
2119 pdmBlkCacheEntryRelease(pEntryAbove);
2120 }
2121 else
2122 cbToRead = cbRead;
2123
2124 cbRead -= cbToRead;
2125 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2126 &SgBuf, off, cbToRead,
2127 PDMBLKCACHEXFERDIR_READ);
2128#endif
2129 }
2130 off += cbToRead;
2131 }
2132
2133 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
2134 rc = VINF_AIO_TASK_PENDING;
2135
2136 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2137
2138 return rc;
2139}
2140
2141VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off,
2142 PCRTSGBUF pcSgBuf, size_t cbWrite, void *pvUser)
2143{
2144 int rc = VINF_SUCCESS;
2145 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2146 PPDMBLKCACHEENTRY pEntry;
2147 PPDMBLKCACHEREQ pReq;
2148
2149 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbWrite=%u pvUser=%#p\n",
2150 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbWrite, pvUser));
2151
2152 RTSGBUF SgBuf;
2153 RTSgBufClone(&SgBuf, pcSgBuf);
2154
2155 /* Allocate new request structure. */
2156 pReq = pdmBlkCacheReqAlloc(cbWrite, pvUser);
2157 if (RT_UNLIKELY(!pReq))
2158 return VERR_NO_MEMORY;
2159
2160 /* Increment data transfer counter to keep the request valid while we access it. */
2161 ASMAtomicIncU32(&pReq->cXfersPending);
2162
2163 while (cbWrite)
2164 {
2165 size_t cbToWrite;
2166
2167 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
2168
2169 if (pEntry)
2170 {
2171 /* Write the data into the entry and mark it as dirty */
2172 AssertPtr(pEntry->pList);
2173
2174 uint64_t offDiff = off - pEntry->Core.Key;
2175
2176 AssertMsg(off >= pEntry->Core.Key,
2177 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2178 off, pEntry->Core.Key));
2179
2180 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2181 cbWrite -= cbToWrite;
2182
2183 if (!cbWrite)
2184 STAM_COUNTER_INC(&pCache->cHits);
2185 else
2186 STAM_COUNTER_INC(&pCache->cPartialHits);
2187
2188 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2189
2190 /* Ghost lists contain no data. */
2191 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2192 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2193 {
2194 /* Check if the entry is dirty. */
2195 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2196 PDMBLKCACHE_ENTRY_IS_DIRTY,
2197 0))
2198 {
2199 /* If it is already dirty but not in progress just update the data. */
2200 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2201 {
2202 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff,
2203 cbToWrite);
2204 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2205 }
2206 else
2207 {
2208 /* The data isn't written to the file yet */
2209 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2210 &SgBuf, offDiff, cbToWrite,
2211 true /* fWrite */);
2212 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2213 }
2214
2215 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2216 }
2217 else /* Dirty bit not set */
2218 {
2219 /*
2220 * Check if a read is in progress for this entry.
2221 * We have to defer processing in that case.
2222 */
2223 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2224 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2225 0))
2226 {
2227 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2228 &SgBuf, offDiff, cbToWrite,
2229 true /* fWrite */);
2230 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2231 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2232 }
2233 else /* I/O in progress flag not set */
2234 {
2235 /* Write as much as we can into the entry and update the file. */
2236 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2237 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2238
2239 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2240 if (fCommit)
2241 pdmBlkCacheCommitDirtyEntries(pCache);
2242 }
2243 } /* Dirty bit not set */
2244
2245 /* Move this entry to the top position */
2246 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2247 {
2248 pdmBlkCacheLockEnter(pCache);
2249 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2250 pdmBlkCacheLockLeave(pCache);
2251 }
2252
2253 pdmBlkCacheEntryRelease(pEntry);
2254 }
2255 else /* Entry is on the ghost list */
2256 {
2257 uint8_t *pbBuffer = NULL;
2258
2259 pdmBlkCacheLockEnter(pCache);
2260 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2261 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2262
2263 if (fEnough)
2264 {
2265 /* Move the entry to Am and fetch it to the cache. */
2266 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2267 pdmBlkCacheAdd(pCache, pEntry->cbData);
2268 pdmBlkCacheLockLeave(pCache);
2269
2270 if (pbBuffer)
2271 pEntry->pbData = pbBuffer;
2272 else
2273 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2274 AssertPtr(pEntry->pbData);
2275
2276 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2277 &SgBuf, offDiff, cbToWrite,
2278 true /* fWrite */);
2279 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2280 pdmBlkCacheEntryReadFromMedium(pEntry);
2281
2282 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2283 pdmBlkCacheEntryRelease(pEntry);
2284 }
2285 else
2286 {
2287 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2288 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2289 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2290 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2291 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2292
2293 pdmBlkCacheLockLeave(pCache);
2294
2295 RTMemFree(pEntry);
2296 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2297 &SgBuf, off, cbToWrite,
2298 PDMBLKCACHEXFERDIR_WRITE);
2299 }
2300 }
2301 }
2302 else /* No entry found */
2303 {
2304 /*
2305 * No entry found. Try to create a new cache entry to store the data in and if that fails
2306 * write directly to the file.
2307 */
2308 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2309 off, cbWrite,
2310 512, &cbToWrite);
2311
2312 cbWrite -= cbToWrite;
2313
2314 if (pEntryNew)
2315 {
2316 uint64_t offDiff = off - pEntryNew->Core.Key;
2317
2318 STAM_COUNTER_INC(&pCache->cHits);
2319
2320 /*
2321 * Check if it is possible to just write the data without waiting
2322 * for it to get fetched first.
2323 */
2324 if (!offDiff && pEntryNew->cbData == cbToWrite)
2325 {
2326 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2327 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2328
2329 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2330 if (fCommit)
2331 pdmBlkCacheCommitDirtyEntries(pCache);
2332 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2333 }
2334 else
2335 {
2336 /* Defer the write and fetch the data from the endpoint. */
2337 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2338 &SgBuf, offDiff, cbToWrite,
2339 true /* fWrite */);
2340 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2341 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2342 }
2343
2344 pdmBlkCacheEntryRelease(pEntryNew);
2345 }
2346 else
2347 {
2348 /*
2349 * There is not enough free space in the cache.
2350 * Pass the request directly to the I/O manager.
2351 */
2352 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2353
2354 STAM_COUNTER_INC(&pCache->cMisses);
2355
2356 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2357 &SgBuf, off, cbToWrite,
2358 PDMBLKCACHEXFERDIR_WRITE);
2359 }
2360 }
2361
2362 off += cbToWrite;
2363 }
2364
2365 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
2366 rc = VINF_AIO_TASK_PENDING;
2367
2368 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2369
2370 return rc;
2371}
2372
2373VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2374{
2375 int rc = VINF_SUCCESS;
2376 PPDMBLKCACHEREQ pReq;
2377
2378 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2379
2380 /* Commit dirty entries in the cache. */
2381 pdmBlkCacheCommit(pBlkCache);
2382
2383 /* Allocate new request structure. */
2384 pReq = pdmBlkCacheReqAlloc(0, pvUser);
2385 if (RT_UNLIKELY(!pReq))
2386 return VERR_NO_MEMORY;
2387
2388 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2389 PDMBLKCACHEXFERDIR_FLUSH);
2390 AssertRC(rc);
2391
2392 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2393 return VINF_AIO_TASK_PENDING;
2394}
2395
2396/**
2397 * Completes a task segment freeing all resources and completes the task handle
2398 * if everything was transferred.
2399 *
2400 * @returns Next task segment handle.
2401 * @param pTaskSeg Task segment to complete.
2402 * @param rc Status code to set.
2403 */
2404static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache,
2405 PPDMBLKCACHEWAITER pWaiter,
2406 int rc)
2407{
2408 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2409 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2410
2411 pdmBlkCacheReqUpdate(pBlkCache, pWaiter->pReq, pWaiter->cbTransfer, rc, true);
2412
2413 RTMemFree(pWaiter);
2414
2415 return pNext;
2416}
2417
2418static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2419{
2420 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2421 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2422
2423 /* Reference the entry now as we are clearing the I/O in progress flag
2424 * which protected the entry till now. */
2425 pdmBlkCacheEntryRef(pEntry);
2426
2427 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2428 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2429
2430 /* Process waiting segment list. The data in entry might have changed in-between. */
2431 bool fDirty = false;
2432 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2433 PPDMBLKCACHEWAITER pCurr = pComplete;
2434
2435 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2436 ("The list tail was not updated correctly\n"));
2437 pEntry->pWaitingTail = NULL;
2438 pEntry->pWaitingHead = NULL;
2439
2440 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2441 {
2442 /*
2443 * An error here is difficult to handle as the original request completed already.
2444 * The error is logged for now and the VM is paused.
2445 * If the user continues the entry is written again in the hope
2446 * the user fixed the problem and the next write succeeds.
2447 */
2448 if (RT_FAILURE(rcIoXfer))
2449 {
2450 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\" (rc=%Rrc)\n",
2451 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId, rcIoXfer));
2452
2453 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2454 {
2455 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2456 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2457 "Make sure there is enough free space on the disk and that the disk is working properly. "
2458 "Operation can be resumed afterwards"),
2459 pBlkCache->pszId, rcIoXfer);
2460 AssertRC(rc);
2461 }
2462
2463 /*
2464 * The entry is still marked as dirty which prevents eviction.
2465 * Add the waiters to the list again.
2466 */
2467 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY; /* Clear so it gets added to the list again. */
2468 fDirty = true;
2469
2470 if (pComplete)
2471 {
2472 pEntry->pWaitingHead = pComplete;
2473 while (pComplete->pNext)
2474 pComplete = pComplete->pNext;
2475 pEntry->pWaitingTail = pComplete;
2476 pComplete = NULL;
2477 }
2478 }
2479 else
2480 {
2481 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2482
2483 while (pCurr)
2484 {
2485 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2486
2487 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2488 fDirty = true;
2489
2490 pCurr = pCurr->pNext;
2491 }
2492 }
2493 }
2494 else
2495 {
2496 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2497 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2498 ("Invalid flags set\n"));
2499
2500 while (pCurr)
2501 {
2502 if (pCurr->fWrite)
2503 {
2504 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2505 fDirty = true;
2506 }
2507 else
2508 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2509
2510 pCurr = pCurr->pNext;
2511 }
2512 }
2513
2514 bool fCommit = false;
2515 if (fDirty)
2516 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2517
2518 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2519
2520 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2521 pdmBlkCacheEntryRelease(pEntry);
2522
2523 if (fCommit)
2524 pdmBlkCacheCommitDirtyEntries(pCache);
2525
2526 /* Complete waiters now. */
2527 while (pComplete)
2528 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2529}
2530
2531VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2532{
2533 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2534
2535 if (hIoXfer->fIoCache)
2536 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2537 else
2538 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, hIoXfer->cbXfer, rcIoXfer, true);
2539}
2540
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette