VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMBlkCache.cpp@ 34406

Last change on this file since 34406 was 34406, checked in by vboxsync, 14 years ago

iprt/list.h: RTListNodeGetFirst/Last -> RTListGetFirst/Last; added RTListGetNext, RTListGetPrev, RTListNodeInsertAfter and RTListNodeInsertBefore.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 83.0 KB
1/* $Id: PDMBlkCache.cpp 34406 2010-11-26 16:45:34Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
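/*
 * Editor's note, an illustrative sketch (not part of the original sources):
 * 2Q maintains three lists.  New entries enter a FIFO (LruRecentlyUsedIn,
 * "A1in" in the paper); entries evicted from it keep only their header on a
 * ghost list (LruRecentlyUsedOut, "A1out"); a hit on a ghost entry signals
 * re-reference and promotes it to the main LRU (LruFrequentlyUsed, "Am").
 * The lookup decision in isolation, with hypothetical helper names:
 */
#if 0 /* illustrative only */
static PPDMBLKCACHEENTRY twoQTouch(PPDMBLKCACHEGLOBAL pCache, PPDMBLKCACHEENTRY pEntry)
{
    if (!pEntry)
        return NULL;                    /* Miss: the caller allocates a new entry into A1in. */
    if (pEntry->pList == &pCache->LruFrequentlyUsed)
        twoQMoveToFront(pEntry);        /* Am hit: plain LRU touch (hypothetical helper). */
    else if (pEntry->pList == &pCache->LruRecentlyUsedOut)
    {
        twoQRefetchData(pEntry);        /* Ghost hit: reallocate and refill the data buffer... */
        twoQMoveToAm(pCache, pEntry);   /* ...and promote to Am (hypothetical helpers). */
    }
    /* else: A1in hit - left in place, because A1in is a FIFO, not an LRU. */
    return pEntry;
}
#endif
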
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
26#include "PDMInternal.h"
27#include <iprt/asm.h>
28#include <iprt/mem.h>
29#include <iprt/path.h>
30#include <iprt/string.h>
31#include <VBox/log.h>
32#include <VBox/stam.h>
33#include <VBox/uvm.h>
34#include <VBox/vm.h>
35
36#include "PDMBlkCacheInternal.h"
37
38#ifdef VBOX_STRICT
39# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
40 do \
41 { \
42 AssertMsg(RTCritSectIsOwner(&Cache->CritSect), \
43 ("Thread does not own critical section\n"));\
44 } while(0)
45
46# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
47 do \
48 { \
49 AssertMsg(RTSemRWIsWriteOwner(pEpCache->SemRWEntries), \
50 ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
51 } while(0)
52
53# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
54 do \
55 { \
56 AssertMsg(RTSemRWIsReadOwner(pEpCache->SemRWEntries), \
57 ("Thread is not read owner of the per endpoint RW semaphore\n")); \
58 } while(0)
59
60#else
61# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0)
62# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while(0)
63# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while(0)
64#endif
65
66/*******************************************************************************
67* Internal Functions *
68*******************************************************************************/
69
70/**
71 * Decrement the reference counter of the given cache entry.
72 *
73 * @returns nothing.
74 * @param pEntry The entry to release.
75 */
76DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
77{
78 AssertMsg(pEntry->cRefs > 0, ("Trying to release an unreferenced entry\n"));
79 ASMAtomicDecU32(&pEntry->cRefs);
80}
81
82/**
83 * Increment the reference counter of the given cache entry.
84 *
85 * @returns nothing.
86 * @param pEntry The entry to reference.
87 */
88DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
89{
90 ASMAtomicIncU32(&pEntry->cRefs);
91}
92
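/*
 * Editor's note, an illustrative usage sketch: the reference counter pins an
 * entry against eviction while a thread has to drop the locks temporarily,
 * e.g. to let the I/O thread make progress.  pdmBlkCacheEntryDestroy() below
 * uses exactly this pattern; pBlkCache and pEntry are assumed valid and the
 * write semaphore is assumed to be held on entry.
 */
#if 0 /* illustrative only */
    pdmBlkCacheEntryRef(pEntry);                    /* Pin the entry. */
    RTSemRWReleaseWrite(pBlkCache->SemRWEntries);   /* Drop the lock... */

    RTThreadSleep(250);                             /* ...giving the I/O thread time to finish. */

    RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
    pdmBlkCacheEntryRelease(pEntry);                /* Unpin under the lock again. */
#endif
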
93#ifdef DEBUG
94static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
95{
96 /* Amount of cached data should never exceed the maximum amount. */
97 AssertMsg(pCache->cbCached <= pCache->cbMax,
98 ("Current amount of cached data exceeds maximum\n"));
99
100 /* The amount of cached data in the recently used in and frequently used lists should match cbCached */
101 AssertMsg(pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached == pCache->cbCached,
102 ("Amount of cached data doesn't match\n"));
103
104 AssertMsg(pCache->LruRecentlyUsedOut.cbCached <= pCache->cbRecentlyUsedOutMax,
105 ("Paged out list exceeds maximum\n"));
106}
107#endif
108
109DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
110{
111 RTCritSectEnter(&pCache->CritSect);
112#ifdef DEBUG
113 pdmBlkCacheValidate(pCache);
114#endif
115}
116
117DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
118{
119#ifdef DEBUG
120 pdmBlkCacheValidate(pCache);
121#endif
122 RTCritSectLeave(&pCache->CritSect);
123}
124
125DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
126{
127 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
128 pCache->cbCached -= cbAmount;
129}
130
131DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
132{
133 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
134 pCache->cbCached += cbAmount;
135}
136
137DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
138{
139 pList->cbCached += cbAmount;
140}
141
142DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
143{
144 pList->cbCached -= cbAmount;
145}
146
147#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
148/**
149 * Checks consistency of a LRU list.
150 *
151 * @returns nothing
152 * @param pList The LRU list to check.
153 * @param pNotInList Element which is not allowed to occur in the list.
154 */
155static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
156{
157 PPDMBLKCACHEENTRY pCurr = pList->pHead;
158
159 /* Check that there are no double entries and no cycles in the list. */
160 while (pCurr)
161 {
162 PPDMBLKCACHEENTRY pNext = pCurr->pNext;
163
164 while (pNext)
165 {
166 AssertMsg(pCurr != pNext,
167 ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
168 pCurr, pList));
169 pNext = pNext->pNext;
170 }
171
172 AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));
173
174 if (!pCurr->pNext)
175 AssertMsg(pCurr == pList->pTail, ("End of list reached but last element is not list tail\n"));
176
177 pCurr = pCurr->pNext;
178 }
179}
180#endif
181
182/**
183 * Unlinks a cache entry from the LRU list it is assigned to.
184 *
185 * @returns nothing.
186 * @param pEntry The entry to unlink.
187 */
188static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
189{
190 PPDMBLKLRULIST pList = pEntry->pList;
191 PPDMBLKCACHEENTRY pPrev, pNext;
192
193 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
194
195 AssertPtr(pList);
196
197#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
198 pdmBlkCacheCheckList(pList, NULL);
199#endif
200
201 pPrev = pEntry->pPrev;
202 pNext = pEntry->pNext;
203
204 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
205 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
206
207 if (pPrev)
208 pPrev->pNext = pNext;
209 else
210 {
211 pList->pHead = pNext;
212
213 if (pNext)
214 pNext->pPrev = NULL;
215 }
216
217 if (pNext)
218 pNext->pPrev = pPrev;
219 else
220 {
221 pList->pTail = pPrev;
222
223 if (pPrev)
224 pPrev->pNext = NULL;
225 }
226
227 pEntry->pList = NULL;
228 pEntry->pPrev = NULL;
229 pEntry->pNext = NULL;
230 pdmBlkCacheListSub(pList, pEntry->cbData);
231#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
232 pdmBlkCacheCheckList(pList, pEntry);
233#endif
234}
235
236/**
237 * Adds a cache entry to the given LRU list unlinking it from the currently
238 * assigned list if needed.
239 *
240 * @returns nothing.
241 * @param pList The list to add the entry to.
242 * @param pEntry Entry to add.
243 */
244static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
245{
246 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
247#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
248 pdmBlkCacheCheckList(pList, NULL);
249#endif
250
251 /* Remove from old list if needed */
252 if (pEntry->pList)
253 pdmBlkCacheEntryRemoveFromList(pEntry);
254
255 pEntry->pNext = pList->pHead;
256 if (pList->pHead)
257 pList->pHead->pPrev = pEntry;
258 else
259 {
260 Assert(!pList->pTail);
261 pList->pTail = pEntry;
262 }
263
264 pEntry->pPrev = NULL;
265 pList->pHead = pEntry;
266 pdmBlkCacheListAdd(pList, pEntry->cbData);
267 pEntry->pList = pList;
268#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
269 pdmBlkCacheCheckList(pList, NULL);
270#endif
271}
272
273/**
274 * Destroys a LRU list freeing all entries.
275 *
276 * @returns nothing
277 * @param pList Pointer to the LRU list to destroy.
278 *
279 * @note The caller must own the critical section of the cache.
280 */
281static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
282{
283 while (pList->pHead)
284 {
285 PPDMBLKCACHEENTRY pEntry = pList->pHead;
286
287 pList->pHead = pEntry->pNext;
288
289 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
290 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
291
292 RTMemPageFree(pEntry->pbData, pEntry->cbData);
293 RTMemFree(pEntry);
294 }
295}
296
297/**
298 * Tries to remove the given amount of bytes from a given list in the cache,
299 * moving the entries to the given ghost list.
300 *
301 * @returns Amount of data which could be freed.
302 * @param pCache Pointer to the global cache data.
303 * @param cbData The amount of data to free.
304 * @param pListSrc The source list to evict data from.
305 * @param pGhostListDst The ghost list removed entries should be moved to.
306 * NULL if the entries should be freed instead.
307 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size.
308 * @param ppbBuffer Where to store the address of the buffer if an entry with the
309 * same size was found and fReuseBuffer is true.
310 *
311 * @note This function may return fewer bytes than requested because entries
312 * may be marked as non evictable if they are used for I/O at the
313 * moment.
314 */
315static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
316 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
317 bool fReuseBuffer, uint8_t **ppbBuffer)
318{
319 size_t cbEvicted = 0;
320
321 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
322
323 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
324 AssertMsg( !pGhostListDst
325 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
326 ("Destination list must be NULL or the recently used but paged out list\n"));
327
328 if (fReuseBuffer)
329 {
330 AssertPtr(ppbBuffer);
331 *ppbBuffer = NULL;
332 }
333
334 /* Start deleting from the tail. */
335 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
336
337 while ((cbEvicted < cbData) && pEntry)
338 {
339 PPDMBLKCACHEENTRY pCurr = pEntry;
340
341 pEntry = pEntry->pPrev;
342
343 /* We can't evict entries which are currently in progress or dirty, or which are still referenced */
344 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
345 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
346 {
347 /* Ok eviction candidate. Grab the endpoint semaphore and check again
348 * because somebody else might have raced us. */
349 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
350 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
351
352 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
353 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
354 {
355 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
356
357 if (fReuseBuffer && (pCurr->cbData == cbData))
358 {
359 STAM_COUNTER_INC(&pCache->StatBuffersReused);
360 *ppbBuffer = pCurr->pbData;
361 }
362 else if (pCurr->pbData)
363 RTMemPageFree(pCurr->pbData, pCurr->cbData);
364
365 pCurr->pbData = NULL;
366 cbEvicted += pCurr->cbData;
367
368 pdmBlkCacheEntryRemoveFromList(pCurr);
369 pdmBlkCacheSub(pCache, pCurr->cbData);
370
371 if (pGhostListDst)
372 {
373 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
374
375 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
376
377 /* We have to remove the last entries from the paged out list. */
378 while ( ((pGhostListDst->cbCached + pCurr->cbData) > pCache->cbRecentlyUsedOutMax)
379 && pGhostEntFree)
380 {
381 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
382 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
383
384 pGhostEntFree = pGhostEntFree->pPrev;
385
386 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
387
388 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
389 {
390 pdmBlkCacheEntryRemoveFromList(pFree);
391
392 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
393 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
394 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
395
396 RTMemFree(pFree);
397 }
398
399 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
400 }
401
402 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
403 {
404 /* Couldn't make enough room on the ghost list. Delete the entry instead of moving it. */
405 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
406 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
407 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
408
409 RTMemFree(pCurr);
410 }
411 else
412 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
413 }
414 else
415 {
416 /* Delete the entry from the AVL tree it is assigned to. */
417 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
418 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
419 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
420
421 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
422 RTMemFree(pCurr);
423 }
424 }
425
426 }
427 else
428 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
429 }
430
431 return cbEvicted;
432}
433
434static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
435{
436 size_t cbRemoved = 0;
437
438 if ((pCache->cbCached + cbData) < pCache->cbMax)
439 return true;
440 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
441 {
442 /* Try to evict as many bytes as possible from A1in */
443 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
444 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
445
446 /*
447 * If it was not possible to remove enough entries
448 * try the frequently accessed cache.
449 */
450 if (cbRemoved < cbData)
451 {
452 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer of the correct size but didn't free enough data. */
453
454 /*
455 * If we already removed something we can't pass the reuse buffer flag anymore
456 * because the remaining amount to evict no longer matches the requested buffer size.
457 */
458 if (!cbRemoved)
459 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
460 NULL, fReuseBuffer, ppbBuffer);
461 else
462 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
463 NULL, false, NULL);
464 }
465 }
466 else
467 {
468 /* We have to remove entries from the frequently accessed list. */
469 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
470 NULL, fReuseBuffer, ppbBuffer);
471 }
472
473 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
474 return (cbRemoved >= cbData);
475}
476
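/*
 * Editor's note, a worked example: with the default 5 MB cache configured in
 * pdmR3BlkCacheInit() below, the watermarks are
 *     cbRecentlyUsedInMax  = 5 MB * 25% = 1.25 MB (A1in)
 *     cbRecentlyUsedOutMax = 5 MB * 50% = 2.5 MB  (A1out ghost list)
 * A 64 KB allocation succeeds without eviction while cbCached + 64 KB stays
 * below 5 MB.  Once A1in holds more than 1.25 MB the bytes are evicted from
 * A1in first (data pages freed, headers moved to the ghost list), and only
 * if that doesn't free enough is the frequently used list evicted as well.
 */
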
477DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEIOXFER pIoXfer)
478{
479 int rc = VINF_SUCCESS;
480
481 switch (pBlkCache->enmType)
482 {
483 case PDMBLKCACHETYPE_DEV:
484 {
485 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
486 pIoXfer->enmXferDir,
487 off, pIoXfer->cbXfer,
488 &pIoXfer->SgBuf, pIoXfer);
489 break;
490 }
491 case PDMBLKCACHETYPE_DRV:
492 {
493 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
494 pIoXfer->enmXferDir,
495 off, pIoXfer->cbXfer,
496 &pIoXfer->SgBuf, pIoXfer);
497 break;
498 }
499 case PDMBLKCACHETYPE_USB:
500 {
501 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
502 pIoXfer->enmXferDir,
503 off, pIoXfer->cbXfer,
504 &pIoXfer->SgBuf, pIoXfer);
505 break;
506 }
507 case PDMBLKCACHETYPE_INTERNAL:
508 {
509 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
510 pIoXfer->enmXferDir,
511 off, pIoXfer->cbXfer,
512 &pIoXfer->SgBuf, pIoXfer);
513 break;
514 }
515 default:
516 AssertMsgFailed(("Unknown block cache type!\n"));
517 }
518
519 return rc;
520}
521
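/*
 * Editor's note, an illustrative sketch of an enqueue callback as a
 * PDMBLKCACHETYPE_INTERNAL user might implement it; the signature is derived
 * from the calls above and all "example" names are made up.  The callback is
 * expected to start the transfer asynchronously and report completion back
 * to the cache later, passing hIoXfer along unmodified.
 */
#if 0 /* illustrative only */
static DECLCALLBACK(int) exampleXferEnqueue(void *pvUser, PDMBLKCACHEXFERDIR enmXferDir,
                                            uint64_t off, size_t cbXfer,
                                            PCRTSGBUF pSgBuf, PPDMBLKCACHEIOXFER hIoXfer)
{
    EXAMPLEDISK *pThis = (EXAMPLEDISK *)pvUser; /* Hypothetical instance data. */

    /* Submit the asynchronous I/O; hIoXfer identifies the transfer when
     * signalling completion back to the block cache. */
    return exampleDiskSubmitIo(pThis, enmXferDir == PDMBLKCACHEXFERDIR_READ,
                               off, pSgBuf, cbXfer, hIoXfer);
}
#endif
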
522/**
523 * Initiates a read I/O task for the given entry.
524 *
525 * @returns VBox status code.
526 * @param pEntry The entry to fetch the data to.
527 */
528static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
529{
530 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
531 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
532
533 /* Make sure no one evicts the entry while it is accessed. */
534 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
535
536 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
537 if (RT_UNLIKELY(!pIoXfer))
538 return VERR_NO_MEMORY;
539
540 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
541
542 pIoXfer->fIoCache = true;
543 pIoXfer->pEntry = pEntry;
544 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
545 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
546 pIoXfer->cbXfer = pEntry->cbData;
547 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
548 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
549
550 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
551}
552
553/**
554 * Initiates a write I/O task for the given entry.
555 *
556 * @returns VBox status code.
557 * @param pEntry The entry to write the data from.
558 */
559static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
560{
561 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
562 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
563
564 /* Make sure no one evicts the entry while it is accessed. */
565 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
566
567 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
568 if (RT_UNLIKELY(!pIoXfer))
569 return VERR_NO_MEMORY;
570
571 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
572
573 pIoXfer->fIoCache = true;
574 pIoXfer->pEntry = pEntry;
575 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
576 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
577 pIoXfer->cbXfer = pEntry->cbData;
578 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
579 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
580
581 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
582}
583
584/**
585 * Passes a part of a request directly through to the I/O manager
586 * handling the endpoint.
587 *
588 * @returns VBox status code.
589 * @param pBlkCache The endpoint cache.
590 * @param pReq The request the transfer belongs to.
591 * @param pSgBuf The S/G buffer to use.
592 * @param offStart Offset to start the transfer from.
593 * @param cbData Amount of data to transfer.
594 * @param enmXferDir The transfer direction (read/write).
595 */
596static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
597 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
598 PDMBLKCACHEXFERDIR enmXferDir)
599{
600
601 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
602 if (RT_UNLIKELY(!pIoXfer))
603 return VERR_NO_MEMORY;
604
605 ASMAtomicIncU32(&pReq->cXfersPending);
606 pIoXfer->fIoCache = false;
607 pIoXfer->pReq = pReq;
608 pIoXfer->cbXfer = cbData;
609 pIoXfer->enmXferDir = enmXferDir;
610 if (pSgBuf)
611 {
612 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
613 RTSgBufAdvance(pSgBuf, cbData);
614 }
615
616 return pdmBlkCacheEnqueue(pBlkCache, offStart, pIoXfer);
617}
618
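/*
 * Editor's note, an illustrative sketch of the RTSgBufClone/RTSgBufAdvance
 * pattern used above: each consumer gets a stable snapshot of the current
 * position while the parent buffer is advanced past the piece just handed
 * out, so the next part of the request continues where this one ended.
 */
#if 0 /* illustrative only */
    uint8_t abData[_4K];                /* Some data to describe. */
    RTSGSEG Seg = { abData, sizeof(abData) };
    RTSGBUF SgBuf, SgPiece;

    RTSgBufInit(&SgBuf, &Seg, 1);
    RTSgBufClone(&SgPiece, &SgBuf);     /* SgPiece snapshots the current position... */
    RTSgBufAdvance(&SgBuf, _2K);        /* ...while SgBuf moves on by 2 KB. */
#endif
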
619/**
620 * Commit a single dirty entry to the endpoint
621 *
622 * @returns nothing
623 * @param pEntry The entry to commit.
624 */
625static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
626{
627 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
628 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
629 ("Invalid flags set for entry %#p\n", pEntry));
630
631 pdmBlkCacheEntryWriteToMedium(pEntry);
632}
633
634/**
635 * Commit all dirty entries for a single endpoint.
636 *
637 * @returns nothing.
638 * @param pBlkCache The endpoint cache to commit.
639 */
640static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
641{
642 uint32_t cbCommitted = 0;
643 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
644
645 /* The list is moved to a new header to reduce locking overhead. */
646 RTLISTNODE ListDirtyNotCommitted;
647 RTSPINLOCKTMP Tmp;
648
649 RTListInit(&ListDirtyNotCommitted);
650 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
651 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
652 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
653
654 if (!RTListIsEmpty(&ListDirtyNotCommitted))
655 {
656 PPDMBLKCACHEENTRY pEntry = RTListGetFirst(&ListDirtyNotCommitted, PDMBLKCACHEENTRY, NodeNotCommitted);
657
658 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
659 {
660 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
661 NodeNotCommitted);
662 pdmBlkCacheEntryCommit(pEntry);
663 cbCommitted += pEntry->cbData;
664 RTListNodeRemove(&pEntry->NodeNotCommitted);
665 pEntry = pNext;
666 }
667
668 /* Commit the last entry */
669 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
670 pdmBlkCacheEntryCommit(pEntry);
671 RTListNodeRemove(&pEntry->NodeNotCommitted);
672 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
673 ("Committed all entries but list is not empty\n"));
674 }
675
676 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
677 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
678 ("Number of committed bytes exceeds number of dirty bytes\n"));
679 ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
680}
681
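/*
 * Editor's note, the list hand-over used above in isolation: the spinlock is
 * held only for the O(1) move of the whole dirty list onto a private header,
 * after which the detached list can be walked without blocking producers
 * (hSpinlock and SharedList are placeholders).
 */
#if 0 /* illustrative only */
    RTLISTNODE    ListWork;
    RTSPINLOCKTMP Tmp;

    RTListInit(&ListWork);
    RTSpinlockAcquire(hSpinlock, &Tmp);
    RTListMove(&ListWork, &SharedList); /* SharedList is empty afterwards. */
    RTSpinlockRelease(hSpinlock, &Tmp);

    /* Walk ListWork at leisure; producers may append to SharedList again. */
#endif
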
682/**
683 * Commit all dirty entries in the cache.
684 *
685 * @returns nothing.
686 * @param pCache The global cache instance.
687 */
688static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
689{
690 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
691
692 if (!fCommitInProgress)
693 {
694 pdmBlkCacheLockEnter(pCache);
695 Assert(!RTListIsEmpty(&pCache->ListUsers));
696
697 PPDMBLKCACHE pBlkCache = RTListGetFirst(&pCache->ListUsers, PDMBLKCACHE, NodeCacheUser);
698 AssertPtr(pBlkCache);
699
700 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
701 {
702 pdmBlkCacheCommit(pBlkCache);
703
704 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
705 NodeCacheUser);
706 }
707
708 /* Commit the last endpoint */
709 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
710 pdmBlkCacheCommit(pBlkCache);
711
712 pdmBlkCacheLockLeave(pCache);
713 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
714 }
715}
716
717/**
718 * Adds the given entry as dirty to the cache.
719 *
720 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
721 * @param pBlkCache The endpoint cache the entry belongs to.
722 * @param pEntry The entry to add.
723 */
724static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
725{
726 bool fDirtyBytesExceeded = false;
727 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
728
729 /* If the commit timer is disabled we commit right away. */
730 if (pCache->u32CommitTimeoutMs == 0)
731 {
732 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
733 pdmBlkCacheEntryCommit(pEntry);
734 }
735 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
736 {
737 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
738
739 RTSPINLOCKTMP Tmp;
740 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
741 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
742 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
743
744 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
745
746 /* Prevent committing if the VM was suspended. */
747 if (RT_LIKELY(!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended)))
748 fDirtyBytesExceeded = (cbDirty >= pCache->cbCommitDirtyThreshold);
749 }
750
751 return fDirtyBytesExceeded;
752}
753
754/**
755 * Commit timer callback.
756 */
757static void pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
758{
759 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
760
761 LogFlowFunc(("Commit interval expired, committing dirty entries\n"));
762
763 if ( ASMAtomicReadU32(&pCache->cbDirty) > 0
764 && !ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
765 pdmBlkCacheCommitDirtyEntries(pCache);
766
767 TMTimerSetMillies(pTimer, pCache->u32CommitTimeoutMs);
768 LogFlowFunc(("Entries committed, going to sleep\n"));
769}
770
771int pdmR3BlkCacheInit(PVM pVM)
772{
773 int rc = VINF_SUCCESS;
774 PUVM pUVM = pVM->pUVM;
775 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
776
777 LogFlowFunc((": pVM=%p\n", pVM));
778
779 VM_ASSERT_EMT(pVM);
780
781 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
782 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
783
784 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
785 if (!pBlkCacheGlobal)
786 return VERR_NO_MEMORY;
787
788 RTListInit(&pBlkCacheGlobal->ListUsers);
789 pBlkCacheGlobal->pVM = pVM;
790 pBlkCacheGlobal->cRefs = 0;
791 pBlkCacheGlobal->cbCached = 0;
792 pBlkCacheGlobal->fCommitInProgress = false;
793
794 /* Initialize members */
795 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
796 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
797 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
798
799 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
800 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
801 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
802
803 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
804 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
805 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
806
807 do
808 {
809 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
810 AssertLogRelRCBreak(rc);
811 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
812
813 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
814 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
815 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
816 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
817
818 /** @todo r=aeichner: Experiment to find optimal default values */
819 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
820 AssertLogRelRCBreak(rc);
821 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
822 AssertLogRelRCBreak(rc);
823 } while (0);
824
825 if (RT_SUCCESS(rc))
826 {
827 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
828 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
829 "/PDM/BlkCache/cbMax",
830 STAMUNIT_BYTES,
831 "Maximum cache size");
832 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
833 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
834 "/PDM/BlkCache/cbCached",
835 STAMUNIT_BYTES,
836 "Currently used cache");
837 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
838 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
839 "/PDM/BlkCache/cbCachedMruIn",
840 STAMUNIT_BYTES,
841 "Number of bytes cached in the recently used in list");
842 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
843 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
844 "/PDM/BlkCache/cbCachedMruOut",
845 STAMUNIT_BYTES,
846 "Number of bytes cached in the recently used out ghost list");
847 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
848 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
849 "/PDM/BlkCache/cbCachedFru",
850 STAMUNIT_BYTES,
851 "Number of bytes cached in the frequently used list");
852
853#ifdef VBOX_WITH_STATISTICS
854 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
855 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
856 "/PDM/BlkCache/CacheHits",
857 STAMUNIT_COUNT, "Number of hits in the cache");
858 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
859 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
860 "/PDM/BlkCache/CachePartialHits",
861 STAMUNIT_COUNT, "Number of partial hits in the cache");
862 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
863 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
864 "/PDM/BlkCache/CacheMisses",
865 STAMUNIT_COUNT, "Number of misses when accessing the cache");
866 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
867 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
868 "/PDM/BlkCache/CacheRead",
869 STAMUNIT_BYTES, "Number of bytes read from the cache");
870 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
871 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
872 "/PDM/BlkCache/CacheWritten",
873 STAMUNIT_BYTES, "Number of bytes written to the cache");
874 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
875 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
876 "/PDM/BlkCache/CacheTreeGet",
877 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
878 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
879 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
880 "/PDM/BlkCache/CacheTreeInsert",
881 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
882 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
883 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
884 "/PDM/BlkCache/CacheTreeRemove",
885 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry from the tree");
886 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
887 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
888 "/PDM/BlkCache/CacheBuffersReused",
889 STAMUNIT_COUNT, "Number of times a buffer could be reused");
890#endif
891
892 /* Initialize the critical section */
893 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
894 }
895
896 if (RT_SUCCESS(rc))
897 {
898 /* Create the commit timer */
899 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
900 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
901 pdmBlkCacheCommitTimerCallback,
902 pBlkCacheGlobal,
903 "BlkCache-Commit",
904 &pBlkCacheGlobal->pTimerCommit);
905
906 if (RT_SUCCESS(rc))
907 {
908 LogRel(("BlkCache: Cache successfully initialised. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
909 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
910 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
911 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
912 return VINF_SUCCESS;
913 }
914
915 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
916 }
917
918 if (pBlkCacheGlobal)
919 RTMemFree(pBlkCacheGlobal);
920
921 LogFlowFunc((": returns rc=%Rrc\n", rc));
922 return rc;
923}
924
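/*
 * Editor's note: the three CFGM keys queried above (CacheSize,
 * CacheCommitIntervalMs, CacheCommitThreshold) live under PDM/BlkCache and
 * can be overridden per VM, e.g. via the VBoxInternal extradata mapping
 * (hypothetical example: a 10 MB cache committed at the latest every 5 seconds):
 *
 *   VBoxManage setextradata "MyVM" "VBoxInternal/PDM/BlkCache/CacheSize"             10485760
 *   VBoxManage setextradata "MyVM" "VBoxInternal/PDM/BlkCache/CacheCommitIntervalMs" 5000
 */
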
925void pdmR3BlkCacheTerm(PVM pVM)
926{
927 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
928
929 if (pBlkCacheGlobal)
930 {
931 /* Make sure no one else uses the cache now */
932 pdmBlkCacheLockEnter(pBlkCacheGlobal);
933
934 /* Clean up, deleting all cache entries and waiting for in-progress entries to finish. */
935 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
936 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
937 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
938
939 pdmBlkCacheLockLeave(pBlkCacheGlobal);
940
941 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
942 RTMemFree(pBlkCacheGlobal);
943 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
944 }
945}
946
947int pdmR3BlkCacheResume(PVM pVM)
948{
949 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
950
951 LogFlowFunc(("pVM=%#p\n", pVM));
952
953 if ( pBlkCacheGlobal
954 && ASMAtomicXchgBool(&pBlkCacheGlobal->fIoErrorVmSuspended, false))
955 {
956 /* The VM was suspended because of an I/O error, commit all dirty entries. */
957 pdmBlkCacheCommitDirtyEntries(pBlkCacheGlobal);
958 }
959
960 return VINF_SUCCESS;
961}
962
963static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
964{
965 int rc = VINF_SUCCESS;
966 PPDMBLKCACHE pBlkCache = NULL;
967 bool fAlreadyExists = false;
968 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
969
970 if (!pBlkCacheGlobal)
971 return VERR_NOT_SUPPORTED;
972
973 /*
974 * Check first that no other user cache has the same id;
975 * unique ids are necessary in case the state is saved.
976 */
977 pdmBlkCacheLockEnter(pBlkCacheGlobal);
978
979 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
980 {
981 if (!RTStrCmp(pBlkCache->pszId, pcszId))
982 {
983 fAlreadyExists = true;
984 break;
985 }
986 }
987
988 if (!fAlreadyExists)
989 {
990 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
991
992 if (pBlkCache)
993 pBlkCache->pszId = RTStrDup(pcszId);
994
995 if ( pBlkCache
996 && pBlkCache->pszId)
997 {
998 pBlkCache->pCache = pBlkCacheGlobal;
999 RTListInit(&pBlkCache->ListDirtyNotCommitted);
1000
1001 rc = RTSpinlockCreate(&pBlkCache->LockList);
1002 if (RT_SUCCESS(rc))
1003 {
1004 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
1005 if (RT_SUCCESS(rc))
1006 {
1007 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
1008 if (pBlkCache->pTree)
1009 {
1010 /* Arm the timer if this is the first endpoint. */
1011 if ( !pBlkCacheGlobal->cRefs
1012 && pBlkCacheGlobal->u32CommitTimeoutMs > 0)
1013 rc = TMTimerSetMillies(pBlkCacheGlobal->pTimerCommit, pBlkCacheGlobal->u32CommitTimeoutMs);
1014
1015 if (RT_SUCCESS(rc))
1016 {
1017#ifdef VBOX_WITH_STATISTICS
1018 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1019 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1020 STAMUNIT_COUNT, "Number of deferred writes",
1021 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1022#endif
1023
1024 /* Add to the list of users. */
1025 pBlkCacheGlobal->cRefs++;
1026 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1027 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1028
1029 *ppBlkCache = pBlkCache;
1030 LogFlowFunc(("returns success\n"));
1031 return VINF_SUCCESS;
1032 }
1033 }
1034 else
1035 rc = VERR_NO_MEMORY;
1036
1037 RTSemRWDestroy(pBlkCache->SemRWEntries);
1038 }
1039
1040 RTSpinlockDestroy(pBlkCache->LockList);
1041 }
1042
1043 RTStrFree(pBlkCache->pszId);
1044 }
1045 else
1046 rc = VERR_NO_MEMORY;
1047
1048 if (pBlkCache)
1049 RTMemFree(pBlkCache);
1050 }
1051 else
1052 rc = VERR_ALREADY_EXISTS;
1053
1054 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1055
1056 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1057 return rc;
1058}
1059
1060VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1061 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1062 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1063 const char *pcszId)
1064{
1065 int rc = VINF_SUCCESS;
1066 PPDMBLKCACHE pBlkCache;
1067
1068 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1069 if (RT_SUCCESS(rc))
1070 {
1071 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1072 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1073 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1074 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1075 *ppBlkCache = pBlkCache;
1076 }
1077
1078 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1079 return rc;
1080}
1081
1082VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1083 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1084 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1085 const char *pcszId)
1086{
1087 int rc = VINF_SUCCESS;
1088 PPDMBLKCACHE pBlkCache;
1089
1090 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1091 if (RT_SUCCESS(rc))
1092 {
1093 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1094 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1095 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1096 pBlkCache->u.Dev.pDevIns = pDevIns;
1097 *ppBlkCache = pBlkCache;
1098 }
1099
1100 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1101 return rc;
1102
1103}
1104
1105VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1106 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1107 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1108 const char *pcszId)
1109{
1110 int rc = VINF_SUCCESS;
1111 PPDMBLKCACHE pBlkCache;
1112
1113 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1114 if (RT_SUCCESS(rc))
1115 {
1116 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1117 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1118 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1119 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1120 *ppBlkCache = pBlkCache;
1121 }
1122
1123 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1124 return rc;
1125
1126}
1127
1128VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1129 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1130 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1131 const char *pcszId)
1132{
1133 int rc = VINF_SUCCESS;
1134 PPDMBLKCACHE pBlkCache;
1135
1136 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1137 if (RT_SUCCESS(rc))
1138 {
1139 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1140 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1141 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1142 pBlkCache->u.Int.pvUser = pvUser;
1143 *ppBlkCache = pBlkCache;
1144 }
1145
1146 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1147 return rc;
1148
1149}
1150
1151/**
1152 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1153 *
1154 * @returns IPRT status code.
1155 * @param pNode The node to destroy.
1156 * @param pvUser Opaque user data.
1157 */
1158static int pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1159{
1160 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1161 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1162 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1163
1164 while (ASMAtomicReadU32(&pEntry->fFlags) & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY))
1165 {
1166 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1167 pdmBlkCacheEntryRef(pEntry);
1168 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1169 pdmBlkCacheLockLeave(pCache);
1170
1171 RTThreadSleep(250);
1172
1173 /* Re-enter all locks */
1174 pdmBlkCacheLockEnter(pCache);
1175 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1176 pdmBlkCacheEntryRelease(pEntry);
1177 }
1178
1179 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
1180 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1181
1182 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1183 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1184
1185 pdmBlkCacheEntryRemoveFromList(pEntry);
1186
1187 if (fUpdateCache)
1188 pdmBlkCacheSub(pCache, pEntry->cbData);
1189
1190 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1191 RTMemFree(pEntry);
1192
1193 return VINF_SUCCESS;
1194}
1195
1196/**
1197 * Destroys all cache resources used by the given endpoint.
1198 *
1199 * @returns nothing.
1200 * @param pBlkCache The endpoint cache to destroy.
1201 */
1202VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1203{
1204 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1205
1206 /*
1207 * Commit all dirty entries now (they are waited on for completion during the
1208 * destruction of the AVL tree below).
1209 * The exception is if the VM was paused because of an I/O error before.
1210 */
1211 if (!ASMAtomicReadBool(&pCache->fIoErrorVmSuspended))
1212 pdmBlkCacheCommit(pBlkCache);
1213
1214 /* Make sure nobody is accessing the cache while we delete the tree. */
1215 pdmBlkCacheLockEnter(pCache);
1216 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1217 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1218 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1219
1220 RTSpinlockDestroy(pBlkCache->LockList);
1221
1222 pCache->cRefs--;
1223 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1224
1225 if ( !pCache->cRefs
1226 && pCache->u32CommitTimeoutMs > 0)
1227 TMTimerStop(pCache->pTimerCommit);
1228
1229 pdmBlkCacheLockLeave(pCache);
1230
1231 RTSemRWDestroy(pBlkCache->SemRWEntries);
1232
1233#ifdef VBOX_WITH_STATISTICS
1234 STAMR3Deregister(pCache->pVM, &pBlkCache->StatWriteDeferred);
1235#endif
1236
1237 RTStrFree(pBlkCache->pszId);
1238 RTMemFree(pBlkCache);
1239}
1240
1241VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1242{
1243 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1244
1245 /*
1246 * Validate input.
1247 */
1248 if (!pDevIns)
1249 return;
1250 VM_ASSERT_EMT(pVM);
1251
1252 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1253 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1254
1255 /* Return silently if not supported. */
1256 if (!pBlkCacheGlobal)
1257 return;
1258
1259 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1260
1261 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1262 {
1263 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1264 && pBlkCache->u.Dev.pDevIns == pDevIns)
1265 PDMR3BlkCacheRelease(pBlkCache);
1266 }
1267
1268 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1269}
1270
1271VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1272{
1273 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1274
1275 /*
1276 * Validate input.
1277 */
1278 if (!pDrvIns)
1279 return;
1280 VM_ASSERT_EMT(pVM);
1281
1282 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1283 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1284
1285 /* Return silently if not supported. */
1286 if (!pBlkCacheGlobal)
1287 return;
1288
1289 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1290
1291 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1292 {
1293 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1294 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1295 PDMR3BlkCacheRelease(pBlkCache);
1296 }
1297
1298 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1299}
1300
1301VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1302{
1303 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1304
1305 /*
1306 * Validate input.
1307 */
1308 if (!pUsbIns)
1309 return;
1310 VM_ASSERT_EMT(pVM);
1311
1312 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1313 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1314
1315 /* Return silently if not supported. */
1316 if (!pBlkCacheGlobal)
1317 return;
1318
1319 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1320
1321 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1322 {
1323 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1324 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1325 PDMR3BlkCacheRelease(pBlkCache);
1326 }
1327
1328 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1329}
1330
1331static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1332{
1333 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1334 PPDMBLKCACHEENTRY pEntry = NULL;
1335
1336 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1337
1338 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1339 pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1340 if (pEntry)
1341 pdmBlkCacheEntryRef(pEntry);
1342 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1343
1344 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1345
1346 return pEntry;
1347}
1348
1349/**
1350 * Returns the best fit cache entry for the given offset.
1351 *
1352 * @returns nothing.
1353 * @param pBlkCache The endpoint cache.
1354 * @param off The offset.
1355 * @param ppEntryAbove Where to store the pointer to the best fit entry above
1356 * the given offset. NULL if not required.
1357 */
1358static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off,
1359 PPDMBLKCACHEENTRY *ppEntryAbove)
1360{
1361 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1362
1363 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1364
1365 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1366 if (ppEntryAbove)
1367 {
1368 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1369 if (*ppEntryAbove)
1370 pdmBlkCacheEntryRef(*ppEntryAbove);
1371 }
1372
1373 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1374
1375 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1376}
1377
1378static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1379{
1380 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1381
1382 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1383 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1384 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1385 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1386 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1387 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1388}
1389
1390/**
1391 * Allocates and initializes a new entry for the cache.
1392 * The entry has a reference count of 1.
1393 *
1394 * @returns Pointer to the new cache entry or NULL if out of memory.
1395 * @param pBlkCache The cache the entry belongs to.
1396 * @param off Start offset.
1397 * @param cbData Size of the cache entry.
1398 * @param pbBuffer Pointer to the buffer to use.
1399 * NULL if a new buffer should be allocated.
1400 * The buffer needs to have the same size as the entry.
1401 */
1402static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
1403 uint64_t off, size_t cbData, uint8_t *pbBuffer)
1404{
1405 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1406
1407 if (RT_UNLIKELY(!pEntryNew))
1408 return NULL;
1409
1410 pEntryNew->Core.Key = off;
1411 pEntryNew->Core.KeyLast = off + cbData - 1;
1412 pEntryNew->pBlkCache = pBlkCache;
1413 pEntryNew->fFlags = 0;
1414 pEntryNew->cRefs = 1; /* We are using it now. */
1415 pEntryNew->pList = NULL;
1416 pEntryNew->cbData = cbData;
1417 pEntryNew->pWaitingHead = NULL;
1418 pEntryNew->pWaitingTail = NULL;
1419 if (pbBuffer)
1420 pEntryNew->pbData = pbBuffer;
1421 else
1422 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1423
1424 if (RT_UNLIKELY(!pEntryNew->pbData))
1425 {
1426 RTMemFree(pEntryNew);
1427 return NULL;
1428 }
1429
1430 return pEntryNew;
1431}
1432
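/*
 * Editor's note, a worked example: an entry allocated with off=4096 and
 * cbData=8192 covers the inclusive byte range Core.Key=4096 ..
 * Core.KeyLast=12287.  A later lookup with RTAvlrU64RangeGet() for any
 * offset inside that range (say 8000) returns this entry, which is what
 * pdmBlkCacheGetCacheEntryByOffset() above relies on.
 */
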
1433/**
1434 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1435 * in exclusive mode.
1436 *
1437 * @returns true if the flag in fSet is set and the one in fClear is clear.
1438 * false otherwise.
1439 * The R/W semaphore is only held if true is returned.
1440 *
1441 * @param pBlkCache The endpoint cache instance data.
1442 * @param pEntry The entry to check the flags for.
1443 * @param fSet The flag which is tested to be set.
1444 * @param fClear The flag which is tested to be clear.
1445 */
1446DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1447 PPDMBLKCACHEENTRY pEntry,
1448 uint32_t fSet, uint32_t fClear)
1449{
1450 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1451 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1452
1453 if (fPassed)
1454 {
1455 /* Acquire the lock and check again because the completion callback might have raced us. */
1456 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1457
1458 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1459 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1460
1461 /* Drop the lock if we didn't pass the test. */
1462 if (!fPassed)
1463 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1464 }
1465
1466 return fPassed;
1467}
1468
1469/**
1470 * Adds a waiter to the waiting list of a cache entry
1471 * which is currently in progress.
1472 *
1473 * @returns nothing.
1474 * @param pEntry The cache entry to add the waiter to.
1475 * @param pWaiter The waiter to add.
1476 */
1477DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1478 PPDMBLKCACHEWAITER pWaiter)
1479{
1480 pWaiter->pNext = NULL;
1481
1482 if (pEntry->pWaitingHead)
1483 {
1484 AssertPtr(pEntry->pWaitingTail);
1485
1486 pEntry->pWaitingTail->pNext = pWaiter;
1487 pEntry->pWaitingTail = pWaiter;
1488 }
1489 else
1490 {
1491 Assert(!pEntry->pWaitingTail);
1492
1493 pEntry->pWaitingHead = pWaiter;
1494 pEntry->pWaitingTail = pWaiter;
1495 }
1496}
1497
1498/**
1499 * Adds a buffer described by an S/G buffer
1500 * to the entry waiting for completion.
1501 *
1502 * @returns VBox status code.
1503 * @param pEntry The entry to add the buffer to.
1504 * @param pReq The request associated with the buffer.
1505 * @param pSgBuf The S/G buffer to read from or write to.
1506 * @param offDiff Offset of the buffer from the start
1507 * of the entry.
1508 * @param cbData Amount of data to wait for on this entry.
1509 * @param fWrite Flag whether the task waits because it wants to write
1510 * to the cache entry.
1511 */
1512static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry,
1513 PPDMBLKCACHEREQ pReq,
1514 PRTSGBUF pSgBuf, uint64_t offDiff,
1515 size_t cbData, bool fWrite)
1516{
1517 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1518 if (!pWaiter)
1519 return VERR_NO_MEMORY;
1520
1521 ASMAtomicIncU32(&pReq->cXfersPending);
1522 pWaiter->pReq = pReq;
1523 pWaiter->offCacheEntry = offDiff;
1524 pWaiter->cbTransfer = cbData;
1525 pWaiter->fWrite = fWrite;
1526 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1527 RTSgBufAdvance(pSgBuf, cbData);
1528
1529 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1530
1531 return VINF_SUCCESS;
1532}
1533
1534/**
1535 * Calculates the aligned offset and size for a new cache entry
1536 * which do not intersect with an already existing entry or the
1537 * file end.
1538 *
1539 * @returns The number of bytes the entry can hold of the requested amount
1540 * of bytes.
1541 *
1542 * @param pBlkCache The endpoint cache.
1543 * @param off The start offset.
1544 * @param cb The number of bytes the entry needs to hold at least.
1545 * @param uAlignment Alignment of the boundary sizes.
1546 * @param poffAligned Where to store the aligned offset.
1547 * @param pcbAligned Where to store the aligned size of the entry.
1548 */
1549static size_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1550 uint64_t off, size_t cb,
1551 unsigned uAlignment,
1552 uint64_t *poffAligned, size_t *pcbAligned)
1553{
1554 size_t cbAligned;
1555 size_t cbInEntry = 0;
1556 uint64_t offAligned;
1557 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1558
1559 /* Get the best fit entries around the offset */
1560 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1561
1562 /* Log the info */
1563 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1564 pEntryAbove ? "B" : "No b",
1565 off,
1566 pEntryAbove ? pEntryAbove->Core.Key : 0,
1567 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1568 pEntryAbove ? pEntryAbove->cbData : 0));
1569
1570 offAligned = off;
1571
1572 if ( pEntryAbove
1573 && off + cb > pEntryAbove->Core.Key)
1574 {
1575 cbInEntry = pEntryAbove->Core.Key - off;
1576 cbAligned = pEntryAbove->Core.Key - offAligned;
1577 }
1578 else
1579 {
1580 cbAligned = cb;
1581 cbInEntry = cb;
1582 }
1583
1584 /* A few sanity checks */
1585 AssertMsg(!pEntryAbove || (offAligned + cbAligned) <= pEntryAbove->Core.Key,
1586 ("Aligned size intersects with another cache entry\n"));
1587 Assert(cbInEntry <= cbAligned);
1588
1589 if (pEntryAbove)
1590 pdmBlkCacheEntryRelease(pEntryAbove);
1591
1592 LogFlow(("offAligned=%llu cbAligned=%u\n", offAligned, cbAligned));
1593
1594 *poffAligned = offAligned;
1595 *pcbAligned = cbAligned;
1596
1597 return cbInEntry;
1598}
1599
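/*
 * Editor's note, a worked example: a request for off=0, cb=64 KB while an
 * existing entry already starts at 48 KB yields
 *     cbInEntry = cbAligned = pEntryAbove->Core.Key - off = 48 KB,
 * i.e. the new entry is clipped to end exactly where the existing one
 * begins; the remaining 16 KB of the request are served from that entry on
 * the caller's next loop iteration.
 */
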
1600/**
1601 * Create a new cache entry evicting data from the cache if required.
1602 *
1603 * @returns Pointer to the new cache entry or NULL
1604 * if not enough bytes could be evicted from the cache.
1605 *
1606 * @param pBlkCache The endpoint cache.
1607 * @param off The offset.
1608 * @param cb Number of bytes the cache entry should have.
1609 * @param uAlignment Alignment the size of the entry should have.
1610 * @param pcbData Where to store the number of bytes the new
1611 * entry can hold. May be lower than actually requested
1612 * due to another entry intersecting the access range.
1613 */
1614static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache,
1615 uint64_t off, size_t cb,
1616 unsigned uAlignment,
1617 size_t *pcbData)
1618{
1619 uint64_t offStart = 0;
1620 size_t cbEntry = 0;
1621 PPDMBLKCACHEENTRY pEntryNew = NULL;
1622 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1623 uint8_t *pbBuffer = NULL;
1624
1625 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, cb, uAlignment,
1626 &offStart, &cbEntry);
1627
1628 pdmBlkCacheLockEnter(pCache);
1629 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1630
1631 if (fEnough)
1632 {
1633 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1634
1635 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, offStart, cbEntry, pbBuffer);
1636 if (RT_LIKELY(pEntryNew))
1637 {
1638 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1639 pdmBlkCacheAdd(pCache, cbEntry);
1640 pdmBlkCacheLockLeave(pCache);
1641
1642 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1643
1644 AssertMsg( (off >= pEntryNew->Core.Key)
1645 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1646 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1647 off, pEntryNew->Core.Key));
1648 }
1649 else
1650 pdmBlkCacheLockLeave(pCache);
1651 }
1652 else
1653 pdmBlkCacheLockLeave(pCache);
1654
1655 return pEntryNew;
1656}
1657
1658static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(size_t cbXfer, void *pvUser)
1659{
1660 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1661
1662 if (RT_LIKELY(pReq))
1663 {
1664 pReq->pvUser = pvUser;
1665 pReq->cbXfer = cbXfer;
1666 pReq->rcReq = VINF_SUCCESS;
1667 pReq->cXfersPending = 0;
1668 }
1669
1670 return pReq;
1671}
1672
1673static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1674{
1675 switch (pBlkCache->enmType)
1676 {
1677 case PDMBLKCACHETYPE_DEV:
1678 {
1679 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1680 pReq->pvUser, pReq->rcReq);
1681 break;
1682 }
1683 case PDMBLKCACHETYPE_DRV:
1684 {
1685 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1686 pReq->pvUser, pReq->rcReq);
1687 break;
1688 }
1689 case PDMBLKCACHETYPE_USB:
1690 {
1691 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1692 pReq->pvUser, pReq->rcReq);
1693 break;
1694 }
1695 case PDMBLKCACHETYPE_INTERNAL:
1696 {
1697 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1698 pReq->pvUser, pReq->rcReq);
1699 break;
1700 }
1701 default:
1702 AssertMsgFailed(("Unknown block cache type!\n"));
1703 }
1704
1705 RTMemFree(pReq);
1706}
1707
1708static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1709 size_t cbComplete, int rcReq, bool fCallHandler)
1710{
1711 if (RT_FAILURE(rcReq))
1712 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1713
1714 AssertMsg(pReq->cbXfer >= cbComplete, ("Completed more than left\n"));
1715 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1716 uint32_t cbOld = ASMAtomicSubU32(&pReq->cbXfer, cbComplete);
1717 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1718
1719 if ( !(cbOld - cbComplete)
1720 && !cXfersPending)
1721 {
1722 if (fCallHandler)
1723 pdmBlkCacheReqComplete(pBlkCache, pReq);
1724 return true;
1725 }
1726
1727 LogFlowFunc(("pReq=%#p cXfersPending=%u cbXfer=%u\n", pReq, cXfersPending, (cbOld - cbComplete)));
1728 return false;
1729}
1730
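/*
 * Editor's note, a worked example: a 64 KB read split into a 48 KB transfer
 * waiting on a cache entry and a 16 KB passthrough starts with
 * cbXfer = 65536 and, counting the guard reference taken by the caller,
 * cXfersPending = 3.  Each completion subtracts its byte count and drops one
 * pending transfer, and the user's completion handler fires exactly once, on
 * the call that takes both counters to zero.  Bytes served synchronously
 * from a cache hit reduce cbXfer directly without a pending transfer (see
 * the ASMAtomicSubU32() in PDMR3BlkCacheRead below).
 */
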
1731VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1732 PCRTSGBUF pcSgBuf, size_t cbRead, void *pvUser)
1733{
1734 int rc = VINF_SUCCESS;
1735 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1736 PPDMBLKCACHEENTRY pEntry;
1737 PPDMBLKCACHEREQ pReq;
1738
1739 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbRead=%u pvUser=%#p\n",
1740 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbRead, pvUser));
1741
1742 RTSGBUF SgBuf;
1743 RTSgBufClone(&SgBuf, pcSgBuf);
1744
1745 /* Allocate new request structure. */
1746 pReq = pdmBlkCacheReqAlloc(cbRead, pvUser);
1747 if (RT_UNLIKELY(!pReq))
1748 return VERR_NO_MEMORY;
1749
1750 /* Increment data transfer counter to keep the request valid while we access it. */
1751 ASMAtomicIncU32(&pReq->cXfersPending);
1752
1753 while (cbRead)
1754 {
1755 size_t cbToRead;
1756
1757 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1758
1759 /*
1760 * If there is no entry we try to create a new one, evicting unused pages
1761 * if the cache is full. If this is not possible we will pass the request through
1762 * and skip the caching (all entries may still be in progress so they can't
1763 * be evicted).
1764 * If we have an entry it can be in one of the LRU lists where the entry
1765 * contains data (recently used in or frequently used LRU) so we can just read
1766 * the data we need and put the entry at the head of the frequently used LRU list.
1767 * In case the entry is in one of the ghost lists it doesn't contain any data.
1768 * We have to fetch it again, evicting pages from either A1in or Am to make room.
1769 */
1770 if (pEntry)
1771 {
1772 uint64_t offDiff = off - pEntry->Core.Key;
1773
1774 AssertMsg(off >= pEntry->Core.Key,
1775 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1776 off, pEntry->Core.Key));
1777
1778 AssertPtr(pEntry->pList);
1779
1780 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
1781
1782 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1783 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
1784 off, cbToRead));
1785
1786 cbRead -= cbToRead;
1787
1788 if (!cbRead)
1789 STAM_COUNTER_INC(&pCache->cHits);
1790 else
1791 STAM_COUNTER_INC(&pCache->cPartialHits);
1792
1793 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1794
1795 /* Ghost lists contain no data. */
1796 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1797 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1798 {
1799 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
1800 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
1801 PDMBLKCACHE_ENTRY_IS_DIRTY))
1802 {
1803 /* Entry hasn't completed yet. Append to the waiter list. */
1804 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
1805 &SgBuf, offDiff, cbToRead,
1806 false /* fWrite */);
1807 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1808 }
1809 else
1810 {
1811 /* Read as much as we can from the entry. */
1812 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
1813 ASMAtomicSubU32(&pReq->cbXfer, cbToRead);
1814 }
1815
1816 /* Move this entry to the top position */
1817 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1818 {
1819 pdmBlkCacheLockEnter(pCache);
1820 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1821 pdmBlkCacheLockLeave(pCache);
1822 }
1823 /* Release the entry */
1824 pdmBlkCacheEntryRelease(pEntry);
1825 }
1826 else
1827 {
1828 uint8_t *pbBuffer = NULL;
1829
1830 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
1831
1832 pdmBlkCacheLockEnter(pCache);
1833 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1834 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1835
1836 /* Move the entry to Am and fetch it to the cache. */
1837 if (fEnough)
1838 {
1839 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1840 pdmBlkCacheAdd(pCache, pEntry->cbData);
1841 pdmBlkCacheLockLeave(pCache);
1842
1843 if (pbBuffer)
1844 pEntry->pbData = pbBuffer;
1845 else
1846 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1847 AssertPtr(pEntry->pbData);
1848
1849 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
1850 &SgBuf, offDiff, cbToRead,
1851 false /* fWrite */);
1852 pdmBlkCacheEntryReadFromMedium(pEntry);
1853 /* Release the entry */
1854 pdmBlkCacheEntryRelease(pEntry);
1855 }
1856 else
1857 {
1858 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1859 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
1860 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
1861 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
1862 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1863
1864 pdmBlkCacheLockLeave(pCache);
1865
1866 RTMemFree(pEntry);
1867
1868 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1869 &SgBuf, off, cbToRead,
1870 PDMBLKCACHEXFERDIR_READ);
1871 }
1872 }
1873 }
1874 else
1875 {
1876#ifdef VBOX_WITH_IO_READ_CACHE
1877 /* No entry found for this offset. Create a new entry and fetch the data into the cache. */
1878 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
1879 off, cbRead,
1880 PAGE_SIZE,
1881 &cbToRead);
1882
1883 cbRead -= cbToRead;
1884
1885 if (pEntryNew)
1886 {
1887 if (!cbRead)
1888 STAM_COUNTER_INC(&pCache->cMisses);
1889 else
1890 STAM_COUNTER_INC(&pCache->cPartialHits);
1891
1892 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
1893 &SgBuf,
1894 off - pEntryNew->Core.Key,
1895 cbToRead,
1896 false /* fWrite */);
1897 pdmBlkCacheEntryReadFromMedium(pEntryNew);
1898 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1899 }
1900 else
1901 {
1902 /*
1903 * There is not enough free space in the cache.
1904 * Pass the request directly to the I/O manager.
1905 */
1906 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
1907
1908 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1909 &SgBuf, off, cbToRead,
1910 PDMBLKCACHEXFERDIR_READ);
1911 }
1912#else
1913 /* Clip read size if necessary. */
1914 PPDMBLKCACHEENTRY pEntryAbove;
1915 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove);
1916
1917 if (pEntryAbove)
1918 {
1919 if (off + cbRead > pEntryAbove->Core.Key)
1920 cbToRead = pEntryAbove->Core.Key - off;
1921 else
1922 cbToRead = cbRead;
1923
1924 pdmBlkCacheEntryRelease(pEntryAbove);
1925 }
1926 else
1927 cbToRead = cbRead;
1928
1929 cbRead -= cbToRead;
1930 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1931 &SgBuf, off, cbToRead,
1932 PDMBLKCACHEXFERDIR_READ);
1933#endif
1934 }
1935 off += cbToRead;
1936 }
1937
1938 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
1939 rc = VINF_AIO_TASK_PENDING;
1940
1941 LogFlowFunc((": Leave rc=%Rrc\n", rc));
1942
1943 return rc;
1944}
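/*
 * Editorial usage sketch, not part of the original source: issuing a read
 * and telling synchronous completion apart from deferral. pvBuf, cbBuf,
 * offStart and pMyReq are hypothetical caller state.
 */
#if 0
    RTSGSEG Seg = { pvBuf, cbBuf };
    RTSGBUF SgBuf;
    RTSgBufInit(&SgBuf, &Seg, 1);
    int rc2 = PDMR3BlkCacheRead(pBlkCache, offStart, &SgBuf, cbBuf, pMyReq);
    if (rc2 == VINF_AIO_TASK_PENDING)
    {
        /* The completion callback fires later with pMyReq as pvUser. */
    }
    else if (RT_SUCCESS(rc2))
    {
        /* The request was satisfied entirely from cached data. */
    }
#endif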
1945
1946VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off,
1947 PCRTSGBUF pcSgBuf, size_t cbWrite, void *pvUser)
1948{
1949 int rc = VINF_SUCCESS;
1950 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1951 PPDMBLKCACHEENTRY pEntry;
1952 PPDMBLKCACHEREQ pReq;
1953
1954 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbWrite=%u pvUser=%#p\n",
1955 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbWrite, pvUser));
1956
1957 RTSGBUF SgBuf;
1958 RTSgBufClone(&SgBuf, pcSgBuf);
1959
1960 /* Allocate new request structure. */
1961 pReq = pdmBlkCacheReqAlloc(cbWrite, pvUser);
1962 if (RT_UNLIKELY(!pReq))
1963 return VERR_NO_MEMORY;
1964
1965 /* Increment data transfer counter to keep the request valid while we access it. */
1966 ASMAtomicIncU32(&pReq->cXfersPending);
1967
1968 while (cbWrite)
1969 {
1970 size_t cbToWrite;
1971
1972 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1973
1974 if (pEntry)
1975 {
1976 /* Write the data into the entry and mark it as dirty */
1977 AssertPtr(pEntry->pList);
1978
1979 uint64_t offDiff = off - pEntry->Core.Key;
1980
1981 AssertMsg(off >= pEntry->Core.Key,
1982 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1983 off, pEntry->Core.Key));
1984
1985 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
1986 cbWrite -= cbToWrite;
1987
1988 if (!cbWrite)
1989 STAM_COUNTER_INC(&pCache->cHits);
1990 else
1991 STAM_COUNTER_INC(&pCache->cPartialHits);
1992
1993 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
1994
1995 /* Ghost lists contain no data. */
1996 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1997 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1998 {
1999 /* Check if the entry is dirty. */
2000 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2001 PDMBLKCACHE_ENTRY_IS_DIRTY,
2002 0))
2003 {
2004 /* If it is already dirty but not in progress just update the data. */
2005 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2006 {
2007 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff,
2008 cbToWrite);
2009 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2010 }
2011 else
2012 {
2013 /* The data isn't written to the file yet */
2014 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2015 &SgBuf, offDiff, cbToWrite,
2016 true /* fWrite */);
2017 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2018 }
2019
2020 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2021 }
2022 else /* Dirty bit not set */
2023 {
2024 /*
2025 * Check if a read is in progress for this entry.
2026 * We have to defer processing in that case.
2027 */
2028 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2029 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2030 0))
2031 {
2032 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2033 &SgBuf, offDiff, cbToWrite,
2034 true /* fWrite */);
2035 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2036 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2037 }
2038 else /* I/O in progress flag not set */
2039 {
2040 /* Write as much as we can into the entry and update the file. */
2041 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2042 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2043
2044 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2045 if (fCommit)
2046 pdmBlkCacheCommitDirtyEntries(pCache);
2047 }
2048 } /* Dirty bit not set */
2049
2050 /* Move this entry to the top position */
2051 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2052 {
2053 pdmBlkCacheLockEnter(pCache);
2054 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2055 pdmBlkCacheLockLeave(pCache);
2056 }
2057
2058 pdmBlkCacheEntryRelease(pEntry);
2059 }
2060 else /* Entry is on the ghost list */
2061 {
2062 uint8_t *pbBuffer = NULL;
2063
2064 pdmBlkCacheLockEnter(pCache);
2065 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2066 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2067
2068 if (fEnough)
2069 {
2070 /* Move the entry to Am and fetch it to the cache. */
2071 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2072 pdmBlkCacheAdd(pCache, pEntry->cbData);
2073 pdmBlkCacheLockLeave(pCache);
2074
2075 if (pbBuffer)
2076 pEntry->pbData = pbBuffer;
2077 else
2078 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2079 AssertPtr(pEntry->pbData);
2080
2081 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2082 &SgBuf, offDiff, cbToWrite,
2083 true /* fWrite */);
2084 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2085 pdmBlkCacheEntryReadFromMedium(pEntry);
2086
2087 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2088 pdmBlkCacheEntryRelease(pEntry);
2089 }
2090 else
2091 {
2092 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2093 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2094 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2095 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2096 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2097
2098 pdmBlkCacheLockLeave(pCache);
2099
2100 RTMemFree(pEntry);
2101 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2102 &SgBuf, off, cbToWrite,
2103 PDMBLKCACHEXFERDIR_WRITE);
2104 }
2105 }
2106 }
2107 else /* No entry found */
2108 {
2109 /*
2110 * No entry found. Try to create a new cache entry to store the data in, and if that
2111 * fails write it directly to the file.
2112 */
2113 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2114 off, cbWrite,
2115 512, &cbToWrite);
2116
2117 cbWrite -= cbToWrite;
2118
2119 if (pEntryNew)
2120 {
2121 uint64_t offDiff = off - pEntryNew->Core.Key;
2122
2123 STAM_COUNTER_INC(&pCache->cHits);
2124
2125 /*
2126 * Check if it is possible to just write the data without waiting
2127 * for it to get fetched first.
2128 */
2129 if (!offDiff && pEntryNew->cbData == cbToWrite)
2130 {
2131 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2132 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2133
2134 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2135 if (fCommit)
2136 pdmBlkCacheCommitDirtyEntries(pCache);
2137 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2138 }
2139 else
2140 {
2141 /* Defer the write and fetch the data from the endpoint. */
2142 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2143 &SgBuf, offDiff, cbToWrite,
2144 true /* fWrite */);
2145 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2146 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2147 }
2148
2149 pdmBlkCacheEntryRelease(pEntryNew);
2150 }
2151 else
2152 {
2153 /*
2154 * There is not enough free space in the cache.
2155 * Pass the request directly to the I/O manager.
2156 */
2157 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2158
2159 STAM_COUNTER_INC(&pCache->cMisses);
2160
2161 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2162 &SgBuf, off, cbToWrite,
2163 PDMBLKCACHEXFERDIR_WRITE);
2164 }
2165 }
2166
2167 off += cbToWrite;
2168 }
2169
2170 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
2171 rc = VINF_AIO_TASK_PENDING;
2172
2173 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2174
2175 return rc;
2176}
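/*
 * Editorial note: new write entries above are created with a 512 byte
 * alignment, matching the traditional sector size, while the read path
 * (VBOX_WITH_IO_READ_CACHE) requests PAGE_SIZE alignment for new entries.
 */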
2177
2178VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2179{
2180 int rc = VINF_SUCCESS;
2181 PPDMBLKCACHEREQ pReq;
2182
2183 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2184
2185 /* Commit dirty entries in the cache. */
2186 pdmBlkCacheCommit(pBlkCache);
2187
2188 /* Allocate new request structure. */
2189 pReq = pdmBlkCacheReqAlloc(0, pvUser);
2190 if (RT_UNLIKELY(!pReq))
2191 return VERR_NO_MEMORY;
2192
2193 rc = pdmBlkCacheRequestPassthrough(pBlkCache, pReq, NULL, 0, 0,
2194 PDMBLKCACHEXFERDIR_FLUSH);
2195 AssertRC(rc);
2196
2197 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2198 return VINF_AIO_TASK_PENDING;
2199}
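/*
 * Editorial note: the flush path always reports VINF_AIO_TASK_PENDING to
 * the caller; dirty entries are committed first and the completion callback
 * fires once the passthrough flush request issued above has finished.
 */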
2200
2201/**
2202 * Completes a waiter, freeing its resources, and completes the request
2203 * handle if everything was transferred.
2204 * @returns Next waiter in the list.
2205 * @param pBlkCache The endpoint block cache.
2206 * @param pWaiter Waiter to complete.
2207 * @param rc Status code to set.
2208 */
2209static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache,
2210 PPDMBLKCACHEWAITER pWaiter,
2211 int rc)
2212{
2213 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2214 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2215
2216 pdmBlkCacheReqUpdate(pBlkCache, pReq, pWaiter->cbTransfer, rc, true);
2217
2218 RTMemFree(pWaiter);
2219
2220 return pNext;
2221}
2222
2223static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2224{
2225 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2226 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2227
2228 /* Reference the entry now as we are clearing the I/O in progress flag
2229 * which protected the entry till now. */
2230 pdmBlkCacheEntryRef(pEntry);
2231
2232 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2233 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2234
2235 /* Process the waiting segment list. The data in the entry might have changed in between. */
2236 bool fDirty = false;
2237 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2238 PPDMBLKCACHEWAITER pCurr = pComplete;
2239
2240 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2241 ("The list tail was not updated correctly\n"));
2242 pEntry->pWaitingTail = NULL;
2243 pEntry->pWaitingHead = NULL;
2244
2245 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2246 {
2247 /*
2248 * An error here is difficult to handle as the original request has already completed.
2249 * The error is logged for now and the VM is paused.
2250 * If the user resumes the VM the entry is written again in the hope
2251 * that the problem was fixed and the next write succeeds.
2252 */
2253 if (RT_FAILURE(rcIoXfer))
2254 {
2255 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\"\n",
2256 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId));
2257
2258 if (!ASMAtomicXchgBool(&pCache->fIoErrorVmSuspended, true))
2259 {
2260 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2261 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc). "
2262 "Make sure there is enough free space on the disk and that the disk is working properly. "
2263 "Operation can be resumed afterwards"),
2264 pBlkCache->pszId, rcIoXfer);
2265 AssertRC(rc);
2266 }
2267
2268 /*
2269 * The entry is still marked as dirty which prevents eviction.
2270 * Add the waiters to the list again.
2271 */
2272 fDirty = true;
2273
2274 if (pComplete)
2275 {
2276 pEntry->pWaitingHead = pComplete;
2277 while (pComplete->pNext)
2278 pComplete = pComplete->pNext;
2279 pEntry->pWaitingTail = pComplete;
2280 pComplete = NULL;
2281 }
2282 }
2283 else
2284 {
2285 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2286
2287 while (pCurr)
2288 {
2289 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2290
2291 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2292 fDirty = true;
2293
2294 pCurr = pCurr->pNext;
2295 }
2296 }
2297 }
2298 else
2299 {
2300 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2301 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2302 ("Invalid flags set\n"));
2303
2304 while (pCurr)
2305 {
2306 if (pCurr->fWrite)
2307 {
2308 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2309 fDirty = true;
2310 }
2311 else
2312 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2313
2314 pCurr = pCurr->pNext;
2315 }
2316 }
2317
2318 bool fCommit = false;
2319 if (fDirty)
2320 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2321
2322 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2323
2324 /* Dereference so that it isn't protected anymore, unless another write was issued for it. */
2325 pdmBlkCacheEntryRelease(pEntry);
2326
2327 if (fCommit)
2328 pdmBlkCacheCommitDirtyEntries(pCache);
2329
2330 /* Complete waiters now. */
2331 while (pComplete)
2332 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2333}
2334
2335VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2336{
2337 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2338
2339 if (hIoXfer->fIoCache)
2340 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2341 else
2342 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, hIoXfer->cbXfer, rcIoXfer, true);
2343}
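/*
 * Editorial sketch of the producer side: an I/O manager hands the transfer
 * handle back once the backend request finishes. The callback name, its
 * registration and the pvUser wiring are hypothetical.
 */
#if 0
static DECLCALLBACK(void) exampleIoMgrXferDone(void *pvUser, int rcIo)
{
    PPDMBLKCACHEIOXFER hIoXfer = (PPDMBLKCACHEIOXFER)pvUser;
    /* fIoCache inside the handle decides between updating a cache entry
     * and plain passthrough request accounting. */
    PDMR3BlkCacheIoXferComplete(pExampleBlkCache, hIoXfer, rcIo); /* hypothetical cache handle */
}
#endif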
2344