VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMBlkCache.cpp@ 34246

Last change on this file since 34246 was 34246, checked in by vboxsync, 14 years ago

PDMBlkCache: Bug fix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 84.2 KB
Line 
1/* $Id: PDMBlkCache.cpp 34246 2010-11-22 15:13:19Z vboxsync $ */
2/** @file
3 * PDM Block Cache.
4 */
5
6/*
7 * Copyright (C) 2006-2008 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/** @page pg_pdm_block_cache PDM Block Cache - The I/O cache
19 * This component implements an I/O cache based on the 2Q cache algorithm.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#define LOG_GROUP LOG_GROUP_PDM_BLK_CACHE
26#include "PDMInternal.h"
27#include <iprt/asm.h>
28#include <iprt/mem.h>
29#include <iprt/path.h>
30#include <iprt/string.h>
31#include <VBox/log.h>
32#include <VBox/stam.h>
33#include <VBox/uvm.h>
34#include <VBox/vm.h>
35
36#include "PDMBlkCacheInternal.h"
37
/*
 * Lock ownership assertions.
 *
 * In strict builds each macro asserts that the calling thread owns the named
 * lock.  In all other builds the macros expand to empty statements and do not
 * evaluate their argument.  The argument is parenthesized so that any pointer
 * expression can be passed safely.
 */
#ifdef VBOX_STRICT
/** Asserts that the caller owns the critical section of the given cache. */
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) \
    do \
    { \
        AssertMsg(RTCritSectIsOwner(&(Cache)->CritSect), \
                  ("Thread does not own critical section\n"));\
    } while(0)

/** Asserts that the caller is the write owner of the per endpoint RW semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsWriteOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not exclusive owner of the per endpoint RW semaphore\n")); \
    } while(0)

/** Asserts that the caller is a read owner of the per endpoint RW semaphore. */
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) \
    do \
    { \
        AssertMsg(RTSemRWIsReadOwner((pEpCache)->SemRWEntries), \
                  ("Thread is not read owner of the per endpoint RW semaphore\n")); \
    } while(0)

#else
# define PDMACFILECACHE_IS_CRITSECT_OWNER(Cache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_WRITE_OWNER(pEpCache) do { } while(0)
# define PDMACFILECACHE_EP_IS_SEMRW_READ_OWNER(pEpCache) do { } while(0)
#endif
65
66/*******************************************************************************
67* Internal Functions *
68*******************************************************************************/
69
70/**
71 * Decrement the reference counter of the given cache entry.
72 *
73 * @returns nothing.
74 * @param pEntry The entry to release.
75 */
76DECLINLINE(void) pdmBlkCacheEntryRelease(PPDMBLKCACHEENTRY pEntry)
77{
78 AssertMsg(pEntry->cRefs > 0, ("Trying to release a not referenced entry\n"));
79 ASMAtomicDecU32(&pEntry->cRefs);
80}
81
82/**
83 * Increment the reference counter of the given cache entry.
84 *
85 * @returns nothing.
86 * @param pEntry The entry to reference.
87 */
88DECLINLINE(void) pdmBlkCacheEntryRef(PPDMBLKCACHEENTRY pEntry)
89{
90 ASMAtomicIncU32(&pEntry->cRefs);
91}
92
#ifdef DEBUG
/**
 * Sanity checks the byte accounting of the global cache (debug builds only).
 *
 * @returns nothing.
 * @param   pCache    The global cache instance to check.
 */
static void pdmBlkCacheValidate(PPDMBLKCACHEGLOBAL pCache)
{
    /* The cache must never hold more data than its configured maximum. */
    AssertMsg(pCache->cbMax >= pCache->cbCached,
              ("Current amount of cached data exceeds maximum\n"));

    /* The recently-used-in and frequently-used lists together must account for all cached bytes. */
    AssertMsg(pCache->cbCached == pCache->LruRecentlyUsedIn.cbCached + pCache->LruFrequentlyUsed.cbCached,
              ("Amount of cached data doesn't match\n"));

    /* The paged out list has its own upper bound. */
    AssertMsg(pCache->cbRecentlyUsedOutMax >= pCache->LruRecentlyUsedOut.cbCached,
              ("Paged out list exceeds maximum\n"));
}
#endif
108
109DECLINLINE(void) pdmBlkCacheLockEnter(PPDMBLKCACHEGLOBAL pCache)
110{
111 RTCritSectEnter(&pCache->CritSect);
112#ifdef DEBUG
113 pdmBlkCacheValidate(pCache);
114#endif
115}
116
117DECLINLINE(void) pdmBlkCacheLockLeave(PPDMBLKCACHEGLOBAL pCache)
118{
119#ifdef DEBUG
120 pdmBlkCacheValidate(pCache);
121#endif
122 RTCritSectLeave(&pCache->CritSect);
123}
124
125DECLINLINE(void) pdmBlkCacheSub(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
126{
127 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
128 pCache->cbCached -= cbAmount;
129}
130
131DECLINLINE(void) pdmBlkCacheAdd(PPDMBLKCACHEGLOBAL pCache, uint32_t cbAmount)
132{
133 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
134 pCache->cbCached += cbAmount;
135}
136
137DECLINLINE(void) pdmBlkCacheListAdd(PPDMBLKLRULIST pList, uint32_t cbAmount)
138{
139 pList->cbCached += cbAmount;
140}
141
142DECLINLINE(void) pdmBlkCacheListSub(PPDMBLKLRULIST pList, uint32_t cbAmount)
143{
144 pList->cbCached -= cbAmount;
145}
146
#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
/**
 * Walks a LRU list and asserts that it is well formed.
 *
 * Each entry is compared against all of its successors, it must differ from
 * the forbidden entry, and the entry without a successor must be the list
 * tail.
 *
 * @returns nothing
 * @param   pList         The LRU list to check.
 * @param   pNotInList    Entry which must not be a member of the list (may be NULL).
 */
static void pdmBlkCacheCheckList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pNotInList)
{
    for (PPDMBLKCACHEENTRY pCurr = pList->pHead; pCurr; pCurr = pCurr->pNext)
    {
        /* An entry showing up again further down means a duplicate or a cycle. */
        for (PPDMBLKCACHEENTRY pLater = pCurr->pNext; pLater; pLater = pLater->pNext)
        {
            AssertMsg(pCurr != pLater,
                      ("Entry %#p is at least two times in list %#p or there is a cycle in the list\n",
                      pCurr, pList));
        }

        AssertMsg(pCurr != pNotInList, ("Not allowed entry %#p is in list\n", pCurr));

        /* The entry without a successor has to be the recorded tail. */
        AssertMsg(pCurr->pNext || pCurr == pList->pTail,
                  ("End of list reached but last element is not list tail\n"));
    }
}
#endif
181
182/**
183 * Unlinks a cache entry from the LRU list it is assigned to.
184 *
185 * @returns nothing.
186 * @param pEntry The entry to unlink.
187 */
188static void pdmBlkCacheEntryRemoveFromList(PPDMBLKCACHEENTRY pEntry)
189{
190 PPDMBLKLRULIST pList = pEntry->pList;
191 PPDMBLKCACHEENTRY pPrev, pNext;
192
193 LogFlowFunc((": Deleting entry %#p from list %#p\n", pEntry, pList));
194
195 AssertPtr(pList);
196
197#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
198 pdmBlkCacheCheckList(pList, NULL);
199#endif
200
201 pPrev = pEntry->pPrev;
202 pNext = pEntry->pNext;
203
204 AssertMsg(pEntry != pPrev, ("Entry links to itself as previous element\n"));
205 AssertMsg(pEntry != pNext, ("Entry links to itself as next element\n"));
206
207 if (pPrev)
208 pPrev->pNext = pNext;
209 else
210 {
211 pList->pHead = pNext;
212
213 if (pNext)
214 pNext->pPrev = NULL;
215 }
216
217 if (pNext)
218 pNext->pPrev = pPrev;
219 else
220 {
221 pList->pTail = pPrev;
222
223 if (pPrev)
224 pPrev->pNext = NULL;
225 }
226
227 pEntry->pList = NULL;
228 pEntry->pPrev = NULL;
229 pEntry->pNext = NULL;
230 pdmBlkCacheListSub(pList, pEntry->cbData);
231#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
232 pdmBlkCacheCheckList(pList, pEntry);
233#endif
234}
235
236/**
237 * Adds a cache entry to the given LRU list unlinking it from the currently
238 * assigned list if needed.
239 *
240 * @returns nothing.
241 * @param pList List to the add entry to.
242 * @param pEntry Entry to add.
243 */
244static void pdmBlkCacheEntryAddToList(PPDMBLKLRULIST pList, PPDMBLKCACHEENTRY pEntry)
245{
246 LogFlowFunc((": Adding entry %#p to list %#p\n", pEntry, pList));
247#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
248 pdmBlkCacheCheckList(pList, NULL);
249#endif
250
251 /* Remove from old list if needed */
252 if (pEntry->pList)
253 pdmBlkCacheEntryRemoveFromList(pEntry);
254
255 pEntry->pNext = pList->pHead;
256 if (pList->pHead)
257 pList->pHead->pPrev = pEntry;
258 else
259 {
260 Assert(!pList->pTail);
261 pList->pTail = pEntry;
262 }
263
264 pEntry->pPrev = NULL;
265 pList->pHead = pEntry;
266 pdmBlkCacheListAdd(pList, pEntry->cbData);
267 pEntry->pList = pList;
268#ifdef PDMACFILECACHE_WITH_LRULIST_CHECKS
269 pdmBlkCacheCheckList(pList, NULL);
270#endif
271}
272
273/**
274 * Destroys a LRU list freeing all entries.
275 *
276 * @returns nothing
277 * @param pList Pointer to the LRU list to destroy.
278 *
279 * @note The caller must own the critical section of the cache.
280 */
281static void pdmBlkCacheDestroyList(PPDMBLKLRULIST pList)
282{
283 while (pList->pHead)
284 {
285 PPDMBLKCACHEENTRY pEntry = pList->pHead;
286
287 pList->pHead = pEntry->pNext;
288
289 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
290 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
291
292 RTMemPageFree(pEntry->pbData, pEntry->cbData);
293 RTMemFree(pEntry);
294 }
295}
296
297/**
298 * Tries to remove the given amount of bytes from a given list in the cache
299 * moving the entries to one of the given ghosts lists
300 *
301 * @returns Amount of data which could be freed.
302 * @param pCache Pointer to the global cache data.
303 * @param cbData The amount of the data to free.
304 * @param pListSrc The source list to evict data from.
305 * @param pGhostListSrc The ghost list removed entries should be moved to
306 * NULL if the entry should be freed.
307 * @param fReuseBuffer Flag whether a buffer should be reused if it has the same size
308 * @param ppbBuf Where to store the address of the buffer if an entry with the
309 * same size was found and fReuseBuffer is true.
310 *
311 * @note This function may return fewer bytes than requested because entries
312 * may be marked as non evictable if they are used for I/O at the
313 * moment.
314 */
315static size_t pdmBlkCacheEvictPagesFrom(PPDMBLKCACHEGLOBAL pCache, size_t cbData,
316 PPDMBLKLRULIST pListSrc, PPDMBLKLRULIST pGhostListDst,
317 bool fReuseBuffer, uint8_t **ppbBuffer)
318{
319 size_t cbEvicted = 0;
320
321 PDMACFILECACHE_IS_CRITSECT_OWNER(pCache);
322
323 AssertMsg(cbData > 0, ("Evicting 0 bytes not possible\n"));
324 AssertMsg( !pGhostListDst
325 || (pGhostListDst == &pCache->LruRecentlyUsedOut),
326 ("Destination list must be NULL or the recently used but paged out list\n"));
327
328 if (fReuseBuffer)
329 {
330 AssertPtr(ppbBuffer);
331 *ppbBuffer = NULL;
332 }
333
334 /* Start deleting from the tail. */
335 PPDMBLKCACHEENTRY pEntry = pListSrc->pTail;
336
337 while ((cbEvicted < cbData) && pEntry)
338 {
339 PPDMBLKCACHEENTRY pCurr = pEntry;
340
341 pEntry = pEntry->pPrev;
342
343 /* We can't evict pages which are currently in progress or dirty but not in progress */
344 if ( !(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
345 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
346 {
347 /* Ok eviction candidate. Grab the endpoint semaphore and check again
348 * because somebody else might have raced us. */
349 PPDMBLKCACHE pBlkCache = pCurr->pBlkCache;
350 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
351
352 if (!(pCurr->fFlags & PDMBLKCACHE_NOT_EVICTABLE)
353 && (ASMAtomicReadU32(&pCurr->cRefs) == 0))
354 {
355 LogFlow(("Evicting entry %#p (%u bytes)\n", pCurr, pCurr->cbData));
356
357 if (fReuseBuffer && (pCurr->cbData == cbData))
358 {
359 STAM_COUNTER_INC(&pCache->StatBuffersReused);
360 *ppbBuffer = pCurr->pbData;
361 }
362 else if (pCurr->pbData)
363 RTMemPageFree(pCurr->pbData, pCurr->cbData);
364
365 pCurr->pbData = NULL;
366 cbEvicted += pCurr->cbData;
367
368 pdmBlkCacheEntryRemoveFromList(pCurr);
369 pdmBlkCacheSub(pCache, pCurr->cbData);
370
371 if (pGhostListDst)
372 {
373 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
374
375 PPDMBLKCACHEENTRY pGhostEntFree = pGhostListDst->pTail;
376
377 /* We have to remove the last entries from the paged out list. */
378 while ( ((pGhostListDst->cbCached + pCurr->cbData) > pCache->cbRecentlyUsedOutMax)
379 && pGhostEntFree)
380 {
381 PPDMBLKCACHEENTRY pFree = pGhostEntFree;
382 PPDMBLKCACHE pBlkCacheFree = pFree->pBlkCache;
383
384 pGhostEntFree = pGhostEntFree->pPrev;
385
386 RTSemRWRequestWrite(pBlkCacheFree->SemRWEntries, RT_INDEFINITE_WAIT);
387
388 if (ASMAtomicReadU32(&pFree->cRefs) == 0)
389 {
390 pdmBlkCacheEntryRemoveFromList(pFree);
391
392 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
393 RTAvlrU64Remove(pBlkCacheFree->pTree, pFree->Core.Key);
394 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
395
396 RTMemFree(pFree);
397 }
398
399 RTSemRWReleaseWrite(pBlkCacheFree->SemRWEntries);
400 }
401
402 if (pGhostListDst->cbCached + pCurr->cbData > pCache->cbRecentlyUsedOutMax)
403 {
404 /* Couldn't remove enough entries. Delete */
405 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
406 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
407 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
408
409 RTMemFree(pCurr);
410 }
411 else
412 pdmBlkCacheEntryAddToList(pGhostListDst, pCurr);
413 }
414 else
415 {
416 /* Delete the entry from the AVL tree it is assigned to. */
417 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
418 RTAvlrU64Remove(pCurr->pBlkCache->pTree, pCurr->Core.Key);
419 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
420
421 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
422 RTMemFree(pCurr);
423 }
424 }
425
426 }
427 else
428 LogFlow(("Entry %#p (%u bytes) is still in progress and can't be evicted\n", pCurr, pCurr->cbData));
429 }
430
431 return cbEvicted;
432}
433
434static bool pdmBlkCacheReclaim(PPDMBLKCACHEGLOBAL pCache, size_t cbData, bool fReuseBuffer, uint8_t **ppbBuffer)
435{
436 size_t cbRemoved = 0;
437
438 if ((pCache->cbCached + cbData) < pCache->cbMax)
439 return true;
440 else if ((pCache->LruRecentlyUsedIn.cbCached + cbData) > pCache->cbRecentlyUsedInMax)
441 {
442 /* Try to evict as many bytes as possible from A1in */
443 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruRecentlyUsedIn,
444 &pCache->LruRecentlyUsedOut, fReuseBuffer, ppbBuffer);
445
446 /*
447 * If it was not possible to remove enough entries
448 * try the frequently accessed cache.
449 */
450 if (cbRemoved < cbData)
451 {
452 Assert(!fReuseBuffer || !*ppbBuffer); /* It is not possible that we got a buffer with the correct size but we didn't freed enough data. */
453
454 /*
455 * If we removed something we can't pass the reuse buffer flag anymore because
456 * we don't need to evict that much data
457 */
458 if (!cbRemoved)
459 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
460 NULL, fReuseBuffer, ppbBuffer);
461 else
462 cbRemoved += pdmBlkCacheEvictPagesFrom(pCache, cbData - cbRemoved, &pCache->LruFrequentlyUsed,
463 NULL, false, NULL);
464 }
465 }
466 else
467 {
468 /* We have to remove entries from frequently access list. */
469 cbRemoved = pdmBlkCacheEvictPagesFrom(pCache, cbData, &pCache->LruFrequentlyUsed,
470 NULL, fReuseBuffer, ppbBuffer);
471 }
472
473 LogFlowFunc((": removed %u bytes, requested %u\n", cbRemoved, cbData));
474 return (cbRemoved >= cbData);
475}
476
477DECLINLINE(int) pdmBlkCacheEnqueue(PPDMBLKCACHE pBlkCache, uint64_t off, PPDMBLKCACHEIOXFER pIoXfer)
478{
479 int rc = VINF_SUCCESS;
480
481 switch (pBlkCache->enmType)
482 {
483 case PDMBLKCACHETYPE_DEV:
484 {
485 rc = pBlkCache->u.Dev.pfnXferEnqueue(pBlkCache->u.Dev.pDevIns,
486 pIoXfer->enmXferDir,
487 off, pIoXfer->cbXfer,
488 &pIoXfer->SgBuf, pIoXfer);
489 break;
490 }
491 case PDMBLKCACHETYPE_DRV:
492 {
493 rc = pBlkCache->u.Drv.pfnXferEnqueue(pBlkCache->u.Drv.pDrvIns,
494 pIoXfer->enmXferDir,
495 off, pIoXfer->cbXfer,
496 &pIoXfer->SgBuf, pIoXfer);
497 break;
498 }
499 case PDMBLKCACHETYPE_USB:
500 {
501 rc = pBlkCache->u.Usb.pfnXferEnqueue(pBlkCache->u.Usb.pUsbIns,
502 pIoXfer->enmXferDir,
503 off, pIoXfer->cbXfer,
504 &pIoXfer->SgBuf, pIoXfer);
505 break;
506 }
507 case PDMBLKCACHETYPE_INTERNAL:
508 {
509 rc = pBlkCache->u.Int.pfnXferEnqueue(pBlkCache->u.Int.pvUser,
510 pIoXfer->enmXferDir,
511 off, pIoXfer->cbXfer,
512 &pIoXfer->SgBuf, pIoXfer);
513 break;
514 }
515 default:
516 AssertMsgFailed(("Unknown block cache type!\n"));
517 }
518
519 return rc;
520}
521
522/**
523 * Initiates a read I/O task for the given entry.
524 *
525 * @returns VBox status code.
526 * @param pEntry The entry to fetch the data to.
527 */
528static int pdmBlkCacheEntryReadFromMedium(PPDMBLKCACHEENTRY pEntry)
529{
530 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
531 LogFlowFunc((": Reading data into cache entry %#p\n", pEntry));
532
533 /* Make sure no one evicts the entry while it is accessed. */
534 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
535
536 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
537 if (RT_UNLIKELY(!pIoXfer))
538 return VERR_NO_MEMORY;
539
540 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
541
542 pIoXfer->fIoCache = true;
543 pIoXfer->pEntry = pEntry;
544 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
545 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
546 pIoXfer->cbXfer = pEntry->cbData;
547 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_READ;
548 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
549
550 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
551}
552
553/**
554 * Initiates a write I/O task for the given entry.
555 *
556 * @returns nothing.
557 * @param pEntry The entry to read the data from.
558 */
559static int pdmBlkCacheEntryWriteToMedium(PPDMBLKCACHEENTRY pEntry)
560{
561 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
562 LogFlowFunc((": Writing data from cache entry %#p\n", pEntry));
563
564 /* Make sure no one evicts the entry while it is accessed. */
565 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
566
567 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
568 if (RT_UNLIKELY(!pIoXfer))
569 return VERR_NO_MEMORY;
570
571 AssertMsg(pEntry->pbData, ("Entry is in ghost state\n"));
572
573 pIoXfer->fIoCache = true;
574 pIoXfer->pEntry = pEntry;
575 pIoXfer->SgSeg.pvSeg = pEntry->pbData;
576 pIoXfer->SgSeg.cbSeg = pEntry->cbData;
577 pIoXfer->cbXfer = pEntry->cbData;
578 pIoXfer->enmXferDir = PDMBLKCACHEXFERDIR_WRITE;
579 RTSgBufInit(&pIoXfer->SgBuf, &pIoXfer->SgSeg, 1);
580
581 return pdmBlkCacheEnqueue(pBlkCache, pEntry->Core.Key, pIoXfer);
582}
583
584/**
585 * Passthrough a part of a request directly to the I/O manager
586 * handling the endpoint.
587 *
588 * @returns VBox status code.
589 * @param pEndpoint The endpoint.
590 * @param pTask The task.
591 * @param pIoMemCtx The I/O memory context to use.
592 * @param offStart Offset to start transfer from.
593 * @param cbData Amount of data to transfer.
594 * @param enmTransferType The transfer type (read/write)
595 */
596static int pdmBlkCacheRequestPassthrough(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
597 PRTSGBUF pSgBuf, uint64_t offStart, size_t cbData,
598 PDMBLKCACHEXFERDIR enmXferDir)
599{
600
601 PPDMBLKCACHEIOXFER pIoXfer = (PPDMBLKCACHEIOXFER)RTMemAllocZ(sizeof(PDMBLKCACHEIOXFER));
602 if (RT_UNLIKELY(!pIoXfer))
603 return VERR_NO_MEMORY;
604
605 ASMAtomicIncU32(&pReq->cXfersPending);
606 pIoXfer->fIoCache = false;
607 pIoXfer->pReq = pReq;
608 pIoXfer->cbXfer = cbData;
609 pIoXfer->enmXferDir = enmXferDir;
610 RTSgBufClone(&pIoXfer->SgBuf, pSgBuf);
611 RTSgBufAdvance(pSgBuf, cbData);
612
613 return pdmBlkCacheEnqueue(pBlkCache, offStart, pIoXfer);
614}
615
616/**
617 * Commit a single dirty entry to the endpoint
618 *
619 * @returns nothing
620 * @param pEntry The entry to commit.
621 */
622static void pdmBlkCacheEntryCommit(PPDMBLKCACHEENTRY pEntry)
623{
624 AssertMsg( (pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY)
625 && !(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS),
626 ("Invalid flags set for entry %#p\n", pEntry));
627
628 pdmBlkCacheEntryWriteToMedium(pEntry);
629}
630
631/**
632 * Commit all dirty entries for a single endpoint.
633 *
634 * @returns nothing.
635 * @param pBlkCache The endpoint cache to commit.
636 */
637static void pdmBlkCacheCommit(PPDMBLKCACHE pBlkCache)
638{
639 uint32_t cbCommitted = 0;
640 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
641
642 /* The list is moved to a new header to reduce locking overhead. */
643 RTLISTNODE ListDirtyNotCommitted;
644 RTSPINLOCKTMP Tmp;
645
646 RTListInit(&ListDirtyNotCommitted);
647 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
648 RTListMove(&ListDirtyNotCommitted, &pBlkCache->ListDirtyNotCommitted);
649 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
650
651 if (!RTListIsEmpty(&ListDirtyNotCommitted))
652 {
653 PPDMBLKCACHEENTRY pEntry = RTListNodeGetFirst(&ListDirtyNotCommitted,
654 PDMBLKCACHEENTRY,
655 NodeNotCommitted);
656
657 while (!RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted))
658 {
659 PPDMBLKCACHEENTRY pNext = RTListNodeGetNext(&pEntry->NodeNotCommitted, PDMBLKCACHEENTRY,
660 NodeNotCommitted);
661 pdmBlkCacheEntryCommit(pEntry);
662 cbCommitted += pEntry->cbData;
663 RTListNodeRemove(&pEntry->NodeNotCommitted);
664 pEntry = pNext;
665 }
666
667 /* Commit the last endpoint */
668 Assert(RTListNodeIsLast(&ListDirtyNotCommitted, &pEntry->NodeNotCommitted));
669 pdmBlkCacheEntryCommit(pEntry);
670 RTListNodeRemove(&pEntry->NodeNotCommitted);
671 AssertMsg(RTListIsEmpty(&ListDirtyNotCommitted),
672 ("Committed all entries but list is not empty\n"));
673 }
674
675 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
676 AssertMsg(pBlkCache->pCache->cbDirty >= cbCommitted,
677 ("Number of committed bytes exceeds number of dirty bytes\n"));
678 ASMAtomicSubU32(&pBlkCache->pCache->cbDirty, cbCommitted);
679}
680
681/**
682 * Commit all dirty entries in the cache.
683 *
684 * @returns nothing.
685 * @param pCache The global cache instance.
686 */
687static void pdmBlkCacheCommitDirtyEntries(PPDMBLKCACHEGLOBAL pCache)
688{
689 bool fCommitInProgress = ASMAtomicXchgBool(&pCache->fCommitInProgress, true);
690
691 if (!fCommitInProgress)
692 {
693 pdmBlkCacheLockEnter(pCache);
694 Assert(!RTListIsEmpty(&pCache->ListUsers));
695
696 PPDMBLKCACHE pBlkCache = RTListNodeGetFirst(&pCache->ListUsers,
697 PDMBLKCACHE,
698 NodeCacheUser);
699 AssertPtr(pBlkCache);
700
701 while (!RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser))
702 {
703 pdmBlkCacheCommit(pBlkCache);
704
705 pBlkCache = RTListNodeGetNext(&pBlkCache->NodeCacheUser, PDMBLKCACHE,
706 NodeCacheUser);
707 }
708
709 /* Commit the last endpoint */
710 Assert(RTListNodeIsLast(&pCache->ListUsers, &pBlkCache->NodeCacheUser));
711 pdmBlkCacheCommit(pBlkCache);
712
713 pdmBlkCacheLockLeave(pCache);
714 ASMAtomicWriteBool(&pCache->fCommitInProgress, false);
715 }
716}
717
718/**
719 * Adds the given entry as a dirty to the cache.
720 *
721 * @returns Flag whether the amount of dirty bytes in the cache exceeds the threshold
722 * @param pBlkCache The endpoint cache the entry belongs to.
723 * @param pEntry The entry to add.
724 */
725static bool pdmBlkCacheAddDirtyEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
726{
727 bool fDirtyBytesExceeded = false;
728 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
729
730 /* If the commit timer is disabled we commit right away. */
731 if (pCache->u32CommitTimeoutMs == 0)
732 {
733 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
734 pdmBlkCacheEntryCommit(pEntry);
735 }
736 else if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY))
737 {
738 pEntry->fFlags |= PDMBLKCACHE_ENTRY_IS_DIRTY;
739
740 RTSPINLOCKTMP Tmp;
741 RTSpinlockAcquire(pBlkCache->LockList, &Tmp);
742 RTListAppend(&pBlkCache->ListDirtyNotCommitted, &pEntry->NodeNotCommitted);
743 RTSpinlockRelease(pBlkCache->LockList, &Tmp);
744
745 uint32_t cbDirty = ASMAtomicAddU32(&pCache->cbDirty, pEntry->cbData);
746
747 fDirtyBytesExceeded = (cbDirty >= pCache->cbCommitDirtyThreshold);
748 }
749
750 return fDirtyBytesExceeded;
751}
752
753/**
754 * Commit timer callback.
755 */
756static void pdmBlkCacheCommitTimerCallback(PVM pVM, PTMTIMER pTimer, void *pvUser)
757{
758 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
759
760 LogFlowFunc(("Commit interval expired, commiting dirty entries\n"));
761
762 if (ASMAtomicReadU32(&pCache->cbDirty) > 0)
763 pdmBlkCacheCommitDirtyEntries(pCache);
764
765 TMTimerSetMillies(pTimer, pCache->u32CommitTimeoutMs);
766 LogFlowFunc(("Entries committed, going to sleep\n"));
767}
768
769int pdmR3BlkCacheInit(PVM pVM)
770{
771 int rc = VINF_SUCCESS;
772 PUVM pUVM = pVM->pUVM;
773 PPDMBLKCACHEGLOBAL pBlkCacheGlobal;
774
775 LogFlowFunc((": pVM=%p\n", pVM));
776
777 VM_ASSERT_EMT(pVM);
778
779 PCFGMNODE pCfgRoot = CFGMR3GetRoot(pVM);
780 PCFGMNODE pCfgBlkCache = CFGMR3GetChild(CFGMR3GetChild(pCfgRoot, "PDM"), "BlkCache");
781
782 pBlkCacheGlobal = (PPDMBLKCACHEGLOBAL)RTMemAllocZ(sizeof(PDMBLKCACHEGLOBAL));
783 if (!pBlkCacheGlobal)
784 return VERR_NO_MEMORY;
785
786 RTListInit(&pBlkCacheGlobal->ListUsers);
787 pBlkCacheGlobal->pVM = pVM;
788 pBlkCacheGlobal->cRefs = 0;
789 pBlkCacheGlobal->cbCached = 0;
790 pBlkCacheGlobal->fCommitInProgress = false;
791
792 /* Initialize members */
793 pBlkCacheGlobal->LruRecentlyUsedIn.pHead = NULL;
794 pBlkCacheGlobal->LruRecentlyUsedIn.pTail = NULL;
795 pBlkCacheGlobal->LruRecentlyUsedIn.cbCached = 0;
796
797 pBlkCacheGlobal->LruRecentlyUsedOut.pHead = NULL;
798 pBlkCacheGlobal->LruRecentlyUsedOut.pTail = NULL;
799 pBlkCacheGlobal->LruRecentlyUsedOut.cbCached = 0;
800
801 pBlkCacheGlobal->LruFrequentlyUsed.pHead = NULL;
802 pBlkCacheGlobal->LruFrequentlyUsed.pTail = NULL;
803 pBlkCacheGlobal->LruFrequentlyUsed.cbCached = 0;
804
805 do
806 {
807 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheSize", &pBlkCacheGlobal->cbMax, 5 * _1M);
808 AssertLogRelRCBreak(rc);
809 LogFlowFunc(("Maximum number of bytes cached %u\n", pBlkCacheGlobal->cbMax));
810
811 pBlkCacheGlobal->cbRecentlyUsedInMax = (pBlkCacheGlobal->cbMax / 100) * 25; /* 25% of the buffer size */
812 pBlkCacheGlobal->cbRecentlyUsedOutMax = (pBlkCacheGlobal->cbMax / 100) * 50; /* 50% of the buffer size */
813 LogFlowFunc(("cbRecentlyUsedInMax=%u cbRecentlyUsedOutMax=%u\n",
814 pBlkCacheGlobal->cbRecentlyUsedInMax, pBlkCacheGlobal->cbRecentlyUsedOutMax));
815
816 /** @todo r=aeichner: Experiment to find optimal default values */
817 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitIntervalMs", &pBlkCacheGlobal->u32CommitTimeoutMs, 10000 /* 10sec */);
818 AssertLogRelRCBreak(rc);
819 rc = CFGMR3QueryU32Def(pCfgBlkCache, "CacheCommitThreshold", &pBlkCacheGlobal->cbCommitDirtyThreshold, pBlkCacheGlobal->cbMax / 2);
820 AssertLogRelRCBreak(rc);
821 } while (0);
822
823 if (RT_SUCCESS(rc))
824 {
825 STAMR3Register(pVM, &pBlkCacheGlobal->cbMax,
826 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
827 "/PDM/BlkCache/cbMax",
828 STAMUNIT_BYTES,
829 "Maximum cache size");
830 STAMR3Register(pVM, &pBlkCacheGlobal->cbCached,
831 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
832 "/PDM/BlkCache/cbCached",
833 STAMUNIT_BYTES,
834 "Currently used cache");
835 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedIn.cbCached,
836 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
837 "/PDM/BlkCache/cbCachedMruIn",
838 STAMUNIT_BYTES,
839 "Number of bytes cached in MRU list");
840 STAMR3Register(pVM, &pBlkCacheGlobal->LruRecentlyUsedOut.cbCached,
841 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
842 "/PDM/BlkCache/cbCachedMruOut",
843 STAMUNIT_BYTES,
844 "Number of bytes cached in FRU list");
845 STAMR3Register(pVM, &pBlkCacheGlobal->LruFrequentlyUsed.cbCached,
846 STAMTYPE_U32, STAMVISIBILITY_ALWAYS,
847 "/PDM/BlkCache/cbCachedFru",
848 STAMUNIT_BYTES,
849 "Number of bytes cached in FRU ghost list");
850
851#ifdef VBOX_WITH_STATISTICS
852 STAMR3Register(pVM, &pBlkCacheGlobal->cHits,
853 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
854 "/PDM/BlkCache/CacheHits",
855 STAMUNIT_COUNT, "Number of hits in the cache");
856 STAMR3Register(pVM, &pBlkCacheGlobal->cPartialHits,
857 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
858 "/PDM/BlkCache/CachePartialHits",
859 STAMUNIT_COUNT, "Number of partial hits in the cache");
860 STAMR3Register(pVM, &pBlkCacheGlobal->cMisses,
861 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
862 "/PDM/BlkCache/CacheMisses",
863 STAMUNIT_COUNT, "Number of misses when accessing the cache");
864 STAMR3Register(pVM, &pBlkCacheGlobal->StatRead,
865 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
866 "/PDM/BlkCache/CacheRead",
867 STAMUNIT_BYTES, "Number of bytes read from the cache");
868 STAMR3Register(pVM, &pBlkCacheGlobal->StatWritten,
869 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
870 "/PDM/BlkCache/CacheWritten",
871 STAMUNIT_BYTES, "Number of bytes written to the cache");
872 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeGet,
873 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
874 "/PDM/BlkCache/CacheTreeGet",
875 STAMUNIT_TICKS_PER_CALL, "Time taken to access an entry in the tree");
876 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeInsert,
877 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
878 "/PDM/BlkCache/CacheTreeInsert",
879 STAMUNIT_TICKS_PER_CALL, "Time taken to insert an entry in the tree");
880 STAMR3Register(pVM, &pBlkCacheGlobal->StatTreeRemove,
881 STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
882 "/PDM/BlkCache/CacheTreeRemove",
883 STAMUNIT_TICKS_PER_CALL, "Time taken to remove an entry an the tree");
884 STAMR3Register(pVM, &pBlkCacheGlobal->StatBuffersReused,
885 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
886 "/PDM/BlkCache/CacheBuffersReused",
887 STAMUNIT_COUNT, "Number of times a buffer could be reused");
888#endif
889
890 /* Initialize the critical section */
891 rc = RTCritSectInit(&pBlkCacheGlobal->CritSect);
892 }
893
894 if (RT_SUCCESS(rc))
895 {
896 /* Create the commit timer */
897 if (pBlkCacheGlobal->u32CommitTimeoutMs > 0)
898 rc = TMR3TimerCreateInternal(pVM, TMCLOCK_REAL,
899 pdmBlkCacheCommitTimerCallback,
900 pBlkCacheGlobal,
901 "BlkCache-Commit",
902 &pBlkCacheGlobal->pTimerCommit);
903
904 if (RT_SUCCESS(rc))
905 {
906 LogRel(("BlkCache: Cache successfully initialised. Cache size is %u bytes\n", pBlkCacheGlobal->cbMax));
907 LogRel(("BlkCache: Cache commit interval is %u ms\n", pBlkCacheGlobal->u32CommitTimeoutMs));
908 LogRel(("BlkCache: Cache commit threshold is %u bytes\n", pBlkCacheGlobal->cbCommitDirtyThreshold));
909 pUVM->pdm.s.pBlkCacheGlobal = pBlkCacheGlobal;
910 return VINF_SUCCESS;
911 }
912
913 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
914 }
915
916 if (pBlkCacheGlobal)
917 RTMemFree(pBlkCacheGlobal);
918
919 LogFlowFunc((": returns rc=%Rrc\n", pVM, rc));
920 return rc;
921}
922
923void pdmR3BlkCacheTerm(PVM pVM)
924{
925 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
926
927 if (pBlkCacheGlobal)
928 {
929 /* Make sure no one else uses the cache now */
930 pdmBlkCacheLockEnter(pBlkCacheGlobal);
931
932 /* Cleanup deleting all cache entries waiting for in progress entries to finish. */
933 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedIn);
934 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruRecentlyUsedOut);
935 pdmBlkCacheDestroyList(&pBlkCacheGlobal->LruFrequentlyUsed);
936
937 pdmBlkCacheLockLeave(pBlkCacheGlobal);
938
939 RTCritSectDelete(&pBlkCacheGlobal->CritSect);
940 RTMemFree(pBlkCacheGlobal);
941 pVM->pUVM->pdm.s.pBlkCacheGlobal = NULL;
942 }
943}
944
945static int pdmR3BlkCacheRetain(PVM pVM, PPPDMBLKCACHE ppBlkCache, const char *pcszId)
946{
947 int rc = VINF_SUCCESS;
948 PPDMBLKCACHE pBlkCache = NULL;
949 bool fAlreadyExists = false;
950 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
951
952 if (!pBlkCacheGlobal)
953 return VERR_NOT_SUPPORTED;
954
955 /*
956 * Check that no other user cache has the same id first,
957 * Unique id's are necessary in case the state is saved.
958 */
959 pdmBlkCacheLockEnter(pBlkCacheGlobal);
960
961 RTListForEach(&pBlkCacheGlobal->ListUsers, pBlkCache, PDMBLKCACHE, NodeCacheUser)
962 {
963 if (!RTStrCmp(pBlkCache->pszId, pcszId))
964 {
965 fAlreadyExists = true;
966 break;
967 }
968 }
969
970 if (!fAlreadyExists)
971 {
972 pBlkCache = (PPDMBLKCACHE)RTMemAllocZ(sizeof(PDMBLKCACHE));
973
974 if (pBlkCache)
975 pBlkCache->pszId = RTStrDup(pcszId);
976
977 if ( pBlkCache
978 && pBlkCache->pszId)
979 {
980 pBlkCache->pCache = pBlkCacheGlobal;
981 RTListInit(&pBlkCache->ListDirtyNotCommitted);
982
983 rc = RTSpinlockCreate(&pBlkCache->LockList);
984 if (RT_SUCCESS(rc))
985 {
986 rc = RTSemRWCreate(&pBlkCache->SemRWEntries);
987 if (RT_SUCCESS(rc))
988 {
989 pBlkCache->pTree = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
990 if (pBlkCache->pTree)
991 {
992 /* Arm the timer if this is the first endpoint. */
993 if ( pBlkCacheGlobal->cRefs == 1
994 && pBlkCacheGlobal->u32CommitTimeoutMs > 0)
995 rc = TMTimerSetMillies(pBlkCacheGlobal->pTimerCommit, pBlkCacheGlobal->u32CommitTimeoutMs);
996
997 if (RT_SUCCESS(rc))
998 {
999#ifdef VBOX_WITH_STATISTICS
1000 STAMR3RegisterF(pBlkCacheGlobal->pVM, &pBlkCache->StatWriteDeferred,
1001 STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS,
1002 STAMUNIT_COUNT, "Number of deferred writes",
1003 "/PDM/BlkCache/%s/Cache/DeferredWrites", pBlkCache->pszId);
1004#endif
1005
1006 /* Add to the list of users. */
1007 pBlkCacheGlobal->cRefs++;
1008 RTListAppend(&pBlkCacheGlobal->ListUsers, &pBlkCache->NodeCacheUser);
1009 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1010
1011 *ppBlkCache = pBlkCache;
1012 LogFlowFunc(("returns success\n"));
1013 return VINF_SUCCESS;
1014 }
1015 }
1016 else
1017 rc = VERR_NO_MEMORY;
1018
1019 RTSemRWDestroy(pBlkCache->SemRWEntries);
1020 }
1021
1022 RTSpinlockDestroy(pBlkCache->LockList);
1023 }
1024
1025 RTStrFree(pBlkCache->pszId);
1026 }
1027 else
1028 rc = VERR_NO_MEMORY;
1029
1030 if (pBlkCache)
1031 RTMemFree(pBlkCache);
1032 }
1033 else
1034 rc = VERR_ALREADY_EXISTS;
1035
1036 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1037
1038 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1039 return rc;
1040}
1041
1042VMMR3DECL(int) PDMR3BlkCacheRetainDriver(PVM pVM, PPDMDRVINS pDrvIns, PPPDMBLKCACHE ppBlkCache,
1043 PFNPDMBLKCACHEXFERCOMPLETEDRV pfnXferComplete,
1044 PFNPDMBLKCACHEXFERENQUEUEDRV pfnXferEnqueue,
1045 const char *pcszId)
1046{
1047 int rc = VINF_SUCCESS;
1048 PPDMBLKCACHE pBlkCache;
1049
1050 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1051 if (RT_SUCCESS(rc))
1052 {
1053 pBlkCache->enmType = PDMBLKCACHETYPE_DRV;
1054 pBlkCache->u.Drv.pfnXferComplete = pfnXferComplete;
1055 pBlkCache->u.Drv.pfnXferEnqueue = pfnXferEnqueue;
1056 pBlkCache->u.Drv.pDrvIns = pDrvIns;
1057 *ppBlkCache = pBlkCache;
1058 }
1059
1060 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1061 return rc;
1062}
1063
1064VMMR3DECL(int) PDMR3BlkCacheRetainDevice(PVM pVM, PPDMDEVINS pDevIns, PPPDMBLKCACHE ppBlkCache,
1065 PFNPDMBLKCACHEXFERCOMPLETEDEV pfnXferComplete,
1066 PFNPDMBLKCACHEXFERENQUEUEDEV pfnXferEnqueue,
1067 const char *pcszId)
1068{
1069 int rc = VINF_SUCCESS;
1070 PPDMBLKCACHE pBlkCache;
1071
1072 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1073 if (RT_SUCCESS(rc))
1074 {
1075 pBlkCache->enmType = PDMBLKCACHETYPE_DEV;
1076 pBlkCache->u.Dev.pfnXferComplete = pfnXferComplete;
1077 pBlkCache->u.Dev.pfnXferEnqueue = pfnXferEnqueue;
1078 pBlkCache->u.Dev.pDevIns = pDevIns;
1079 *ppBlkCache = pBlkCache;
1080 }
1081
1082 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1083 return rc;
1084
1085}
1086
1087VMMR3DECL(int) PDMR3BlkCacheRetainUsb(PVM pVM, PPDMUSBINS pUsbIns, PPPDMBLKCACHE ppBlkCache,
1088 PFNPDMBLKCACHEXFERCOMPLETEUSB pfnXferComplete,
1089 PFNPDMBLKCACHEXFERENQUEUEUSB pfnXferEnqueue,
1090 const char *pcszId)
1091{
1092 int rc = VINF_SUCCESS;
1093 PPDMBLKCACHE pBlkCache;
1094
1095 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1096 if (RT_SUCCESS(rc))
1097 {
1098 pBlkCache->enmType = PDMBLKCACHETYPE_USB;
1099 pBlkCache->u.Usb.pfnXferComplete = pfnXferComplete;
1100 pBlkCache->u.Usb.pfnXferEnqueue = pfnXferEnqueue;
1101 pBlkCache->u.Usb.pUsbIns = pUsbIns;
1102 *ppBlkCache = pBlkCache;
1103 }
1104
1105 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1106 return rc;
1107
1108}
1109
1110VMMR3DECL(int) PDMR3BlkCacheRetainInt(PVM pVM, void *pvUser, PPPDMBLKCACHE ppBlkCache,
1111 PFNPDMBLKCACHEXFERCOMPLETEINT pfnXferComplete,
1112 PFNPDMBLKCACHEXFERENQUEUEINT pfnXferEnqueue,
1113 const char *pcszId)
1114{
1115 int rc = VINF_SUCCESS;
1116 PPDMBLKCACHE pBlkCache;
1117
1118 rc = pdmR3BlkCacheRetain(pVM, &pBlkCache, pcszId);
1119 if (RT_SUCCESS(rc))
1120 {
1121 pBlkCache->enmType = PDMBLKCACHETYPE_INTERNAL;
1122 pBlkCache->u.Int.pfnXferComplete = pfnXferComplete;
1123 pBlkCache->u.Int.pfnXferEnqueue = pfnXferEnqueue;
1124 pBlkCache->u.Int.pvUser = pvUser;
1125 *ppBlkCache = pBlkCache;
1126 }
1127
1128 LogFlowFunc(("Leave rc=%Rrc\n", rc));
1129 return rc;
1130
1131}
1132
1133/**
1134 * Callback for the AVL destroy routine. Frees a cache entry for this endpoint.
1135 *
1136 * @returns IPRT status code.
1137 * @param pNode The node to destroy.
1138 * @param pvUser Opaque user data.
1139 */
1140static int pdmBlkCacheEntryDestroy(PAVLRU64NODECORE pNode, void *pvUser)
1141{
1142 PPDMBLKCACHEENTRY pEntry = (PPDMBLKCACHEENTRY)pNode;
1143 PPDMBLKCACHEGLOBAL pCache = (PPDMBLKCACHEGLOBAL)pvUser;
1144 PPDMBLKCACHE pBlkCache = pEntry->pBlkCache;
1145
1146 while (ASMAtomicReadU32(&pEntry->fFlags) & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY))
1147 {
1148 /* Leave the locks to let the I/O thread make progress but reference the entry to prevent eviction. */
1149 pdmBlkCacheEntryRef(pEntry);
1150 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1151 pdmBlkCacheLockLeave(pCache);
1152
1153 RTThreadSleep(250);
1154
1155 /* Re-enter all locks */
1156 pdmBlkCacheLockEnter(pCache);
1157 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1158 pdmBlkCacheEntryRelease(pEntry);
1159 }
1160
1161 AssertMsg(!(pEntry->fFlags & (PDMBLKCACHE_ENTRY_IO_IN_PROGRESS | PDMBLKCACHE_ENTRY_IS_DIRTY)),
1162 ("Entry is dirty and/or still in progress fFlags=%#x\n", pEntry->fFlags));
1163
1164 bool fUpdateCache = pEntry->pList == &pCache->LruFrequentlyUsed
1165 || pEntry->pList == &pCache->LruRecentlyUsedIn;
1166
1167 pdmBlkCacheEntryRemoveFromList(pEntry);
1168
1169 if (fUpdateCache)
1170 pdmBlkCacheSub(pCache, pEntry->cbData);
1171
1172 RTMemPageFree(pEntry->pbData, pEntry->cbData);
1173 RTMemFree(pEntry);
1174
1175 return VINF_SUCCESS;
1176}
1177
1178/**
1179 * Destroys all cache resources used by the given endpoint.
1180 *
1181 * @returns nothing.
1182 * @param pEndpoint The endpoint to the destroy.
1183 */
1184VMMR3DECL(void) PDMR3BlkCacheRelease(PPDMBLKCACHE pBlkCache)
1185{
1186 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1187
1188 /* Make sure nobody is accessing the cache while we delete the tree. */
1189 pdmBlkCacheLockEnter(pCache);
1190 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1191 RTAvlrU64Destroy(pBlkCache->pTree, pdmBlkCacheEntryDestroy, pCache);
1192 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1193
1194 RTSpinlockDestroy(pBlkCache->LockList);
1195
1196 pCache->cRefs--;
1197 RTListNodeRemove(&pBlkCache->NodeCacheUser);
1198
1199 if ( !pCache->cRefs
1200 && pCache->u32CommitTimeoutMs > 0)
1201 TMTimerStop(pCache->pTimerCommit);
1202
1203 pdmBlkCacheLockLeave(pCache);
1204
1205 RTSemRWDestroy(pBlkCache->SemRWEntries);
1206
1207#ifdef VBOX_WITH_STATISTICS
1208 STAMR3Deregister(pCache->pVM, &pBlkCache->StatWriteDeferred);
1209#endif
1210
1211 RTStrFree(pBlkCache->pszId);
1212 RTMemFree(pBlkCache);
1213}
1214
1215VMMR3DECL(void) PDMR3BlkCacheReleaseDevice(PVM pVM, PPDMDEVINS pDevIns)
1216{
1217 LogFlow(("%s: pDevIns=%p\n", __FUNCTION__, pDevIns));
1218
1219 /*
1220 * Validate input.
1221 */
1222 if (!pDevIns)
1223 return;
1224 VM_ASSERT_EMT(pVM);
1225
1226 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1227 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1228
1229 /* Return silently if not supported. */
1230 if (!pBlkCacheGlobal)
1231 return;
1232
1233 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1234
1235 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1236 {
1237 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DEV
1238 && pBlkCache->u.Dev.pDevIns == pDevIns)
1239 PDMR3BlkCacheRelease(pBlkCache);
1240 }
1241
1242 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1243}
1244
1245VMMR3DECL(void) PDMR3BlkCacheReleaseDriver(PVM pVM, PPDMDRVINS pDrvIns)
1246{
1247 LogFlow(("%s: pDrvIns=%p\n", __FUNCTION__, pDrvIns));
1248
1249 /*
1250 * Validate input.
1251 */
1252 if (!pDrvIns)
1253 return;
1254 VM_ASSERT_EMT(pVM);
1255
1256 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1257 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1258
1259 /* Return silently if not supported. */
1260 if (!pBlkCacheGlobal)
1261 return;
1262
1263 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1264
1265 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1266 {
1267 if ( pBlkCache->enmType == PDMBLKCACHETYPE_DRV
1268 && pBlkCache->u.Drv.pDrvIns == pDrvIns)
1269 PDMR3BlkCacheRelease(pBlkCache);
1270 }
1271
1272 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1273}
1274
1275VMMR3DECL(void) PDMR3BlkCacheReleaseUsb(PVM pVM, PPDMUSBINS pUsbIns)
1276{
1277 LogFlow(("%s: pUsbIns=%p\n", __FUNCTION__, pUsbIns));
1278
1279 /*
1280 * Validate input.
1281 */
1282 if (!pUsbIns)
1283 return;
1284 VM_ASSERT_EMT(pVM);
1285
1286 PPDMBLKCACHEGLOBAL pBlkCacheGlobal = pVM->pUVM->pdm.s.pBlkCacheGlobal;
1287 PPDMBLKCACHE pBlkCache, pBlkCacheNext;
1288
1289 /* Return silently if not supported. */
1290 if (!pBlkCacheGlobal)
1291 return;
1292
1293 pdmBlkCacheLockEnter(pBlkCacheGlobal);
1294
1295 RTListForEachSafe(&pBlkCacheGlobal->ListUsers, pBlkCache, pBlkCacheNext, PDMBLKCACHE, NodeCacheUser)
1296 {
1297 if ( pBlkCache->enmType == PDMBLKCACHETYPE_USB
1298 && pBlkCache->u.Usb.pUsbIns == pUsbIns)
1299 PDMR3BlkCacheRelease(pBlkCache);
1300 }
1301
1302 pdmBlkCacheLockLeave(pBlkCacheGlobal);
1303}
1304
1305static PPDMBLKCACHEENTRY pdmBlkCacheGetCacheEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off)
1306{
1307 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1308 PPDMBLKCACHEENTRY pEntry = NULL;
1309
1310 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1311
1312 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1313 pEntry = (PPDMBLKCACHEENTRY)RTAvlrU64RangeGet(pBlkCache->pTree, off);
1314 if (pEntry)
1315 pdmBlkCacheEntryRef(pEntry);
1316 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1317
1318 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1319
1320 return pEntry;
1321}
1322
1323/**
1324 * Return the best fit cache entries for the given offset.
1325 *
1326 * @returns nothing.
1327 * @param pBlkCache The endpoint cache.
1328 * @param off The offset.
1329 * @param pEntryAbove Where to store the pointer to the best fit entry above the
1330 * the given offset. NULL if not required.
1331 * @param pEntryBelow Where to store the pointer to the best fit entry below the
1332 * the given offset. NULL if not required.
1333 */
1334static void pdmBlkCacheGetCacheBestFitEntryByOffset(PPDMBLKCACHE pBlkCache, uint64_t off,
1335 PPDMBLKCACHEENTRY *ppEntryAbove,
1336 PPDMBLKCACHEENTRY *ppEntryBelow)
1337{
1338 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1339
1340 STAM_PROFILE_ADV_START(&pCache->StatTreeGet, Cache);
1341
1342 RTSemRWRequestRead(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1343 if (ppEntryAbove)
1344 {
1345 *ppEntryAbove = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, true /*fAbove*/);
1346 if (*ppEntryAbove)
1347 pdmBlkCacheEntryRef(*ppEntryAbove);
1348 }
1349
1350 if (ppEntryBelow)
1351 {
1352 *ppEntryBelow = (PPDMBLKCACHEENTRY)RTAvlrU64GetBestFit(pBlkCache->pTree, off, false /*fAbove*/);
1353 if (*ppEntryBelow)
1354 pdmBlkCacheEntryRef(*ppEntryBelow);
1355 }
1356 RTSemRWReleaseRead(pBlkCache->SemRWEntries);
1357
1358 STAM_PROFILE_ADV_STOP(&pCache->StatTreeGet, Cache);
1359}
1360
1361static void pdmBlkCacheInsertEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEENTRY pEntry)
1362{
1363 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1364
1365 STAM_PROFILE_ADV_START(&pCache->StatTreeInsert, Cache);
1366 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1367 bool fInserted = RTAvlrU64Insert(pBlkCache->pTree, &pEntry->Core);
1368 AssertMsg(fInserted, ("Node was not inserted into tree\n"));
1369 STAM_PROFILE_ADV_STOP(&pCache->StatTreeInsert, Cache);
1370 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1371}
1372
1373/**
1374 * Allocates and initializes a new entry for the cache.
1375 * The entry has a reference count of 1.
1376 *
1377 * @returns Pointer to the new cache entry or NULL if out of memory.
1378 * @param pBlkCache The cache the entry belongs to.
1379 * @param off Start offset.
1380 * @param cbData Size of the cache entry.
1381 * @param pbBuffer Pointer to the buffer to use.
1382 * NULL if a new buffer should be allocated.
1383 * The buffer needs to have the same size of the entry.
1384 */
1385static PPDMBLKCACHEENTRY pdmBlkCacheEntryAlloc(PPDMBLKCACHE pBlkCache,
1386 uint64_t off, size_t cbData, uint8_t *pbBuffer)
1387{
1388 PPDMBLKCACHEENTRY pEntryNew = (PPDMBLKCACHEENTRY)RTMemAllocZ(sizeof(PDMBLKCACHEENTRY));
1389
1390 if (RT_UNLIKELY(!pEntryNew))
1391 return NULL;
1392
1393 pEntryNew->Core.Key = off;
1394 pEntryNew->Core.KeyLast = off + cbData - 1;
1395 pEntryNew->pBlkCache = pBlkCache;
1396 pEntryNew->fFlags = 0;
1397 pEntryNew->cRefs = 1; /* We are using it now. */
1398 pEntryNew->pList = NULL;
1399 pEntryNew->cbData = cbData;
1400 pEntryNew->pWaitingHead = NULL;
1401 pEntryNew->pWaitingTail = NULL;
1402 if (pbBuffer)
1403 pEntryNew->pbData = pbBuffer;
1404 else
1405 pEntryNew->pbData = (uint8_t *)RTMemPageAlloc(cbData);
1406
1407 if (RT_UNLIKELY(!pEntryNew->pbData))
1408 {
1409 RTMemFree(pEntryNew);
1410 return NULL;
1411 }
1412
1413 return pEntryNew;
1414}
1415
1416/**
1417 * Checks that a set of flags is set/clear acquiring the R/W semaphore
1418 * in exclusive mode.
1419 *
1420 * @returns true if the flag in fSet is set and the one in fClear is clear.
1421 * false otherwise.
1422 * The R/W semaphore is only held if true is returned.
1423 *
1424 * @param pBlkCache The endpoint cache instance data.
1425 * @param pEntry The entry to check the flags for.
1426 * @param fSet The flag which is tested to be set.
1427 * @param fClear The flag which is tested to be clear.
1428 */
1429DECLINLINE(bool) pdmBlkCacheEntryFlagIsSetClearAcquireLock(PPDMBLKCACHE pBlkCache,
1430 PPDMBLKCACHEENTRY pEntry,
1431 uint32_t fSet, uint32_t fClear)
1432{
1433 uint32_t fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1434 bool fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1435
1436 if (fPassed)
1437 {
1438 /* Acquire the lock and check again because the completion callback might have raced us. */
1439 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1440
1441 fFlags = ASMAtomicReadU32(&pEntry->fFlags);
1442 fPassed = ((fFlags & fSet) && !(fFlags & fClear));
1443
1444 /* Drop the lock if we didn't passed the test. */
1445 if (!fPassed)
1446 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1447 }
1448
1449 return fPassed;
1450}
1451
1452/**
1453 * Adds a segment to the waiting list for a cache entry
1454 * which is currently in progress.
1455 *
1456 * @returns nothing.
1457 * @param pEntry The cache entry to add the segment to.
1458 * @param pSeg The segment to add.
1459 */
1460DECLINLINE(void) pdmBlkCacheEntryAddWaiter(PPDMBLKCACHEENTRY pEntry,
1461 PPDMBLKCACHEWAITER pWaiter)
1462{
1463 pWaiter->pNext = NULL;
1464
1465 if (pEntry->pWaitingHead)
1466 {
1467 AssertPtr(pEntry->pWaitingTail);
1468
1469 pEntry->pWaitingTail->pNext = pWaiter;
1470 pEntry->pWaitingTail = pWaiter;
1471 }
1472 else
1473 {
1474 Assert(!pEntry->pWaitingTail);
1475
1476 pEntry->pWaitingHead = pWaiter;
1477 pEntry->pWaitingTail = pWaiter;
1478 }
1479}
1480
1481/**
1482 * Add a buffer described by the I/O memory context
1483 * to the entry waiting for completion.
1484 *
1485 * @returns VBox status code.
1486 * @param pEntry The entry to add the buffer to.
1487 * @param pTask Task associated with the buffer.
1488 * @param pIoMemCtx The memory context to use.
1489 * @param offDiff Offset from the start of the buffer
1490 * in the entry.
1491 * @param cbData Amount of data to wait for onthis entry.
1492 * @param fWrite Flag whether the task waits because it wants to write
1493 * to the cache entry.
1494 */
1495static int pdmBlkCacheEntryWaitersAdd(PPDMBLKCACHEENTRY pEntry,
1496 PPDMBLKCACHEREQ pReq,
1497 PCRTSGBUF pSgBuf, uint64_t offDiff,
1498 size_t cbData, bool fWrite)
1499{
1500 PPDMBLKCACHEWAITER pWaiter = (PPDMBLKCACHEWAITER)RTMemAllocZ(sizeof(PDMBLKCACHEWAITER));
1501 if (!pWaiter)
1502 return VERR_NO_MEMORY;
1503
1504 ASMAtomicIncU32(&pReq->cXfersPending);
1505 pWaiter->pReq = pReq;
1506 pWaiter->offCacheEntry = offDiff;
1507 pWaiter->cbTransfer = cbData;
1508 pWaiter->fWrite = fWrite;
1509 RTSgBufClone(&pWaiter->SgBuf, pSgBuf);
1510
1511 pdmBlkCacheEntryAddWaiter(pEntry, pWaiter);
1512
1513 return VINF_SUCCESS;
1514}
1515
1516/**
1517 * Calculate aligned offset and size for a new cache entry
1518 * which do not intersect with an already existing entry and the
1519 * file end.
1520 *
1521 * @returns The number of bytes the entry can hold of the requested amount
1522 * of byte.
1523 * @param pEndpoint The endpoint.
1524 * @param pBlkCache The endpoint cache.
1525 * @param off The start offset.
1526 * @param cb The number of bytes the entry needs to hold at least.
1527 * @param uAlignment Alignment of the boundary sizes.
1528 * @param poffAligned Where to store the aligned offset.
1529 * @param pcbAligned Where to store the aligned size of the entry.
1530 */
1531static size_t pdmBlkCacheEntryBoundariesCalc(PPDMBLKCACHE pBlkCache,
1532 uint64_t off, size_t cb,
1533 unsigned uAlignment,
1534 uint64_t *poffAligned, size_t *pcbAligned)
1535{
1536 size_t cbAligned;
1537 size_t cbInEntry = 0;
1538 uint64_t offAligned;
1539 PPDMBLKCACHEENTRY pEntryAbove = NULL;
1540 PPDMBLKCACHEENTRY pEntryBelow = NULL;
1541
1542 /* Get the best fit entries around the offset */
1543 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off, &pEntryAbove, &pEntryBelow);
1544
1545 /* Log the info */
1546 LogFlow(("%sest fit entry below off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1547 pEntryBelow ? "B" : "No b",
1548 off,
1549 pEntryBelow ? pEntryBelow->Core.Key : 0,
1550 pEntryBelow ? pEntryBelow->Core.KeyLast : 0,
1551 pEntryBelow ? pEntryBelow->cbData : 0));
1552
1553 LogFlow(("%sest fit entry above off=%llu (BestFit=%llu BestFitEnd=%llu BestFitSize=%u)\n",
1554 pEntryAbove ? "B" : "No b",
1555 off,
1556 pEntryAbove ? pEntryAbove->Core.Key : 0,
1557 pEntryAbove ? pEntryAbove->Core.KeyLast : 0,
1558 pEntryAbove ? pEntryAbove->cbData : 0));
1559
1560 /* Align the offset first. */
1561 offAligned = off & ~(uint64_t)(512-1);
1562 if ( pEntryBelow
1563 && offAligned <= pEntryBelow->Core.KeyLast)
1564 offAligned = pEntryBelow->Core.KeyLast;
1565
1566 if ( pEntryAbove
1567 && off + cb > pEntryAbove->Core.Key)
1568 {
1569 cbInEntry = pEntryAbove->Core.Key - off;
1570 cbAligned = pEntryAbove->Core.Key - offAligned;
1571 }
1572 else
1573 {
1574 cbAligned = cb;
1575 cbInEntry = cb;
1576#if 0
1577 /*
1578 * Align the size to a 4KB boundary.
1579 * Memory size is aligned to a page boundary
1580 * and memory is wasted if the size is rather small.
1581 * (For example reads with a size of 512 bytes).
1582 */
1583 cbInEntry = cb;
1584 cbAligned = RT_ALIGN_Z(cb + (off - offAligned), uAlignment);
1585
1586 /*
1587 * Clip to file size if the original request doesn't
1588 * exceed the file (not an appending write)
1589 */
1590 uint64_t cbReq = off + cb;
1591 if (cbReq >= pEndpoint->cbFile)
1592 cbAligned = cbReq - offAligned;
1593 else
1594 cbAligned = RT_MIN(pEndpoint->cbFile - offAligned, cbAligned);
1595 if (pEntryAbove)
1596 {
1597 Assert(pEntryAbove->Core.Key >= off);
1598 cbAligned = RT_MIN(cbAligned, pEntryAbove->Core.Key - offAligned);
1599 }
1600#endif
1601 }
1602
1603 /* A few sanity checks */
1604 AssertMsg(!pEntryBelow || pEntryBelow->Core.KeyLast < offAligned,
1605 ("Aligned start offset intersects with another cache entry\n"));
1606 AssertMsg(!pEntryAbove || (offAligned + cbAligned) <= pEntryAbove->Core.Key,
1607 ("Aligned size intersects with another cache entry\n"));
1608 Assert(cbInEntry <= cbAligned);
1609
1610 if (pEntryBelow)
1611 pdmBlkCacheEntryRelease(pEntryBelow);
1612 if (pEntryAbove)
1613 pdmBlkCacheEntryRelease(pEntryAbove);
1614
1615 LogFlow(("offAligned=%llu cbAligned=%u\n", offAligned, cbAligned));
1616
1617 *poffAligned = offAligned;
1618 *pcbAligned = cbAligned;
1619
1620 return cbInEntry;
1621}
1622
1623/**
1624 * Create a new cache entry evicting data from the cache if required.
1625 *
1626 * @returns Pointer to the new cache entry or NULL
1627 * if not enough bytes could be evicted from the cache.
1628 * @param pEndpoint The endpoint.
1629 * @param pBlkCache The endpoint cache.
1630 * @param off The offset.
1631 * @param cb Number of bytes the cache entry should have.
1632 * @param uAlignment Alignment the size of the entry should have.
1633 * @param pcbData Where to store the number of bytes the new
1634 * entry can hold. May be lower than actually requested
1635 * due to another entry intersecting the access range.
1636 */
1637static PPDMBLKCACHEENTRY pdmBlkCacheEntryCreate(PPDMBLKCACHE pBlkCache,
1638 uint64_t off, size_t cb,
1639 unsigned uAlignment,
1640 size_t *pcbData)
1641{
1642 uint64_t offStart = 0;
1643 size_t cbEntry = 0;
1644 PPDMBLKCACHEENTRY pEntryNew = NULL;
1645 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1646 uint8_t *pbBuffer = NULL;
1647
1648 *pcbData = pdmBlkCacheEntryBoundariesCalc(pBlkCache, off, cb, uAlignment,
1649 &offStart, &cbEntry);
1650
1651 pdmBlkCacheLockEnter(pCache);
1652 bool fEnough = pdmBlkCacheReclaim(pCache, cbEntry, true, &pbBuffer);
1653
1654 if (fEnough)
1655 {
1656 LogFlow(("Evicted enough bytes (%u requested). Creating new cache entry\n", cbEntry));
1657
1658 pEntryNew = pdmBlkCacheEntryAlloc(pBlkCache, offStart, cbEntry, pbBuffer);
1659 if (RT_LIKELY(pEntryNew))
1660 {
1661 pdmBlkCacheEntryAddToList(&pCache->LruRecentlyUsedIn, pEntryNew);
1662 pdmBlkCacheAdd(pCache, cbEntry);
1663 pdmBlkCacheLockLeave(pCache);
1664
1665 pdmBlkCacheInsertEntry(pBlkCache, pEntryNew);
1666
1667 AssertMsg( (off >= pEntryNew->Core.Key)
1668 && (off + *pcbData <= pEntryNew->Core.KeyLast + 1),
1669 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1670 off, pEntryNew->Core.Key));
1671 }
1672 else
1673 pdmBlkCacheLockLeave(pCache);
1674 }
1675 else
1676 pdmBlkCacheLockLeave(pCache);
1677
1678 return pEntryNew;
1679}
1680
1681static PPDMBLKCACHEREQ pdmBlkCacheReqAlloc(size_t cbXfer, void *pvUser)
1682{
1683 PPDMBLKCACHEREQ pReq = (PPDMBLKCACHEREQ)RTMemAlloc(sizeof(PDMBLKCACHEREQ));
1684
1685 if (RT_LIKELY(pReq))
1686 {
1687 pReq->pvUser = pvUser;
1688 pReq->cbXfer = cbXfer;
1689 pReq->rcReq = VINF_SUCCESS;
1690 pReq->cXfersPending = 0;
1691 }
1692
1693 return pReq;
1694}
1695
1696static void pdmBlkCacheReqComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq)
1697{
1698 switch (pBlkCache->enmType)
1699 {
1700 case PDMBLKCACHETYPE_DEV:
1701 {
1702 pBlkCache->u.Dev.pfnXferComplete(pBlkCache->u.Dev.pDevIns,
1703 pReq->pvUser, pReq->rcReq);
1704 break;
1705 }
1706 case PDMBLKCACHETYPE_DRV:
1707 {
1708 pBlkCache->u.Drv.pfnXferComplete(pBlkCache->u.Drv.pDrvIns,
1709 pReq->pvUser, pReq->rcReq);
1710 break;
1711 }
1712 case PDMBLKCACHETYPE_USB:
1713 {
1714 pBlkCache->u.Usb.pfnXferComplete(pBlkCache->u.Usb.pUsbIns,
1715 pReq->pvUser, pReq->rcReq);
1716 break;
1717 }
1718 case PDMBLKCACHETYPE_INTERNAL:
1719 {
1720 pBlkCache->u.Int.pfnXferComplete(pBlkCache->u.Int.pvUser,
1721 pReq->pvUser, pReq->rcReq);
1722 break;
1723 }
1724 default:
1725 AssertMsgFailed(("Unknown block cache type!\n"));
1726 }
1727
1728 RTMemFree(pReq);
1729}
1730
1731static bool pdmBlkCacheReqUpdate(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEREQ pReq,
1732 size_t cbComplete, int rcReq, bool fCallHandler)
1733{
1734 if (RT_FAILURE(rcReq))
1735 ASMAtomicCmpXchgS32(&pReq->rcReq, rcReq, VINF_SUCCESS);
1736
1737 AssertMsg(pReq->cbXfer >= cbComplete, ("Completed more than left\n"));
1738 AssertMsg(pReq->cXfersPending > 0, ("No transfers are pending for this request\n"));
1739 uint32_t cbOld = ASMAtomicSubU32(&pReq->cbXfer, cbComplete);
1740 uint32_t cXfersPending = ASMAtomicDecU32(&pReq->cXfersPending);
1741
1742 if ( !(cbOld - cbComplete)
1743 && !cXfersPending)
1744 {
1745 if (fCallHandler)
1746 pdmBlkCacheReqComplete(pBlkCache, pReq);
1747 return true;
1748 }
1749
1750 LogFlowFunc(("pReq=%#p cXfersPending=%u cbXfer=%u\n", pReq, cXfersPending, (cbOld - cbComplete)));
1751 return false;
1752}
1753
1754VMMR3DECL(int) PDMR3BlkCacheRead(PPDMBLKCACHE pBlkCache, uint64_t off,
1755 PCRTSGBUF pcSgBuf, size_t cbRead, void *pvUser)
1756{
1757 int rc = VINF_SUCCESS;
1758 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1759 PPDMBLKCACHEENTRY pEntry;
1760 PPDMBLKCACHEREQ pReq;
1761
1762 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbRead=%u pvUser=%#p\n",
1763 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbRead, pvUser));
1764
1765 RTSGBUF SgBuf;
1766 RTSgBufClone(&SgBuf, pcSgBuf);
1767
1768 /* Allocate new request structure. */
1769 pReq = pdmBlkCacheReqAlloc(cbRead, pvUser);
1770 if (RT_UNLIKELY(!pReq))
1771 return VERR_NO_MEMORY;
1772
1773 /* Increment data transfer counter to keep the request valid while we access it. */
1774 ASMAtomicIncU32(&pReq->cXfersPending);
1775
1776 while (cbRead)
1777 {
1778 size_t cbToRead;
1779
1780 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1781
1782 /*
1783 * If there is no entry we try to create a new one eviciting unused pages
1784 * if the cache is full. If this is not possible we will pass the request through
1785 * and skip the caching (all entries may be still in progress so they can't
1786 * be evicted)
1787 * If we have an entry it can be in one of the LRU lists where the entry
1788 * contains data (recently used or frequently used LRU) so we can just read
1789 * the data we need and put the entry at the head of the frequently used LRU list.
1790 * In case the entry is in one of the ghost lists it doesn't contain any data.
1791 * We have to fetch it again evicting pages from either T1 or T2 to make room.
1792 */
1793 if (pEntry)
1794 {
1795 uint64_t offDiff = off - pEntry->Core.Key;
1796
1797 AssertMsg(off >= pEntry->Core.Key,
1798 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
1799 off, pEntry->Core.Key));
1800
1801 AssertPtr(pEntry->pList);
1802
1803 cbToRead = RT_MIN(pEntry->cbData - offDiff, cbRead);
1804
1805 AssertMsg(off + cbToRead <= pEntry->Core.Key + pEntry->Core.KeyLast + 1,
1806 ("Buffer of cache entry exceeded off=%llu cbToRead=%d\n",
1807 off, cbToRead));
1808
1809 cbRead -= cbToRead;
1810
1811 if (!cbRead)
1812 STAM_COUNTER_INC(&pCache->cHits);
1813 else
1814 STAM_COUNTER_INC(&pCache->cPartialHits);
1815
1816 STAM_COUNTER_ADD(&pCache->StatRead, cbToRead);
1817
1818 /* Ghost lists contain no data. */
1819 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
1820 || (pEntry->pList == &pCache->LruFrequentlyUsed))
1821 {
1822 if (pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
1823 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
1824 PDMBLKCACHE_ENTRY_IS_DIRTY))
1825 {
1826 /* Entry didn't completed yet. Append to the list */
1827 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
1828 &SgBuf, offDiff, cbToRead,
1829 false /* fWrite */);
1830 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1831 }
1832 else
1833 {
1834 /* Read as much as we can from the entry. */
1835 RTSgBufCopyFromBuf(&SgBuf, pEntry->pbData + offDiff, cbToRead);
1836 ASMAtomicSubU32(&pReq->cbXfer, cbToRead);
1837 }
1838
1839 /* Move this entry to the top position */
1840 if (pEntry->pList == &pCache->LruFrequentlyUsed)
1841 {
1842 pdmBlkCacheLockEnter(pCache);
1843 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1844 pdmBlkCacheLockLeave(pCache);
1845 }
1846 /* Release the entry */
1847 pdmBlkCacheEntryRelease(pEntry);
1848 }
1849 else
1850 {
1851 uint8_t *pbBuffer = NULL;
1852
1853 LogFlow(("Fetching data for ghost entry %#p from file\n", pEntry));
1854
1855 pdmBlkCacheLockEnter(pCache);
1856 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
1857 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
1858
1859 /* Move the entry to Am and fetch it to the cache. */
1860 if (fEnough)
1861 {
1862 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
1863 pdmBlkCacheAdd(pCache, pEntry->cbData);
1864 pdmBlkCacheLockLeave(pCache);
1865
1866 if (pbBuffer)
1867 pEntry->pbData = pbBuffer;
1868 else
1869 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
1870 AssertPtr(pEntry->pbData);
1871
1872 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
1873 &SgBuf, offDiff, cbToRead,
1874 false /* fWrite */);
1875 pdmBlkCacheEntryReadFromMedium(pEntry);
1876 /* Release the entry */
1877 pdmBlkCacheEntryRelease(pEntry);
1878 }
1879 else
1880 {
1881 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
1882 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
1883 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
1884 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
1885 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
1886
1887 pdmBlkCacheLockLeave(pCache);
1888
1889 RTMemFree(pEntry);
1890
1891 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1892 &SgBuf, off, cbToRead,
1893 PDMBLKCACHEXFERDIR_READ);
1894 }
1895 }
1896 }
1897 else
1898 {
1899#ifdef VBOX_WITH_IO_READ_CACHE
1900 /* No entry found for this offset. Create a new entry and fetch the data to the cache. */
1901 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
1902 off, cbRead,
1903 PAGE_SIZE,
1904 &cbToRead);
1905
1906 cbRead -= cbToRead;
1907
1908 if (pEntryNew)
1909 {
1910 if (!cbRead)
1911 STAM_COUNTER_INC(&pCache->cMisses);
1912 else
1913 STAM_COUNTER_INC(&pCache->cPartialHits);
1914
1915 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
1916 &SgBuf,
1917 off - pEntryNew->Core.Key,
1918 cbToRead,
1919 false /* fWrite */);
1920 pdmBlkCacheEntryReadFromMedium(pEntryNew);
1921 pdmBlkCacheEntryRelease(pEntryNew); /* it is protected by the I/O in progress flag now. */
1922 }
1923 else
1924 {
1925 /*
1926 * There is not enough free space in the cache.
1927 * Pass the request directly to the I/O manager.
1928 */
1929 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToRead));
1930
1931 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1932 &SgBuf, off, cbToRead,
1933 PDMBLKCACHEXFERDIR_READ);
1934 }
1935#else
1936 /* Clip read size if necessary. */
1937 PPDMBLKCACHEENTRY pEntryAbove;
1938 pdmBlkCacheGetCacheBestFitEntryByOffset(pBlkCache, off,
1939 &pEntryAbove, NULL);
1940
1941 if (pEntryAbove)
1942 {
1943 if (off + cbRead > pEntryAbove->Core.Key)
1944 cbToRead = pEntryAbove->Core.Key - off;
1945 else
1946 cbToRead = cbRead;
1947
1948 pdmBlkCacheEntryRelease(pEntryAbove);
1949 }
1950 else
1951 cbToRead = cbRead;
1952
1953 cbRead -= cbToRead;
1954 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
1955 &SgBuf, off, cbToRead,
1956 PDMBLKCACHEXFERDIR_READ);
1957#endif
1958 }
1959 off += cbToRead;
1960 }
1961
1962 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
1963 rc = VINF_AIO_TASK_PENDING;
1964
1965 LogFlowFunc((": Leave rc=%Rrc\n", rc));
1966
1967 return rc;
1968}
1969
1970VMMR3DECL(int) PDMR3BlkCacheWrite(PPDMBLKCACHE pBlkCache, uint64_t off,
1971 PCRTSGBUF pcSgBuf, size_t cbWrite, void *pvUser)
1972{
1973 int rc = VINF_SUCCESS;
1974 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
1975 PPDMBLKCACHEENTRY pEntry;
1976 PPDMBLKCACHEREQ pReq;
1977
1978 LogFlowFunc((": pBlkCache=%#p{%s} off=%llu pcSgBuf=%#p cbWrite=%u pvUser=%#p\n",
1979 pBlkCache, pBlkCache->pszId, off, pcSgBuf, cbWrite, pvUser));
1980
1981 RTSGBUF SgBuf;
1982 RTSgBufClone(&SgBuf, pcSgBuf);
1983
1984 /* Allocate new request structure. */
1985 pReq = pdmBlkCacheReqAlloc(cbWrite, pvUser);
1986 if (RT_UNLIKELY(!pReq))
1987 return VERR_NO_MEMORY;
1988
1989 /* Increment data transfer counter to keep the request valid while we access it. */
1990 ASMAtomicIncU32(&pReq->cXfersPending);
1991
1992 while (cbWrite)
1993 {
1994 size_t cbToWrite;
1995
1996 pEntry = pdmBlkCacheGetCacheEntryByOffset(pBlkCache, off);
1997
1998 if (pEntry)
1999 {
2000 /* Write the data into the entry and mark it as dirty */
2001 AssertPtr(pEntry->pList);
2002
2003 uint64_t offDiff = off - pEntry->Core.Key;
2004
2005 AssertMsg(off >= pEntry->Core.Key,
2006 ("Overflow in calculation off=%llu OffsetAligned=%llu\n",
2007 off, pEntry->Core.Key));
2008
2009 cbToWrite = RT_MIN(pEntry->cbData - offDiff, cbWrite);
2010 cbWrite -= cbToWrite;
2011
2012 if (!cbWrite)
2013 STAM_COUNTER_INC(&pCache->cHits);
2014 else
2015 STAM_COUNTER_INC(&pCache->cPartialHits);
2016
2017 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2018
2019 /* Ghost lists contain no data. */
2020 if ( (pEntry->pList == &pCache->LruRecentlyUsedIn)
2021 || (pEntry->pList == &pCache->LruFrequentlyUsed))
2022 {
2023 /* Check if the entry is dirty. */
2024 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2025 PDMBLKCACHE_ENTRY_IS_DIRTY,
2026 0))
2027 {
2028 /* If it is already dirty but not in progress just update the data. */
2029 if (!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IO_IN_PROGRESS))
2030 {
2031 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff,
2032 cbToWrite);
2033 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2034 }
2035 else
2036 {
2037 /* The data isn't written to the file yet */
2038 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2039 &SgBuf, offDiff, cbToWrite,
2040 true /* fWrite */);
2041 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2042 }
2043
2044 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2045 }
2046 else /* Dirty bit not set */
2047 {
2048 /*
2049 * Check if a read is in progress for this entry.
2050 * We have to defer processing in that case.
2051 */
2052 if(pdmBlkCacheEntryFlagIsSetClearAcquireLock(pBlkCache, pEntry,
2053 PDMBLKCACHE_ENTRY_IO_IN_PROGRESS,
2054 0))
2055 {
2056 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2057 &SgBuf, offDiff, cbToWrite,
2058 true /* fWrite */);
2059 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2060 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2061 }
2062 else /* I/O in progress flag not set */
2063 {
2064 /* Write as much as we can into the entry and update the file. */
2065 RTSgBufCopyToBuf(&SgBuf, pEntry->pbData + offDiff, cbToWrite);
2066 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2067
2068 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2069 if (fCommit)
2070 pdmBlkCacheCommitDirtyEntries(pCache);
2071 }
2072 } /* Dirty bit not set */
2073
2074 /* Move this entry to the top position */
2075 if (pEntry->pList == &pCache->LruFrequentlyUsed)
2076 {
2077 pdmBlkCacheLockEnter(pCache);
2078 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2079 pdmBlkCacheLockLeave(pCache);
2080 }
2081
2082 pdmBlkCacheEntryRelease(pEntry);
2083 }
2084 else /* Entry is on the ghost list */
2085 {
2086 uint8_t *pbBuffer = NULL;
2087
2088 pdmBlkCacheLockEnter(pCache);
2089 pdmBlkCacheEntryRemoveFromList(pEntry); /* Remove it before we remove data, otherwise it may get freed when evicting data. */
2090 bool fEnough = pdmBlkCacheReclaim(pCache, pEntry->cbData, true, &pbBuffer);
2091
2092 if (fEnough)
2093 {
2094 /* Move the entry to Am and fetch it to the cache. */
2095 pdmBlkCacheEntryAddToList(&pCache->LruFrequentlyUsed, pEntry);
2096 pdmBlkCacheAdd(pCache, pEntry->cbData);
2097 pdmBlkCacheLockLeave(pCache);
2098
2099 if (pbBuffer)
2100 pEntry->pbData = pbBuffer;
2101 else
2102 pEntry->pbData = (uint8_t *)RTMemPageAlloc(pEntry->cbData);
2103 AssertPtr(pEntry->pbData);
2104
2105 pdmBlkCacheEntryWaitersAdd(pEntry, pReq,
2106 &SgBuf, offDiff, cbToWrite,
2107 true /* fWrite */);
2108 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2109 pdmBlkCacheEntryReadFromMedium(pEntry);
2110
2111 /* Release the reference. If it is still needed the I/O in progress flag should protect it now. */
2112 pdmBlkCacheEntryRelease(pEntry);
2113 }
2114 else
2115 {
2116 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2117 STAM_PROFILE_ADV_START(&pCache->StatTreeRemove, Cache);
2118 RTAvlrU64Remove(pBlkCache->pTree, pEntry->Core.Key);
2119 STAM_PROFILE_ADV_STOP(&pCache->StatTreeRemove, Cache);
2120 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2121
2122 pdmBlkCacheLockLeave(pCache);
2123
2124 RTMemFree(pEntry);
2125 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2126 &SgBuf, off, cbToWrite,
2127 PDMBLKCACHEXFERDIR_WRITE);
2128 }
2129 }
2130 }
2131 else /* No entry found */
2132 {
2133 /*
2134 * No entry found. Try to create a new cache entry to store the data in and if that fails
2135 * write directly to the file.
2136 */
2137 PPDMBLKCACHEENTRY pEntryNew = pdmBlkCacheEntryCreate(pBlkCache,
2138 off, cbWrite,
2139 512, &cbToWrite);
2140
2141 cbWrite -= cbToWrite;
2142
2143 if (pEntryNew)
2144 {
2145 uint64_t offDiff = off - pEntryNew->Core.Key;
2146
2147 STAM_COUNTER_INC(&pCache->cHits);
2148
2149 /*
2150 * Check if it is possible to just write the data without waiting
2151 * for it to get fetched first.
2152 */
2153 if (!offDiff && pEntryNew->cbData == cbToWrite)
2154 {
2155 RTSgBufCopyToBuf(&SgBuf, pEntryNew->pbData, cbToWrite);
2156 ASMAtomicSubU32(&pReq->cbXfer, cbToWrite);
2157
2158 bool fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntryNew);
2159 if (fCommit)
2160 pdmBlkCacheCommitDirtyEntries(pCache);
2161 STAM_COUNTER_ADD(&pCache->StatWritten, cbToWrite);
2162 }
2163 else
2164 {
2165 /* Defer the write and fetch the data from the endpoint. */
2166 pdmBlkCacheEntryWaitersAdd(pEntryNew, pReq,
2167 &SgBuf, offDiff, cbToWrite,
2168 true /* fWrite */);
2169 STAM_COUNTER_INC(&pBlkCache->StatWriteDeferred);
2170 pdmBlkCacheEntryReadFromMedium(pEntryNew);
2171 }
2172
2173 pdmBlkCacheEntryRelease(pEntryNew);
2174 }
2175 else
2176 {
2177 /*
2178 * There is not enough free space in the cache.
2179 * Pass the request directly to the I/O manager.
2180 */
2181 LogFlow(("Couldn't evict %u bytes from the cache. Remaining request will be passed through\n", cbToWrite));
2182
2183 STAM_COUNTER_INC(&pCache->cMisses);
2184
2185 pdmBlkCacheRequestPassthrough(pBlkCache, pReq,
2186 &SgBuf, off, cbToWrite,
2187 PDMBLKCACHEXFERDIR_WRITE);
2188 }
2189 }
2190
2191 off += cbToWrite;
2192 }
2193
2194 if (!pdmBlkCacheReqUpdate(pBlkCache, pReq, 0, rc, false))
2195 rc = VINF_AIO_TASK_PENDING;
2196
2197 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2198
2199 return rc;
2200}
2201
2202VMMR3DECL(int) PDMR3BlkCacheFlush(PPDMBLKCACHE pBlkCache, void *pvUser)
2203{
2204 int rc = VINF_SUCCESS;
2205
2206 LogFlowFunc((": pBlkCache=%#p{%s}\n", pBlkCache, pBlkCache->pszId));
2207
2208 /* Commit dirty entries in the cache. */
2209 pdmBlkCacheCommit(pBlkCache);
2210
2211 LogFlowFunc((": Leave rc=%Rrc\n", rc));
2212 return rc;
2213}
2214
2215/**
2216 * Completes a task segment freeing all resources and completes the task handle
2217 * if everything was transferred.
2218 *
2219 * @returns Next task segment handle.
2220 * @param pTaskSeg Task segment to complete.
2221 * @param rc Status code to set.
2222 */
2223static PPDMBLKCACHEWAITER pdmBlkCacheWaiterComplete(PPDMBLKCACHE pBlkCache,
2224 PPDMBLKCACHEWAITER pWaiter,
2225 int rc)
2226{
2227 PPDMBLKCACHEWAITER pNext = pWaiter->pNext;
2228 PPDMBLKCACHEREQ pReq = pWaiter->pReq;
2229
2230 pdmBlkCacheReqUpdate(pBlkCache, pWaiter->pReq, pWaiter->cbTransfer, rc, true);
2231
2232 RTMemFree(pWaiter);
2233
2234 return pNext;
2235}
2236
2237static void pdmBlkCacheIoXferCompleteEntry(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2238{
2239 PPDMBLKCACHEENTRY pEntry = hIoXfer->pEntry;
2240 PPDMBLKCACHEGLOBAL pCache = pBlkCache->pCache;
2241
2242 /* Reference the entry now as we are clearing the I/O in progress flag
2243 * which protected the entry till now. */
2244 pdmBlkCacheEntryRef(pEntry);
2245
2246 RTSemRWRequestWrite(pBlkCache->SemRWEntries, RT_INDEFINITE_WAIT);
2247 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IO_IN_PROGRESS;
2248
2249 /* Process waiting segment list. The data in entry might have changed in-between. */
2250 bool fDirty = false;
2251 PPDMBLKCACHEWAITER pComplete = pEntry->pWaitingHead;
2252 PPDMBLKCACHEWAITER pCurr = pComplete;
2253
2254 AssertMsg((pCurr && pEntry->pWaitingTail) || (!pCurr && !pEntry->pWaitingTail),
2255 ("The list tail was not updated correctly\n"));
2256 pEntry->pWaitingTail = NULL;
2257 pEntry->pWaitingHead = NULL;
2258
2259 if (hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_WRITE)
2260 {
2261 /*
2262 * An error here is difficult to handle as the original request completed already.
2263 * The error is logged for now and the VM is paused.
2264 * If the user continues the entry is written again in the hope
2265 * the user fixed the problem and the next write succeeds.
2266 */
2267 /** @todo r=aeichner: This solution doesn't work
2268 * The user will get the message but the VM will hang afterwards
2269 * VMR3Suspend() returns when the VM is suspended but suspending
2270 * the VM will reopen the images readonly in DrvVD. They are closed first
2271 * which will close the endpoints. This will block EMT while the
2272 * I/O manager processes the close request but the IO manager is stuck
2273 * in the VMR3Suspend call and can't process the request.
2274 * Another problem is that closing the VM means flushing the cache
2275 * but the entry failed and will probably fail again.
2276 * No idea so far how to solve this problem... but the user gets informed
2277 * at least.
2278 */
2279 if (RT_FAILURE(rcIoXfer))
2280 {
2281 LogRel(("I/O cache: Error while writing entry at offset %llu (%u bytes) to medium \"%s\"\n",
2282 pEntry->Core.Key, pEntry->cbData, pBlkCache->pszId));
2283
2284 int rc = VMSetRuntimeError(pCache->pVM, VMSETRTERR_FLAGS_SUSPEND | VMSETRTERR_FLAGS_NO_WAIT, "BLKCACHE_IOERR",
2285 N_("The I/O cache encountered an error while updating data in medium \"%s\" (rc=%Rrc)."
2286 "Make sure there is enough free space on the disk and that the disk is working properly."
2287 "Operation can be resumed afterwards."),
2288 pBlkCache->pszId, rcIoXfer);
2289 AssertRC(rc);
2290 }
2291 else
2292 {
2293 pEntry->fFlags &= ~PDMBLKCACHE_ENTRY_IS_DIRTY;
2294
2295 while (pCurr)
2296 {
2297 AssertMsg(pCurr->fWrite, ("Completed write entries should never have read tasks attached\n"));
2298
2299 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2300 fDirty = true;
2301
2302 pCurr = pCurr->pNext;
2303 }
2304 }
2305 }
2306 else
2307 {
2308 AssertMsg(hIoXfer->enmXferDir == PDMBLKCACHEXFERDIR_READ, ("Invalid transfer type\n"));
2309 AssertMsg(!(pEntry->fFlags & PDMBLKCACHE_ENTRY_IS_DIRTY),
2310 ("Invalid flags set\n"));
2311
2312 while (pCurr)
2313 {
2314 if (pCurr->fWrite)
2315 {
2316 RTSgBufCopyToBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2317 fDirty = true;
2318 }
2319 else
2320 RTSgBufCopyFromBuf(&pCurr->SgBuf, pEntry->pbData + pCurr->offCacheEntry, pCurr->cbTransfer);
2321
2322 pCurr = pCurr->pNext;
2323 }
2324 }
2325
2326 bool fCommit = false;
2327 if (fDirty)
2328 fCommit = pdmBlkCacheAddDirtyEntry(pBlkCache, pEntry);
2329
2330 RTSemRWReleaseWrite(pBlkCache->SemRWEntries);
2331
2332 /* Dereference so that it isn't protected anymore except we issued anyother write for it. */
2333 pdmBlkCacheEntryRelease(pEntry);
2334
2335 if (fCommit)
2336 pdmBlkCacheCommitDirtyEntries(pCache);
2337
2338 /* Complete waiters now. */
2339 while (pComplete)
2340 pComplete = pdmBlkCacheWaiterComplete(pBlkCache, pComplete, rcIoXfer);
2341}
2342
2343VMMR3DECL(void) PDMR3BlkCacheIoXferComplete(PPDMBLKCACHE pBlkCache, PPDMBLKCACHEIOXFER hIoXfer, int rcIoXfer)
2344{
2345 LogFlowFunc(("pBlkCache=%#p hIoXfer=%#p rcIoXfer=%Rrc\n", pBlkCache, hIoXfer, rcIoXfer));
2346
2347 if (hIoXfer->fIoCache)
2348 pdmBlkCacheIoXferCompleteEntry(pBlkCache, hIoXfer, rcIoXfer);
2349 else
2350 pdmBlkCacheReqUpdate(pBlkCache, hIoXfer->pReq, hIoXfer->cbXfer, rcIoXfer, true);
2351}
2352
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette