VirtualBox

Changeset 45155 in vbox


Ignore:
Timestamp:
Mar 24, 2013 8:08:10 PM (12 years ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
84477
Message:

Storage: Another shot at the I/O unification after fixing a bug

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Storage/VD.cpp

    r44799 r45155  
    355355            /** S/G buffer */
    356356            RTSGBUF              SgBuf;
     357            /** Number of bytes to clear in the buffer before the current read. */
     358            size_t               cbBufClear;
     359            /** Number of images to read. */
     360            unsigned             cImagesRead;
     361            /** Override for the parent image to start reading from. */
     362            PVDIMAGE             pImageParentOverride;
    357363        } Io;
    358364        /** Discard requests. */
     
    424430
    425431/** Default flags for an I/O context, i.e. unblocked and async. */
    426 #define VDIOCTX_FLAGS_DEFAULT (0)
     432#define VDIOCTX_FLAGS_DEFAULT                   (0)
    427433/** Flag whether the context is blocked. */
    428 #define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0)
     434#define VDIOCTX_FLAGS_BLOCKED          RT_BIT_32(0)
    429435/** Flag whether the I/O context is using synchronous I/O. */
    430 #define VDIOCTX_FLAGS_SYNC    RT_BIT_32(1)
     436#define VDIOCTX_FLAGS_SYNC             RT_BIT_32(1)
     437/** Flag whether the read should update the cache. */
     438#define VDIOCTX_FLAGS_READ_UDATE_CACHE RT_BIT_32(2)
     439/** Flag whether free blocks should be zeroed.
     440 * If false and no image has data for the specified
     441 * range VERR_VD_BLOCK_FREE is returned for the I/O context.
     442 * Note that unallocated blocks are still zeroed
     443 * if at least one image has valid data for a part
     444 * of the range.
     445 */
     446#define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS RT_BIT_32(3)
     447/** Don't free the I/O context when complete because
     448 * it was allocated elsewhere (stack, ...). */
     449#define VDIOCTX_FLAGS_DONT_FREE        RT_BIT_32(4)
    431450
    432451/** NIL I/O context pointer value. */
     
    577596/** Forward declaration of the async discard helper. */
    578597static int vdDiscardHelperAsync(PVDIOCTX pIoCtx);
     598static int vdWriteHelperAsync(PVDIOCTX pIoCtx);
    579599static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk);
    580600static int vdDiskUnlock(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc);
     601static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq);
    581602
    582603/**
     
    809830    pIoCtx->Req.Io.pImageStart    = pImageStart;
    810831    pIoCtx->Req.Io.pImageCur      = pImageStart;
     832    pIoCtx->Req.Io.cbBufClear     = 0;
     833    pIoCtx->Req.Io.pImageParentOverride = NULL;
    811834    pIoCtx->cDataTransfersPending = 0;
    812835    pIoCtx->cMetaTransfersPending = 0;
     
    817840    pIoCtx->pfnIoCtxTransferNext  = NULL;
    818841    pIoCtx->rcReq                 = VINF_SUCCESS;
     842    pIoCtx->pIoCtxParent          = NULL;
    819843
    820844    /* There is no S/G list for a flush request. */
     
    845869 */
    846870static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset,
    847                              PVDIOCTX pIoCtx, size_t cbRead, size_t *pcbRead)
     871                             size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead)
    848872{
    849873    int rc = VINF_SUCCESS;
     
    908932
    909933/**
     934 * Creates a new empty discard state.
     935 *
     936 * @returns Pointer to the new discard state or NULL if out of memory.
     937 */
     938static PVDDISCARDSTATE vdDiscardStateCreate(void)
     939{
     940    PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));
     941
     942    if (pDiscard)
     943    {
     944        RTListInit(&pDiscard->ListLru);
     945        pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
     946        if (!pDiscard->pTreeBlocks)
     947        {
     948            RTMemFree(pDiscard);
     949            pDiscard = NULL;
     950        }
     951    }
     952
     953    return pDiscard;
     954}
     955
     956/**
     957 * Removes the least recently used blocks from the waiting list until
     958 * the new value is reached.
     959 *
     960 * @returns VBox status code.
     961 * @param   pDisk              VD disk container.
     962 * @param   pDiscard           The discard state.
     963 * @param   cbDiscardingNew    How many bytes should be waiting on success.
     964 *                             The number of bytes waiting can be less.
     965 */
     966static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)
     967{
     968    int rc = VINF_SUCCESS;
     969
     970    LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
     971                 pDisk, pDiscard, cbDiscardingNew));
     972
     973    while (pDiscard->cbDiscarding > cbDiscardingNew)
     974    {
     975        PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
     976
     977        Assert(!RTListIsEmpty(&pDiscard->ListLru));
     978
     979        /* Go over the allocation bitmap and mark all discarded sectors as unused. */
     980        uint64_t offStart = pBlock->Core.Key;
     981        uint32_t idxStart = 0;
     982        size_t cbLeft = pBlock->cbDiscard;
     983        bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
     984        uint32_t cSectors = pBlock->cbDiscard / 512;
     985
     986        while (cbLeft > 0)
     987        {
     988            int32_t idxEnd;
     989            size_t cbThis = cbLeft;
     990
     991            if (fAllocated)
     992            {
     993                /* Check for the first unallocated bit. */
     994                idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
     995                if (idxEnd != -1)
     996                {
     997                    cbThis = (idxEnd - idxStart) * 512;
     998                    fAllocated = false;
     999                }
     1000            }
     1001            else
     1002            {
     1003                /* Mark as unused and check for the first set bit. */
     1004                idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
     1005                if (idxEnd != -1)
     1006                    cbThis = (idxEnd - idxStart) * 512;
     1007
     1008
     1009                VDIOCTX IoCtx;
     1010                vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,
     1011                            NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
     1012                rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,
     1013                                                            &IoCtx, offStart, cbThis, NULL,
     1014                                                            NULL, &cbThis, NULL,
     1015                                                            VD_DISCARD_MARK_UNUSED);
     1016                if (RT_FAILURE(rc))
     1017                    break;
     1018
     1019                fAllocated = true;
     1020            }
     1021
     1022            idxStart  = idxEnd;
     1023            offStart += cbThis;
     1024            cbLeft   -= cbThis;
     1025        }
     1026
     1027        if (RT_FAILURE(rc))
     1028            break;
     1029
     1030        PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
     1031        Assert(pBlockRemove == pBlock);
     1032        RTListNodeRemove(&pBlock->NodeLru);
     1033
     1034        pDiscard->cbDiscarding -= pBlock->cbDiscard;
     1035        RTMemFree(pBlock->pbmAllocated);
     1036        RTMemFree(pBlock);
     1037    }
     1038
     1039    Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
     1040
     1041    LogFlowFunc(("returns rc=%Rrc\n", rc));
     1042    return rc;
     1043}
     1044
     1045/**
     1046 * Destroys the current discard state, writing any waiting blocks to the image.
     1047 *
     1048 * @returns VBox status code.
     1049 * @param   pDisk    VD disk container.
     1050 */
     1051static int vdDiscardStateDestroy(PVBOXHDD pDisk)
     1052{
     1053    int rc = VINF_SUCCESS;
     1054
     1055    if (pDisk->pDiscard)
     1056    {
     1057        rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);
     1058        AssertRC(rc);
     1059        RTMemFree(pDisk->pDiscard->pTreeBlocks);
     1060        RTMemFree(pDisk->pDiscard);
     1061        pDisk->pDiscard = NULL;
     1062    }
     1063
     1064    return rc;
     1065}
     1066
     1067/**
     1068 * Marks the given range as allocated in the image.
     1069 * Required if there are discards in progress and a write to a block which can get discarded
     1070 * is written to.
     1071 *
     1072 * @returns VBox status code.
     1073 * @param   pDisk    VD container data.
     1074 * @param   uOffset  First byte to mark as allocated.
     1075 * @param   cbRange  Number of bytes to mark as allocated.
     1076 */
     1077static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange)
     1078{
     1079    PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
     1080    int rc = VINF_SUCCESS;
     1081
     1082    if (pDiscard)
     1083    {
     1084        do
     1085        {
     1086            size_t cbThisRange = cbRange;
     1087            PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);
     1088
     1089            if (pBlock)
     1090            {
     1091                int32_t idxStart, idxEnd;
     1092
     1093                Assert(!(cbThisRange % 512));
     1094                Assert(!((uOffset - pBlock->Core.Key) % 512));
     1095
     1096                cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);
     1097
     1098                idxStart = (uOffset - pBlock->Core.Key) / 512;
     1099                idxEnd = idxStart + (cbThisRange / 512);
     1100                ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);
     1101            }
     1102            else
     1103            {
     1104                pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);
     1105                if (pBlock)
     1106                    cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);
     1107            }
     1108
     1109            Assert(cbRange >= cbThisRange);
     1110
     1111            uOffset += cbThisRange;
     1112            cbRange -= cbThisRange;
     1113        } while (cbRange != 0);
     1114    }
     1115
     1116    return rc;
     1117}
     1118
     1119DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
     1120                                  uint64_t uOffset, size_t cbTransfer,
     1121                                  PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,
     1122                                  void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
     1123                                  uint32_t fFlags)
     1124{
     1125    PVDIOCTX pIoCtx = NULL;
     1126
     1127    pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
     1128    if (RT_LIKELY(pIoCtx))
     1129    {
     1130        vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
     1131                    pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
     1132    }
     1133
     1134    return pIoCtx;
     1135}
     1136
     1137DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
     1138                                      uint64_t uOffset, size_t cbTransfer,
     1139                                      PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
     1140                                      PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
     1141                                      void *pvUser1, void *pvUser2,
     1142                                      void *pvAllocation,
     1143                                      PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
     1144                                      uint32_t fFlags)
     1145{
     1146    PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
     1147                                   pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
     1148
     1149    if (RT_LIKELY(pIoCtx))
     1150    {
     1151        pIoCtx->pIoCtxParent          = NULL;
     1152        pIoCtx->Type.Root.pfnComplete = pfnComplete;
     1153        pIoCtx->Type.Root.pvUser1     = pvUser1;
     1154        pIoCtx->Type.Root.pvUser2     = pvUser2;
     1155    }
     1156
     1157    LogFlow(("Allocated root I/O context %#p\n", pIoCtx));
     1158    return pIoCtx;
     1159}
     1160
     1161DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges,
     1162                                         unsigned cRanges,
     1163                                         PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
     1164                                         void *pvUser1, void *pvUser2,
     1165                                         void *pvAllocation,
     1166                                         PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
     1167                                         uint32_t fFlags)
     1168{
     1169    PVDIOCTX pIoCtx = NULL;
     1170
     1171    pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
     1172    if (RT_LIKELY(pIoCtx))
     1173    {
     1174        pIoCtx->pIoCtxNext                = NULL;
     1175        pIoCtx->pDisk                     = pDisk;
     1176        pIoCtx->enmTxDir                  = VDIOCTXTXDIR_DISCARD;
     1177        pIoCtx->cDataTransfersPending     = 0;
     1178        pIoCtx->cMetaTransfersPending     = 0;
     1179        pIoCtx->fComplete                 = false;
     1180        pIoCtx->fFlags                    = fFlags;
     1181        pIoCtx->pvAllocation              = pvAllocation;
     1182        pIoCtx->pfnIoCtxTransfer          = pfnIoCtxTransfer;
     1183        pIoCtx->pfnIoCtxTransferNext      = NULL;
     1184        pIoCtx->rcReq                     = VINF_SUCCESS;
     1185        pIoCtx->Req.Discard.paRanges      = paRanges;
     1186        pIoCtx->Req.Discard.cRanges       = cRanges;
     1187        pIoCtx->Req.Discard.idxRange      = 0;
     1188        pIoCtx->Req.Discard.cbDiscardLeft = 0;
     1189        pIoCtx->Req.Discard.offCur        = 0;
     1190        pIoCtx->Req.Discard.cbThisDiscard = 0;
     1191
     1192        pIoCtx->pIoCtxParent          = NULL;
     1193        pIoCtx->Type.Root.pfnComplete = pfnComplete;
     1194        pIoCtx->Type.Root.pvUser1     = pvUser1;
     1195        pIoCtx->Type.Root.pvUser2     = pvUser2;
     1196    }
     1197
     1198    LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));
     1199    return pIoCtx;
     1200}
     1201
     1202DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
     1203                                       uint64_t uOffset, size_t cbTransfer,
     1204                                       PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
     1205                                       PVDIOCTX pIoCtxParent, size_t cbTransferParent,
     1206                                       size_t cbWriteParent, void *pvAllocation,
     1207                                       PFNVDIOCTXTRANSFER pfnIoCtxTransfer)
     1208{
     1209    PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
     1210                                   pcSgBuf, pvAllocation, pfnIoCtxTransfer, pIoCtxParent->fFlags & ~VDIOCTX_FLAGS_DONT_FREE);
     1211
     1212    AssertPtr(pIoCtxParent);
     1213    Assert(!pIoCtxParent->pIoCtxParent);
     1214
     1215    if (RT_LIKELY(pIoCtx))
     1216    {
     1217        pIoCtx->pIoCtxParent                   = pIoCtxParent;
     1218        pIoCtx->Type.Child.uOffsetSaved        = uOffset;
     1219        pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;
     1220        pIoCtx->Type.Child.cbTransferParent    = cbTransferParent;
     1221        pIoCtx->Type.Child.cbWriteParent       = cbWriteParent;
     1222    }
     1223
     1224    LogFlow(("Allocated child I/O context %#p\n", pIoCtx));
     1225    return pIoCtx;
     1226}
     1227
     1228DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)
     1229{
     1230    PVDIOTASK pIoTask = NULL;
     1231
     1232    pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
     1233    if (pIoTask)
     1234    {
     1235        pIoTask->pIoStorage           = pIoStorage;
     1236        pIoTask->pfnComplete          = pfnComplete;
     1237        pIoTask->pvUser               = pvUser;
     1238        pIoTask->fMeta                = false;
     1239        pIoTask->Type.User.cbTransfer = cbTransfer;
     1240        pIoTask->Type.User.pIoCtx     = pIoCtx;
     1241    }
     1242
     1243    return pIoTask;
     1244}
     1245
     1246DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)
     1247{
     1248    PVDIOTASK pIoTask = NULL;
     1249
     1250    pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
     1251    if (pIoTask)
     1252    {
     1253        pIoTask->pIoStorage          = pIoStorage;
     1254        pIoTask->pfnComplete         = pfnComplete;
     1255        pIoTask->pvUser              = pvUser;
     1256        pIoTask->fMeta               = true;
     1257        pIoTask->Type.Meta.pMetaXfer = pMetaXfer;
     1258    }
     1259
     1260    return pIoTask;
     1261}
     1262
     1263DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1264{
     1265    Log(("Freeing I/O context %#p\n", pIoCtx));
     1266
     1267    if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE))
     1268    {
     1269        if (pIoCtx->pvAllocation)
     1270            RTMemFree(pIoCtx->pvAllocation);
     1271#ifdef DEBUG
     1272        memset(&pIoCtx->pDisk, 0xff, sizeof(void *));
     1273#endif
     1274        RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);
     1275    }
     1276}
     1277
     1278DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask)
     1279{
     1280//#ifdef DEBUG
     1281    memset(pIoTask, 0xff, sizeof(VDIOTASK));
     1282//#endif
     1283    RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);
     1284}
     1285
     1286DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)
     1287{
     1288    AssertPtr(pIoCtx->pIoCtxParent);
     1289
     1290    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
     1291    pIoCtx->Req.Io.uOffset        = pIoCtx->Type.Child.uOffsetSaved;
     1292    pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved;
     1293}
     1294
     1295DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)
     1296{
     1297    PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));
     1298
     1299    if (RT_LIKELY(pMetaXfer))
     1300    {
     1301        pMetaXfer->Core.Key     = uOffset;
     1302        pMetaXfer->Core.KeyLast = uOffset + cb - 1;
     1303        pMetaXfer->fFlags       = VDMETAXFER_TXDIR_NONE;
     1304        pMetaXfer->cbMeta       = cb;
     1305        pMetaXfer->pIoStorage   = pIoStorage;
     1306        pMetaXfer->cRefs        = 0;
     1307        RTListInit(&pMetaXfer->ListIoCtxWaiting);
     1308    }
     1309    return pMetaXfer;
     1310}
     1311
     1312DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)
     1313{
     1314    /* Put it on the waiting list. */
     1315    PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);
     1316    PVDIOCTX pHeadOld;
     1317    pIoCtx->pIoCtxNext = pNext;
     1318    while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))
     1319    {
     1320        pNext = pHeadOld;
     1321        Assert(pNext != pIoCtx);
     1322        pIoCtx->pIoCtxNext = pNext;
     1323        ASMNopPause();
     1324    }
     1325}
     1326
     1327DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1328{
     1329    LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx));
     1330
     1331    Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));
     1332    pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
     1333    vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);
     1334}
     1335
     1336static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)
     1337{
     1338    return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);
     1339}
     1340
     1341static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)
     1342{
     1343    return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);
     1344}
     1345
     1346static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)
     1347{
     1348    return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
     1349}
     1350
     1351static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)
     1352{
     1353    return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
     1354}
     1355
     1356static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)
     1357{
     1358    return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);
     1359}
     1360
     1361/**
     1362 * Process the I/O context, core method which assumes that the I/O context
     1363 * acquired the lock.
     1364 *
     1365 * @returns VBox status code.
     1366 * @param   pIoCtx    I/O context to process.
     1367 */
     1368static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)
     1369{
     1370    int rc = VINF_SUCCESS;
     1371
     1372    VD_IS_LOCKED(pIoCtx->pDisk);
     1373
     1374    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     1375
     1376    if (   !pIoCtx->cMetaTransfersPending
     1377        && !pIoCtx->cDataTransfersPending
     1378        && !pIoCtx->pfnIoCtxTransfer)
     1379    {
     1380        rc = VINF_VD_ASYNC_IO_FINISHED;
     1381        goto out;
     1382    }
     1383
     1384    /*
     1385     * We complete the I/O context in case of an error
     1386     * if there is no I/O task pending.
     1387     */
     1388    if (   RT_FAILURE(pIoCtx->rcReq)
     1389        && !pIoCtx->cMetaTransfersPending
     1390        && !pIoCtx->cDataTransfersPending)
     1391    {
     1392        rc = VINF_VD_ASYNC_IO_FINISHED;
     1393        goto out;
     1394    }
     1395
     1396    /* Don't change anything if there is a metadata transfer pending or we are blocked. */
     1397    if (   pIoCtx->cMetaTransfersPending
     1398        || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
     1399    {
     1400        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1401        goto out;
     1402    }
     1403
     1404    if (pIoCtx->pfnIoCtxTransfer)
     1405    {
     1406        /* Call the transfer function advancing to the next while there is no error. */
     1407        while (   pIoCtx->pfnIoCtxTransfer
     1408               && !pIoCtx->cMetaTransfersPending
     1409               && RT_SUCCESS(rc))
     1410        {
     1411            LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));
     1412            rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);
     1413
     1414            /* Advance to the next part of the transfer if the current one succeeded. */
     1415            if (RT_SUCCESS(rc))
     1416            {
     1417                pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
     1418                pIoCtx->pfnIoCtxTransferNext = NULL;
     1419            }
     1420        }
     1421    }
     1422
     1423    if (   RT_SUCCESS(rc)
     1424        && !pIoCtx->cMetaTransfersPending
     1425        && !pIoCtx->cDataTransfersPending)
     1426        rc = VINF_VD_ASYNC_IO_FINISHED;
     1427    else if (   RT_SUCCESS(rc)
     1428             || rc == VERR_VD_NOT_ENOUGH_METADATA
     1429             || rc == VERR_VD_IOCTX_HALT)
     1430        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1431    else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
     1432    {
     1433        ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);
     1434        /*
     1435         * The I/O context completed if we have an error and there is no data
     1436         * or meta data transfer pending.
     1437         */
     1438        if (   !pIoCtx->cMetaTransfersPending
     1439            && !pIoCtx->cDataTransfersPending)
     1440            rc = VINF_VD_ASYNC_IO_FINISHED;
     1441        else
     1442            rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1443    }
     1444
     1445out:
     1446    LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
     1447                 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,
     1448                 pIoCtx->fComplete));
     1449
     1450    return rc;
     1451}
     1452
     1453/**
     1454 * Processes the list of waiting I/O contexts.
     1455 *
     1456 * @returns VBox status code.
     1457 * @param   pDisk    The disk structure.
     1458 * @param   pIoCtxRc An I/O context handle which waits on the list. When processed
     1459 *                   The status code is returned. NULL if there is no I/O context
     1460 *                   to return the status code for.
     1461 */
     1462static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)
     1463{
     1464    int rc = VINF_SUCCESS;
     1465
     1466    LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));
     1467
     1468    VD_IS_LOCKED(pDisk);
     1469
     1470    /* Get the waiting list and process it in FIFO order. */
     1471    PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);
     1472
     1473    /* Reverse it. */
     1474    PVDIOCTX pCur = pIoCtxHead;
     1475    pIoCtxHead = NULL;
     1476    while (pCur)
     1477    {
     1478        PVDIOCTX pInsert = pCur;
     1479        pCur = pCur->pIoCtxNext;
     1480        pInsert->pIoCtxNext = pIoCtxHead;
     1481        pIoCtxHead = pInsert;
     1482    }
     1483
     1484    /* Process now. */
     1485    pCur = pIoCtxHead;
     1486    while (pCur)
     1487    {
     1488        int rcTmp;
     1489        PVDIOCTX pTmp = pCur;
     1490
     1491        pCur = pCur->pIoCtxNext;
     1492        pTmp->pIoCtxNext = NULL;
     1493
     1494        /*
     1495         * Need to clear the sync flag here if there is a new I/O context
     1496         * with it set and the context is not given in pIoCtxRc.
     1497         * This happens most likely on a different thread and that one shouldn't
     1498         * process the context synchronously.
     1499         *
     1500         * The thread who issued the context will wait on the event semaphore
     1501         * anyway which is signalled when the completion handler is called.
     1502         */
     1503        if (   pTmp->fFlags & VDIOCTX_FLAGS_SYNC
     1504            && pTmp != pIoCtxRc)
     1505            pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC;
     1506
     1507        rcTmp = vdIoCtxProcessLocked(pTmp);
     1508        if (pTmp == pIoCtxRc)
     1509        {
     1510            /* The given I/O context was processed, pass the return code to the caller. */
     1511            rc = rcTmp;
     1512        }
     1513        else if (   rcTmp == VINF_VD_ASYNC_IO_FINISHED
     1514                 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
     1515        {
     1516            LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
     1517            vdThreadFinishWrite(pDisk);
     1518            pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
     1519                                        pTmp->Type.Root.pvUser2,
     1520                                        pTmp->rcReq);
     1521            vdIoCtxFree(pDisk, pTmp);
     1522        }
     1523    }
     1524
     1525    LogFlowFunc(("returns rc=%Rrc\n", rc));
     1526    return rc;
     1527}
     1528
     1529/**
     1530 * Processes the list of blocked I/O contexts.
     1531 *
     1532 * @returns nothing.
     1533 * @param   pDisk    The disk structure.
     1534 */
     1535static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)
     1536{
     1537    LogFlowFunc(("pDisk=%#p\n", pDisk));
     1538
     1539    VD_IS_LOCKED(pDisk);
     1540
     1541    /* Get the waiting list and process it in FIFO order. */
     1542    PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);
     1543
     1544    /* Reverse it. */
     1545    PVDIOCTX pCur = pIoCtxHead;
     1546    pIoCtxHead = NULL;
     1547    while (pCur)
     1548    {
     1549        PVDIOCTX pInsert = pCur;
     1550        pCur = pCur->pIoCtxNext;
     1551        pInsert->pIoCtxNext = pIoCtxHead;
     1552        pIoCtxHead = pInsert;
     1553    }
     1554
     1555    /* Process now. */
     1556    pCur = pIoCtxHead;
     1557    while (pCur)
     1558    {
     1559        int rc;
     1560        PVDIOCTX pTmp = pCur;
     1561
     1562        pCur = pCur->pIoCtxNext;
     1563        pTmp->pIoCtxNext = NULL;
     1564
     1565        Assert(!pTmp->pIoCtxParent);
     1566        Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);
     1567        pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
     1568
     1569        rc = vdIoCtxProcessLocked(pTmp);
     1570        if (   rc == VINF_VD_ASYNC_IO_FINISHED
     1571            && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
     1572        {
     1573            LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
     1574            vdThreadFinishWrite(pDisk);
     1575            pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
     1576                                        pTmp->Type.Root.pvUser2,
     1577                                        pTmp->rcReq);
     1578            vdIoCtxFree(pDisk, pTmp);
     1579        }
     1580    }
     1581
     1582    LogFlowFunc(("returns\n"));
     1583}
     1584
     1585/**
     1586 * Processes the I/O context trying to lock the criticial section.
     1587 * The context is deferred if the critical section is busy.
     1588 *
     1589 * @returns VBox status code.
     1590 * @param   pIoCtx    The I/O context to process.
     1591 */
     1592static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)
     1593{
     1594    int rc = VINF_SUCCESS;
     1595    PVBOXHDD pDisk = pIoCtx->pDisk;
     1596
     1597    Log(("Defer pIoCtx=%#p\n", pIoCtx));
     1598
     1599    /* Put it on the waiting list first. */
     1600    vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);
     1601
     1602    if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
     1603    {
     1604        /* Leave it again, the context will be processed just before leaving the lock. */
     1605        LogFlowFunc(("Successfully acquired the lock\n"));
     1606        rc = vdDiskUnlock(pDisk, pIoCtx);
     1607    }
     1608    else
     1609    {
     1610        LogFlowFunc(("Lock is held\n"));
     1611        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1612    }
     1613
     1614    return rc;
     1615}
     1616
     1617/**
     1618 * Process the I/O context in a synchronous manner, waiting
     1619 * for it to complete.
     1620 *
     1621 * @returns VBox status code of the completed request.
     1622 * @param   pIoCtx    The sync I/O context.
     1623 */
     1624static int vdIoCtxProcessSync(PVDIOCTX pIoCtx)
     1625{
     1626    int rc = VINF_SUCCESS;
     1627    PVBOXHDD pDisk = pIoCtx->pDisk;
     1628
     1629    LogFlowFunc(("pIoCtx=%p\n", pIoCtx));
     1630
     1631    AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC,
     1632              ("I/O context is not marked as synchronous\n"));
     1633
     1634    rc = vdIoCtxProcessTryLockDefer(pIoCtx);
     1635    if (rc == VINF_VD_ASYNC_IO_FINISHED)
     1636        rc = VINF_SUCCESS;
     1637
     1638    if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
     1639    {
     1640        rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT);
     1641        AssertRC(rc);
     1642
     1643        rc = pDisk->rcSync;
     1644    }
     1645    else /* Success or error. */
     1646        vdIoCtxFree(pDisk, pIoCtx);
     1647
     1648    return rc;
     1649}
     1650
     1651DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1652{
     1653    return pDisk->pIoCtxLockOwner == pIoCtx;
     1654}
     1655
     1656static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1657{
     1658    int rc = VINF_SUCCESS;
     1659
     1660    VD_IS_LOCKED(pDisk);
     1661
     1662    LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));
     1663
     1664    if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))
     1665    {
     1666        Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */
     1667        vdIoCtxDefer(pDisk, pIoCtx);
     1668        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1669    }
     1670
     1671    LogFlowFunc(("returns -> %Rrc\n", rc));
     1672    return rc;
     1673}
     1674
     1675static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
     1676{
     1677    LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
     1678                 pDisk, pIoCtx, fProcessBlockedReqs));
     1679
     1680    VD_IS_LOCKED(pDisk);
     1681
     1682    LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
     1683    Assert(pDisk->pIoCtxLockOwner == pIoCtx);
     1684    ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);
     1685
     1686    if (fProcessBlockedReqs)
     1687    {
     1688        /* Process any blocked writes if the current request didn't cause another growing. */
     1689        vdDiskProcessBlockedIoCtx(pDisk);
     1690    }
     1691
     1692    LogFlowFunc(("returns\n"));
     1693}
     1694
     1695/**
    9101696 * Internal: Reads a given amount of data from the image chain of the disk.
    9111697 **/
    9121698static int vdDiskReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride,
    913                             uint64_t uOffset, void *pvBuf, size_t cbRead, size_t *pcbThisRead)
     1699                            uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead)
    9141700{
    9151701    int rc = VINF_SUCCESS;
    9161702    size_t cbThisRead = cbRead;
    917     RTSGSEG SegmentBuf;
    918     RTSGBUF SgBuf;
    919     VDIOCTX IoCtx;
    9201703
    9211704    AssertPtr(pcbThisRead);
    9221705
    9231706    *pcbThisRead = 0;
    924 
    925     SegmentBuf.pvSeg = pvBuf;
    926     SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE;
    927     RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    928     vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL,
    929                 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    9301707
    9311708    /*
     
    9341711     */
    9351712    rc = pImage->Backend->pfnRead(pImage->pBackendData,
    936                                        uOffset, cbThisRead, &IoCtx,
    937                                        &cbThisRead);
     1713                                  uOffset, cbThisRead, pIoCtx,
     1714                                  &cbThisRead);
    9381715
    9391716    if (rc == VERR_VD_BLOCK_FREE)
     
    9441721        {
    9451722            rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    946                                                    uOffset, cbThisRead, &IoCtx,
    947                                                    &cbThisRead);
     1723                                              uOffset, cbThisRead, pIoCtx,
     1724                                              &cbThisRead);
    9481725        }
    9491726    }
     
    9521729        *pcbThisRead = cbThisRead;
    9531730
     1731    return rc;
     1732}
     1733
     1734/**
     1735 * internal: read the specified amount of data in whatever blocks the backend
     1736 * will give us - async version.
     1737 */
     1738static int vdReadHelperAsync(PVDIOCTX pIoCtx)
     1739{
     1740    int rc;
     1741    PVBOXHDD pDisk                = pIoCtx->pDisk;
     1742    size_t cbToRead               = pIoCtx->Req.Io.cbTransfer;
     1743    uint64_t uOffset              = pIoCtx->Req.Io.uOffset;
     1744    PVDIMAGE pCurrImage           = pIoCtx->Req.Io.pImageCur;
     1745    PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;
     1746    unsigned cImagesRead          = pIoCtx->Req.Io.cImagesRead;
     1747    size_t cbThisRead;
     1748
     1749    /* Loop until all reads started or we have a backend which needs to read metadata. */
     1750    do
     1751    {
     1752        /* Search for image with allocated block. Do not attempt to read more
     1753         * than the previous reads marked as valid. Otherwise this would return
     1754         * stale data when different block sizes are used for the images. */
     1755        cbThisRead = cbToRead;
     1756
     1757        if (   pDisk->pCache
     1758            && !pImageParentOverride)
     1759        {
     1760            rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead,
     1761                                   pIoCtx, &cbThisRead);
     1762            if (rc == VERR_VD_BLOCK_FREE)
     1763            {
     1764                rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead,
     1765                                      pIoCtx, &cbThisRead);
     1766
     1767                /* If the read was successful, write the data back into the cache. */
     1768                if (   RT_SUCCESS(rc)
     1769                    && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UDATE_CACHE)
     1770                {
     1771                    rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead,
     1772                                            pIoCtx, NULL);
     1773                }
     1774            }
     1775        }
     1776        else
     1777        {
     1778
     1779            /*
     1780             * Try to read from the given image.
     1781             * If the block is not allocated read from override chain if present.
     1782             */
     1783            rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
     1784                                              uOffset, cbThisRead, pIoCtx,
     1785                                              &cbThisRead);
     1786
     1787            if (   rc == VERR_VD_BLOCK_FREE
     1788                && cImagesRead != 1)
     1789            {
     1790                unsigned cImagesToProcess = cImagesRead;
     1791
     1792                pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev;
     1793                pIoCtx->Req.Io.pImageParentOverride = NULL;
     1794
     1795                while (pCurrImage && rc == VERR_VD_BLOCK_FREE)
     1796                {
     1797                    rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
     1798                                                      uOffset, cbThisRead,
     1799                                                      pIoCtx, &cbThisRead);
     1800                    if (cImagesToProcess == 1)
     1801                        break;
     1802                    else if (cImagesToProcess > 0)
     1803                        cImagesToProcess--;
     1804
     1805                    if (rc == VERR_VD_BLOCK_FREE)
     1806                        pCurrImage = pCurrImage->pPrev;
     1807                }
     1808            }
     1809        }
     1810
     1811        /* The task state will be updated on success already, don't do it here! */
     1812        if (rc == VERR_VD_BLOCK_FREE)
     1813        {
     1814            /* No image in the chain contains the data for the block. */
     1815            ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);
     1816
     1817            /* Fill the free space with 0 if we are told to do so
     1818             * or a previous read returned valid data. */
     1819            if (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)
     1820                vdIoCtxSet(pIoCtx, '\0', cbThisRead);
     1821            else
     1822                pIoCtx->Req.Io.cbBufClear += cbThisRead;
     1823
     1824            if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)
     1825                rc = VINF_VD_NEW_ZEROED_BLOCK;
     1826            else
     1827                rc = VINF_SUCCESS;
     1828        }
     1829        else if (rc == VERR_VD_IOCTX_HALT)
     1830        {
     1831            uOffset  += cbThisRead;
     1832            cbToRead -= cbThisRead;
     1833            pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
     1834        }
     1835        else if (   RT_SUCCESS(rc)
     1836                 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
     1837        {
     1838            /* First not free block, fill the space before with 0. */
     1839            if (   pIoCtx->Req.Io.cbBufClear
     1840                && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
     1841            {
     1842                RTSGBUF SgBuf;
     1843                RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf);
     1844                RTSgBufReset(&SgBuf);
     1845                RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear);
     1846                pIoCtx->Req.Io.cbBufClear = 0;
     1847                pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
     1848            }
     1849            rc = VINF_SUCCESS;
     1850        }
     1851
     1852        if (RT_FAILURE(rc))
     1853            break;
     1854
     1855        cbToRead -= cbThisRead;
     1856        uOffset  += cbThisRead;
     1857        pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
     1858    } while (cbToRead != 0 && RT_SUCCESS(rc));
     1859
     1860    if (   rc == VERR_VD_NOT_ENOUGH_METADATA
     1861        || rc == VERR_VD_IOCTX_HALT)
     1862    {
     1863        /* Save the current state. */
     1864        pIoCtx->Req.Io.uOffset    = uOffset;
     1865        pIoCtx->Req.Io.cbTransfer = cbToRead;
     1866        pIoCtx->Req.Io.pImageCur  = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
     1867    }
     1868
     1869    return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
     1870           ? VERR_VD_BLOCK_FREE
     1871           : rc;
     1872}
     1873
     1874/**
     1875 * internal: parent image read wrapper for compacting.
     1876 */
     1877static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,
     1878                        size_t cbRead)
     1879{
     1880    PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;
     1881
     1882    /** @todo
     1883     * Only used for compaction so far which is not possible to mix with async I/O.
     1884     * Needs to be changed if we want to support online compaction of images.
     1885     */
     1886    bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true);
     1887    AssertMsgReturn(!fLocked,
     1888                    ("Calling synchronous parent read while another thread holds the disk lock\n"),
     1889                    VERR_VD_INVALID_STATE);
     1890
     1891    /* Fake an I/O context. */
     1892    RTSGSEG Segment;
     1893    RTSGBUF SgBuf;
     1894    VDIOCTX IoCtx;
     1895
     1896    Segment.pvSeg = pvBuf;
     1897    Segment.cbSeg = cbRead;
     1898    RTSgBufInit(&SgBuf, &Segment, 1);
     1899    vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage,
     1900                &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
     1901    int rc = vdReadHelperAsync(&IoCtx);
     1902    ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false);
    9541903    return rc;
    9551904}
     
    9841933                          bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead)
    9851934{
    986     int rc = VINF_SUCCESS;
    987     size_t cbThisRead;
    988     bool fAllFree = true;
    989     size_t cbBufClear = 0;
    990 
    991     /* Loop until all read. */
    992     do
    993     {
    994         /* Search for image with allocated block. Do not attempt to read more
    995          * than the previous reads marked as valid. Otherwise this would return
    996          * stale data when different block sizes are used for the images. */
    997         cbThisRead = cbRead;
    998 
    999         if (   pDisk->pCache
    1000             && !pImageParentOverride)
    1001         {
    1002 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */
    1003             rc = vdCacheReadHelper(pDisk->pCache, uOffset, pvBuf,
    1004                                    cbThisRead, &cbThisRead);
    1005 #endif
    1006             if (rc == VERR_VD_BLOCK_FREE)
    1007             {
    1008                 rc = vdDiskReadHelper(pDisk, pImage, NULL, uOffset, pvBuf, cbThisRead,
    1009                                       &cbThisRead);
    1010 
    1011                 /* If the read was successful, write the data back into the cache. */
    1012                 if (   RT_SUCCESS(rc)
    1013                     && fUpdateCache)
    1014                 {
    1015 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */
    1016                     rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf,
    1017                                             cbThisRead, NULL);
    1018 #endif
    1019                 }
    1020             }
    1021         }
    1022         else
    1023         {
    1024             RTSGSEG SegmentBuf;
    1025             RTSGBUF SgBuf;
    1026             VDIOCTX IoCtx;
    1027 
    1028             SegmentBuf.pvSeg = pvBuf;
    1029             SegmentBuf.cbSeg = cbThisRead;
    1030             RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    1031             vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL,
    1032                         &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    1033 
    1034             /*
    1035              * Try to read from the given image.
    1036              * If the block is not allocated read from override chain if present.
    1037              */
    1038             rc = pImage->Backend->pfnRead(pImage->pBackendData,
    1039                                                uOffset, cbThisRead, &IoCtx,
    1040                                                &cbThisRead);
    1041 
    1042             if (   rc == VERR_VD_BLOCK_FREE
    1043                 && cImagesRead != 1)
    1044             {
    1045                 unsigned cImagesToProcess = cImagesRead;
    1046 
    1047                 for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev;
    1048                      pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE;
    1049                      pCurrImage = pCurrImage->pPrev)
    1050                 {
    1051                     rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    1052                                                            uOffset, cbThisRead,
    1053                                                            &IoCtx, &cbThisRead);
    1054                     if (cImagesToProcess == 1)
    1055                         break;
    1056                     else if (cImagesToProcess > 0)
    1057                         cImagesToProcess--;
    1058                 }
    1059             }
    1060         }
    1061 
    1062         /* No image in the chain contains the data for the block. */
    1063         if (rc == VERR_VD_BLOCK_FREE)
    1064         {
    1065             /* Fill the free space with 0 if we are told to do so
    1066              * or a previous read returned valid data. */
    1067             if (fZeroFreeBlocks || !fAllFree)
    1068                 memset(pvBuf, '\0', cbThisRead);
    1069             else
    1070                 cbBufClear += cbThisRead;
    1071 
    1072             if (pImage->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)
    1073                 rc = VINF_VD_NEW_ZEROED_BLOCK;
    1074             else
    1075                 rc = VINF_SUCCESS;
    1076         }
    1077         else if (RT_SUCCESS(rc))
    1078         {
    1079             /* First not free block, fill the space before with 0. */
    1080             if (!fZeroFreeBlocks)
    1081             {
    1082                 memset((char *)pvBuf - cbBufClear, '\0', cbBufClear);
    1083                 cbBufClear = 0;
    1084                 fAllFree = false;
    1085             }
    1086         }
    1087 
    1088         cbRead -= cbThisRead;
    1089         uOffset += cbThisRead;
    1090         pvBuf = (char *)pvBuf + cbThisRead;
    1091     } while (cbRead != 0 && RT_SUCCESS(rc));
    1092 
    1093     return (!fZeroFreeBlocks && fAllFree) ? VERR_VD_BLOCK_FREE : rc;
     1935    uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
     1936    RTSGSEG Segment;
     1937    RTSGBUF SgBuf;
     1938    VDIOCTX IoCtx;
     1939
     1940    if (fZeroFreeBlocks)
     1941        fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
     1942    if (fUpdateCache)
     1943        fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE;
     1944
     1945    Segment.pvSeg = pvBuf;
     1946    Segment.cbSeg = cbRead;
     1947    RTSgBufInit(&SgBuf, &Segment, 1);
     1948    vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf,
     1949                NULL, vdReadHelperAsync, fFlags);
     1950
     1951    IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
     1952    IoCtx.Req.Io.cImagesRead = cImagesRead;
     1953    IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
     1954    IoCtx.Type.Root.pvUser1     = pDisk;
     1955    IoCtx.Type.Root.pvUser2     = NULL;
     1956    return vdIoCtxProcessSync(&IoCtx);
    10941957}
    10951958
     
    11031966    return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead,
    11041967                          true /* fZeroFreeBlocks */, fUpdateCache, 0);
    1105 }
    1106 
    1107 /**
    1108  * Creates a new empty discard state.
    1109  *
    1110  * @returns Pointer to the new discard state or NULL if out of memory.
    1111  */
    1112 static PVDDISCARDSTATE vdDiscardStateCreate(void)
    1113 {
    1114     PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));
    1115 
    1116     if (pDiscard)
    1117     {
    1118         RTListInit(&pDiscard->ListLru);
    1119         pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
    1120         if (!pDiscard->pTreeBlocks)
    1121         {
    1122             RTMemFree(pDiscard);
    1123             pDiscard = NULL;
    1124         }
    1125     }
    1126 
    1127     return pDiscard;
    1128 }
    1129 
    1130 /**
    1131  * Removes the least recently used blocks from the waiting list until
    1132  * the new value is reached.
    1133  *
    1134  * @returns VBox status code.
    1135  * @param   pDisk              VD disk container.
    1136  * @param   pDiscard           The discard state.
    1137  * @param   cbDiscardingNew    How many bytes should be waiting on success.
    1138  *                             The number of bytes waiting can be less.
    1139  */
    1140 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)
    1141 {
    1142     int rc = VINF_SUCCESS;
    1143 
    1144     LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
    1145                  pDisk, pDiscard, cbDiscardingNew));
    1146 
    1147     while (pDiscard->cbDiscarding > cbDiscardingNew)
    1148     {
    1149         PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
    1150 
    1151         Assert(!RTListIsEmpty(&pDiscard->ListLru));
    1152 
    1153         /* Go over the allocation bitmap and mark all discarded sectors as unused. */
    1154         uint64_t offStart = pBlock->Core.Key;
    1155         uint32_t idxStart = 0;
    1156         size_t cbLeft = pBlock->cbDiscard;
    1157         bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
    1158         uint32_t cSectors = pBlock->cbDiscard / 512;
    1159 
    1160         while (cbLeft > 0)
    1161         {
    1162             int32_t idxEnd;
    1163             size_t cbThis = cbLeft;
    1164 
    1165             if (fAllocated)
    1166             {
    1167                 /* Check for the first unallocated bit. */
    1168                 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
    1169                 if (idxEnd != -1)
    1170                 {
    1171                     cbThis = (idxEnd - idxStart) * 512;
    1172                     fAllocated = false;
    1173                 }
    1174             }
    1175             else
    1176             {
    1177                 /* Mark as unused and check for the first set bit. */
    1178                 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
    1179                 if (idxEnd != -1)
    1180                     cbThis = (idxEnd - idxStart) * 512;
    1181 
    1182 
    1183                 VDIOCTX IoCtx;
    1184                 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,
    1185                             NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    1186                 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,
    1187                                                             &IoCtx, offStart, cbThis, NULL,
    1188                                                             NULL, &cbThis, NULL,
    1189                                                             VD_DISCARD_MARK_UNUSED);
    1190                 if (RT_FAILURE(rc))
    1191                     break;
    1192 
    1193                 fAllocated = true;
    1194             }
    1195 
    1196             idxStart  = idxEnd;
    1197             offStart += cbThis;
    1198             cbLeft   -= cbThis;
    1199         }
    1200 
    1201         if (RT_FAILURE(rc))
    1202             break;
    1203 
    1204         PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
    1205         Assert(pBlockRemove == pBlock);
    1206         RTListNodeRemove(&pBlock->NodeLru);
    1207 
    1208         pDiscard->cbDiscarding -= pBlock->cbDiscard;
    1209         RTMemFree(pBlock->pbmAllocated);
    1210         RTMemFree(pBlock);
    1211     }
    1212 
    1213     Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
    1214 
    1215     LogFlowFunc(("returns rc=%Rrc\n", rc));
    1216     return rc;
    1217 }
    1218 
    1219 /**
    1220  * Destroys the current discard state, writing any waiting blocks to the image.
    1221  *
    1222  * @returns VBox status code.
    1223  * @param   pDisk    VD disk container.
    1224  */
    1225 static int vdDiscardStateDestroy(PVBOXHDD pDisk)
    1226 {
    1227     int rc = VINF_SUCCESS;
    1228 
    1229     if (pDisk->pDiscard)
    1230     {
    1231         rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);
    1232         AssertRC(rc);
    1233         RTMemFree(pDisk->pDiscard->pTreeBlocks);
    1234         RTMemFree(pDisk->pDiscard);
    1235         pDisk->pDiscard = NULL;
    1236     }
    1237 
    1238     return rc;
    1239 }
    1240 
    1241 /**
    1242  * Marks the given range as allocated in the image.
    1243  * Required if there are discards in progress and a write to a block which can get discarded
    1244  * is written to.
    1245  *
    1246  * @returns VBox status code.
    1247  * @param   pDisk    VD container data.
    1248  * @param   uOffset  First byte to mark as allocated.
    1249  * @param   cbRange  Number of bytes to mark as allocated.
    1250  */
    1251 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange)
    1252 {
    1253     PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
    1254     int rc = VINF_SUCCESS;
    1255 
    1256     if (pDiscard)
    1257     {
    1258         do
    1259         {
    1260             size_t cbThisRange = cbRange;
    1261             PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);
    1262 
    1263             if (pBlock)
    1264             {
    1265                 int32_t idxStart, idxEnd;
    1266 
    1267                 Assert(!(cbThisRange % 512));
    1268                 Assert(!((uOffset - pBlock->Core.Key) % 512));
    1269 
    1270                 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);
    1271 
    1272                 idxStart = (uOffset - pBlock->Core.Key) / 512;
    1273                 idxEnd = idxStart + (cbThisRange / 512);
    1274                 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);
    1275             }
    1276             else
    1277             {
    1278                 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);
    1279                 if (pBlock)
    1280                     cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);
    1281             }
    1282 
    1283             Assert(cbRange >= cbThisRange);
    1284 
    1285             uOffset += cbThisRange;
    1286             cbRange -= cbThisRange;
    1287         } while (cbRange != 0);
    1288     }
    1289 
    1290     return rc;
    1291 }
    1292 
    1293 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
    1294                                   uint64_t uOffset, size_t cbTransfer,
    1295                                   PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,
    1296                                   void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
    1297                                   uint32_t fFlags)
    1298 {
    1299     PVDIOCTX pIoCtx = NULL;
    1300 
    1301     pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
    1302     if (RT_LIKELY(pIoCtx))
    1303     {
    1304         vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
    1305                     pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
    1306     }
    1307 
    1308     return pIoCtx;
    1309 }
    1310 
    1311 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
    1312                                       uint64_t uOffset, size_t cbTransfer,
    1313                                       PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
    1314                                       PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
    1315                                       void *pvUser1, void *pvUser2,
    1316                                       void *pvAllocation,
    1317                                       PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
    1318                                       uint32_t fFlags)
    1319 {
    1320     PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
    1321                                    pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
    1322 
    1323     if (RT_LIKELY(pIoCtx))
    1324     {
    1325         pIoCtx->pIoCtxParent          = NULL;
    1326         pIoCtx->Type.Root.pfnComplete = pfnComplete;
    1327         pIoCtx->Type.Root.pvUser1     = pvUser1;
    1328         pIoCtx->Type.Root.pvUser2     = pvUser2;
    1329     }
    1330 
    1331     LogFlow(("Allocated root I/O context %#p\n", pIoCtx));
    1332     return pIoCtx;
    1333 }
    1334 
    1335 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges,
    1336                                          unsigned cRanges,
    1337                                          PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
    1338                                          void *pvUser1, void *pvUser2,
    1339                                          void *pvAllocation,
    1340                                          PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
    1341                                          uint32_t fFlags)
    1342 {
    1343     PVDIOCTX pIoCtx = NULL;
    1344 
    1345     pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
    1346     if (RT_LIKELY(pIoCtx))
    1347     {
    1348         pIoCtx->pIoCtxNext                = NULL;
    1349         pIoCtx->pDisk                     = pDisk;
    1350         pIoCtx->enmTxDir                  = VDIOCTXTXDIR_DISCARD;
    1351         pIoCtx->cDataTransfersPending     = 0;
    1352         pIoCtx->cMetaTransfersPending     = 0;
    1353         pIoCtx->fComplete                 = false;
    1354         pIoCtx->fFlags                    = fFlags;
    1355         pIoCtx->pvAllocation              = pvAllocation;
    1356         pIoCtx->pfnIoCtxTransfer          = pfnIoCtxTransfer;
    1357         pIoCtx->pfnIoCtxTransferNext      = NULL;
    1358         pIoCtx->rcReq                     = VINF_SUCCESS;
    1359         pIoCtx->Req.Discard.paRanges      = paRanges;
    1360         pIoCtx->Req.Discard.cRanges       = cRanges;
    1361         pIoCtx->Req.Discard.idxRange      = 0;
    1362         pIoCtx->Req.Discard.cbDiscardLeft = 0;
    1363         pIoCtx->Req.Discard.offCur        = 0;
    1364         pIoCtx->Req.Discard.cbThisDiscard = 0;
    1365 
    1366         pIoCtx->pIoCtxParent          = NULL;
    1367         pIoCtx->Type.Root.pfnComplete = pfnComplete;
    1368         pIoCtx->Type.Root.pvUser1     = pvUser1;
    1369         pIoCtx->Type.Root.pvUser2     = pvUser2;
    1370     }
    1371 
    1372     LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));
    1373     return pIoCtx;
    1374 }
    1375 
    /**
     * Allocates an I/O context that is a child of an existing context.
     *
     * The context is created through vdIoCtxAlloc() with flags 0. If that
     * yielded a context, it is linked to pIoCtxParent and the start offset and
     * transfer size are additionally stored in the Type.Child "saved" fields
     * (the values vdIoCtxChildReset() restores).
     *
     * @returns The value returned by vdIoCtxAlloc(); the RT_LIKELY check below
     *          implies this may be NULL.
     * @param   pIoCtxParent      Parent context; asserted to be a valid pointer
     *                            which has no parent itself.
     * @param   cbTransferParent  Stored as Type.Child.cbTransferParent.
     * @param   cbWriteParent     Stored as Type.Child.cbWriteParent.
     *
     * All remaining parameters are passed through to vdIoCtxAlloc() unchanged.
     */
    1376 DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
    1377                                        uint64_t uOffset, size_t cbTransfer,
    1378                                        PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
    1379                                        PVDIOCTX pIoCtxParent, size_t cbTransferParent,
    1380                                        size_t cbWriteParent, void *pvAllocation,
    1381                                        PFNVDIOCTXTRANSFER pfnIoCtxTransfer)
    1382 {
    1383     PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
    1384                                    pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0);
    1385 
             /* Only one level of nesting: the parent must not be a child itself. */
    1386     AssertPtr(pIoCtxParent);
    1387     Assert(!pIoCtxParent->pIoCtxParent);
    1388 
    1389     if (RT_LIKELY(pIoCtx))
    1390     {
    1391         pIoCtx->pIoCtxParent                   = pIoCtxParent;
    1392         pIoCtx->Type.Child.uOffsetSaved        = uOffset;
    1393         pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;
    1394         pIoCtx->Type.Child.cbTransferParent    = cbTransferParent;
    1395         pIoCtx->Type.Child.cbWriteParent       = cbWriteParent;
    1396     }
    1397 
    1398     LogFlow(("Allocated child I/O context %#p\n", pIoCtx));
    1399     return pIoCtx;
    1400 }
    1401 
    /**
     * Allocates an I/O task describing a user data transfer.
     *
     * The task is taken from the hMemCacheIoTask memory cache of the disk the
     * storage belongs to and marked as a non-metadata task (fMeta = false).
     *
     * @returns The initialized task, or NULL if RTMemCacheAlloc() returned NULL.
     * @param   pIoStorage   Storage the task belongs to; also used to reach the disk.
     * @param   pfnComplete  Completion callback stored in the task.
     * @param   pvUser       Opaque user data stored in the task.
     * @param   pIoCtx       I/O context stored in Type.User.pIoCtx.
     * @param   cbTransfer   Transfer size stored in Type.User.cbTransfer.
     */
    1402 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)
    1403 {
    1404     PVDIOTASK pIoTask = NULL;
    1405 
    1406     pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
    1407     if (pIoTask)
    1408     {
    1409         pIoTask->pIoStorage           = pIoStorage;
    1410         pIoTask->pfnComplete          = pfnComplete;
    1411         pIoTask->pvUser               = pvUser;
    1412         pIoTask->fMeta                = false;
    1413         pIoTask->Type.User.cbTransfer = cbTransfer;
    1414         pIoTask->Type.User.pIoCtx     = pIoCtx;
    1415     }
    1416 
    1417     return pIoTask;
    1418 }
    1419 
    /**
     * Allocates an I/O task describing a metadata transfer.
     *
     * Same allocation path as vdIoTaskUserAlloc(), but the task is marked with
     * fMeta = true and carries the metadata transfer descriptor instead of an
     * I/O context.
     *
     * @returns The initialized task, or NULL if RTMemCacheAlloc() returned NULL.
     * @param   pIoStorage   Storage the task belongs to; also used to reach the disk.
     * @param   pfnComplete  Completion callback stored in the task.
     * @param   pvUser       Opaque user data stored in the task.
     * @param   pMetaXfer    Metadata transfer stored in Type.Meta.pMetaXfer.
     */
    1420 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)
    1421 {
    1422     PVDIOTASK pIoTask = NULL;
    1423 
    1424     pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
    1425     if (pIoTask)
    1426     {
    1427         pIoTask->pIoStorage          = pIoStorage;
    1428         pIoTask->pfnComplete         = pfnComplete;
    1429         pIoTask->pvUser              = pvUser;
    1430         pIoTask->fMeta               = true;
    1431         pIoTask->Type.Meta.pMetaXfer = pMetaXfer;
    1432     }
    1433 
    1434     return pIoTask;
    1435 }
    1436 
    /**
     * Releases an I/O context.
     *
     * Frees the extra allocation attached to the context (pvAllocation), if
     * any, and returns the context itself to the disk's hMemCacheIoCtx cache.
     *
     * @param   pDisk   Disk whose memory cache the context goes back to.
     * @param   pIoCtx  Context to free; must not be used afterwards.
     */
    1437 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1438 {
    1439     LogFlow(("Freeing I/O context %#p\n", pIoCtx));
    1440     if (pIoCtx->pvAllocation)
    1441         RTMemFree(pIoCtx->pvAllocation);
    1442 #ifdef DEBUG
             /* Debug builds overwrite the context with 0xff before releasing it
              * (presumably to make accesses to a freed context easier to spot). */
    1443     memset(pIoCtx, 0xff, sizeof(VDIOCTX));
    1444 #endif
    1445     RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);
    1446 }
    1447 
    /**
     * Returns an I/O task to the disk's hMemCacheIoTask memory cache.
     *
     * @param   pDisk    Disk owning the task cache.
     * @param   pIoTask  Task to release.
     */
    1448 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask)
    1449 {
    1450     RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);
    1451 }
    1452 
    /**
     * Rewinds a child I/O context to the state saved when it was allocated.
     *
     * Resets the S/G buffer and restores the offset and the number of bytes
     * left to transfer from the Type.Child "saved" fields.
     *
     * @param   pIoCtx  Child context; asserted to have a parent.
     */
    1453 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)
    1454 {
    1455     AssertPtr(pIoCtx->pIoCtxParent);
    1456 
    1457     RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    1458     pIoCtx->Req.Io.uOffset        = pIoCtx->Type.Child.uOffsetSaved;
    1459     pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved;
    1460 }
    1461 
    /**
     * Allocates a metadata transfer descriptor with room for cb bytes of data.
     *
     * The allocation size is the offset of abData[cb] within VDMETAXFER, i.e.
     * the header plus cb trailing data bytes. The descriptor is keyed by the
     * byte range [uOffset, uOffset + cb - 1] and starts out with no transfer
     * direction, a reference count of 0 and an empty list of waiting contexts.
     *
     * @returns The initialized descriptor, or NULL if RTMemAlloc() failed.
     * @param   pIoStorage  Storage the metadata belongs to.
     * @param   uOffset     Start offset of the metadata; used as the lookup key.
     * @param   cb          Size of the metadata in bytes.
     */
    1462 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)
    1463 {
    1464     PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));
    1465 
    1466     if (RT_LIKELY(pMetaXfer))
    1467     {
    1468         pMetaXfer->Core.Key     = uOffset;
                 /* NOTE(review): cb == 0 would yield KeyLast == uOffset - 1; callers
                  * presumably never pass 0 - confirm. */
    1469         pMetaXfer->Core.KeyLast = uOffset + cb - 1;
    1470         pMetaXfer->fFlags       = VDMETAXFER_TXDIR_NONE;
    1471         pMetaXfer->cbMeta       = cb;
    1472         pMetaXfer->pIoStorage   = pIoStorage;
    1473         pMetaXfer->cRefs        = 0;
    1474         RTListInit(&pMetaXfer->ListIoCtxWaiting);
    1475     }
    1476     return pMetaXfer;
    1477 }
    1478 
    /**
     * Pushes an I/O context onto the head of a singly linked context list.
     *
     * The list head is updated with an atomic compare-and-exchange; if the
     * exchange fails the context is re-linked to the head value reported back
     * in pHeadOld and the exchange is retried. Because contexts are always
     * inserted at the head, the list ends up in reverse insertion order.
     *
     * @param   ppList  Address of the list head pointer.
     * @param   pIoCtx  Context to insert; its pIoCtxNext member is overwritten.
     */
    1479 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)
    1480 {
    1481     /* Put it on the waiting list. */
    1482     PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);
    1483     PVDIOCTX pHeadOld;
    1484     pIoCtx->pIoCtxNext = pNext;
    1485     while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))
    1486     {
                 /* The head changed in the meantime: link to the new head and retry. */
    1487         pNext = pHeadOld;
    1488         Assert(pNext != pIoCtx);
    1489         pIoCtx->pIoCtxNext = pNext;
    1490         ASMNopPause();
    1491     }
    1492 }
    1493 
    /**
     * Marks an I/O context as blocked and queues it on the disk's list of
     * blocked contexts (pIoCtxBlockedHead).
     *
     * @param   pDisk   Disk owning the blocked list.
     * @param   pIoCtx  Context to defer; asserted to have no parent and to be
     *                  not blocked already.
     */
    1494 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1495 {
    1496     LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx));
    1497 
    1498     Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));
    1499     pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
    1500     vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);
    1501 }
    1502 
    /**
     * Copies up to cbData bytes from the S/G buffer of the source context to
     * the S/G buffer of the destination context via RTSgBufCopy().
     *
     * @returns The value returned by RTSgBufCopy().
     */
    1503 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)
    1504 {
    1505     return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);
    1506 }
    1507 
    /**
     * Compares cbData bytes of the S/G buffers of two I/O contexts via
     * RTSgBufCmp().
     *
     * @returns The value returned by RTSgBufCmp().
     */
    1508 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)
    1509 {
    1510     return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);
    1511 }
    1512 
    /**
     * Copies cbData bytes from a flat buffer into the S/G buffer of the I/O
     * context via RTSgBufCopyFromBuf().
     *
     * @returns The value returned by RTSgBufCopyFromBuf().
     */
    1513 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)
    1514 {
    1515     return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
    1516 }
    1517 
    /**
     * Copies cbData bytes from the S/G buffer of the I/O context into a flat
     * buffer via RTSgBufCopyToBuf().
     *
     * @returns The value returned by RTSgBufCopyToBuf().
     */
    1518 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)
    1519 {
    1520     return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
    1521 }
    1522 
    /**
     * Fills cbData bytes of the S/G buffer of the I/O context with the byte
     * value ch via RTSgBufSet().
     *
     * @returns The value returned by RTSgBufSet().
     */
    1523 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)
    1524 {
    1525     return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);
    1526 }
    1527 
    1528 /**
    1529  * Process the I/O context, core method which assumes that the I/O context
    1530  * acquired the lock.
    1531  *
          * Runs the transfer function chain of the context (pfnIoCtxTransfer, then
          * pfnIoCtxTransferNext) and folds the outcome into one of two statuses.
          *
    1532  * @returns VBox status code.
          * @retval  VINF_VD_ASYNC_IO_FINISHED if no metadata or data transfer is
          *          pending and either nothing is left to run or an error occurred.
          * @retval  VERR_VD_ASYNC_IO_IN_PROGRESS if transfers are still pending, the
          *          context is blocked, or a transfer function returned
          *          VERR_VD_NOT_ENOUGH_METADATA or VERR_VD_IOCTX_HALT.
    1533  * @param   pIoCtx    I/O context to process.
    1534  */
    1535 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)
    1536 {
    1537     int rc = VINF_SUCCESS;
    1538 
    1539     VD_IS_LOCKED(pIoCtx->pDisk);
    1540 
    1541     LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
    1542 
             /* Nothing pending and no transfer function left: the context is done. */
    1543     if (   !pIoCtx->cMetaTransfersPending
    1544         && !pIoCtx->cDataTransfersPending
    1545         && !pIoCtx->pfnIoCtxTransfer)
    1546     {
    1547         rc = VINF_VD_ASYNC_IO_FINISHED;
    1548         goto out;
    1549     }
    1550 
    1551     /*
    1552      * We complete the I/O context in case of an error
    1553      * if there is no I/O task pending.
    1554      */
    1555     if (   RT_FAILURE(pIoCtx->rcReq)
    1556         && !pIoCtx->cMetaTransfersPending
    1557         && !pIoCtx->cDataTransfersPending)
    1558     {
    1559         rc = VINF_VD_ASYNC_IO_FINISHED;
    1560         goto out;
    1561     }
    1562 
    1563     /* Don't change anything if there is a metadata transfer pending or we are blocked. */
    1564     if (   pIoCtx->cMetaTransfersPending
    1565         || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
    1566     {
    1567         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1568         goto out;
    1569     }
    1570 
    1571     if (pIoCtx->pfnIoCtxTransfer)
    1572     {
    1573         /* Call the transfer function advancing to the next while there is no error. */
    1574         while (   pIoCtx->pfnIoCtxTransfer
    1575                && !pIoCtx->cMetaTransfersPending
    1576                && RT_SUCCESS(rc))
    1577         {
    1578             LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));
    1579             rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);
    1580 
    1581             /* Advance to the next part of the transfer if the current one succeeded. */
                     /* On failure the function pointer is left in place, so the same
                      * transfer function is called again the next time the context
                      * is processed. */
    1582             if (RT_SUCCESS(rc))
    1583             {
    1584                 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
    1585                 pIoCtx->pfnIoCtxTransferNext = NULL;
    1586             }
    1587         }
    1588     }
    1589 
             /*
              * Map the result of the transfer functions to the two statuses callers
              * see. A status which already is VERR_VD_ASYNC_IO_IN_PROGRESS matches
              * none of the branches and is returned unchanged.
              */
    1590     if (   RT_SUCCESS(rc)
    1591         && !pIoCtx->cMetaTransfersPending
    1592         && !pIoCtx->cDataTransfersPending)
    1593         rc = VINF_VD_ASYNC_IO_FINISHED;
    1594     else if (   RT_SUCCESS(rc)
    1595              || rc == VERR_VD_NOT_ENOUGH_METADATA
    1596              || rc == VERR_VD_IOCTX_HALT)
    1597         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1598     else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
    1599     {
                 /* Record the error in rcReq only while rcReq still holds
                  * VINF_SUCCESS, so an error recorded earlier is not overwritten. */
    1600         ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);
    1601         /*
    1602          * The I/O context completed if we have an error and there is no data
    1603          * or meta data transfer pending.
    1604          */
    1605         if (   !pIoCtx->cMetaTransfersPending
    1606             && !pIoCtx->cDataTransfersPending)
    1607             rc = VINF_VD_ASYNC_IO_FINISHED;
    1608         else
    1609             rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1610     }
    1611 
    1612 out:
    1613     LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
    1614                  pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,
    1615                  pIoCtx->fComplete));
    1616 
    1617     return rc;
    1618 }
    1619 
    1620 /**
    1621  * Processes the list of waiting I/O contexts.
    1622  *
          * The list at pDisk->pIoCtxHead is detached atomically, reversed (contexts
          * are inserted at the head, so reversing restores insertion order) and each
          * context is run through vdIoCtxProcessLocked(). Contexts other than
          * pIoCtxRc which finish here are completed and freed; pIoCtxRc itself is
          * only processed, its completion is left to the caller.
          *
    1623  * @returns VBox status code.
    1624  * @param   pDisk    The disk structure.
    1625  * @param   pIoCtxRc An I/O context handle which waits on the list. When it is
    1626  *                   processed its status code is returned. NULL if there is no
    1627  *                   I/O context to return the status code for.
    1628  */
    1629 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)
    1630 {
    1631     int rc = VINF_SUCCESS;
    1632 
    1633     LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));
    1634 
    1635     VD_IS_LOCKED(pDisk);
    1636 
    1637     /* Get the waiting list and process it in FIFO order. */
    1638     PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);
    1639 
    1640     /* Reverse it. */
    1641     PVDIOCTX pCur = pIoCtxHead;
    1642     pIoCtxHead = NULL;
    1643     while (pCur)
    1644     {
    1645         PVDIOCTX pInsert = pCur;
    1646         pCur = pCur->pIoCtxNext;
    1647         pInsert->pIoCtxNext = pIoCtxHead;
    1648         pIoCtxHead = pInsert;
    1649     }
    1650 
    1651     /* Process now. */
    1652     pCur = pIoCtxHead;
    1653     while (pCur)
    1654     {
    1655         int rcTmp;
    1656         PVDIOCTX pTmp = pCur;
    1657 
                 /* Unlink before processing; pTmp may be freed further down. */
    1658         pCur = pCur->pIoCtxNext;
    1659         pTmp->pIoCtxNext = NULL;
    1660 
    1661         rcTmp = vdIoCtxProcessLocked(pTmp);
    1662         if (pTmp == pIoCtxRc)
    1663         {
    1664             /* The given I/O context was processed, pass the return code to the caller. */
    1665             rc = rcTmp;
    1666         }
                 /* The compare-exchange on fComplete lets only the caller which flips
                  * it from false to true complete and free the context. */
    1667         else if (   rcTmp == VINF_VD_ASYNC_IO_FINISHED
    1668                  && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
    1669         {
    1670             LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
    1671             vdThreadFinishWrite(pDisk);
    1672             pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
    1673                                         pTmp->Type.Root.pvUser2,
    1674                                         pTmp->rcReq);
    1675             vdIoCtxFree(pDisk, pTmp);
    1676         }
    1677     }
    1678 
    1679     LogFlowFunc(("returns rc=%Rrc\n", rc));
    1680     return rc;
    1681 }
    1682 
    1683 /**
    1684  * Processes the list of blocked I/O contexts.
    1685  *
          * The list at pDisk->pIoCtxBlockedHead is detached atomically, reversed so
          * that contexts are handled in the order they were queued, and each context
          * has its VDIOCTX_FLAGS_BLOCKED flag cleared before it is run through
          * vdIoCtxProcessLocked(). Contexts which finish here are completed through
          * their root completion callback and freed.
          *
    1686  * @returns nothing.
    1687  * @param   pDisk    The disk structure.
    1688  */
    1689 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)
    1690 {
    1691     LogFlowFunc(("pDisk=%#p\n", pDisk));
    1692 
    1693     VD_IS_LOCKED(pDisk);
    1694 
    1695     /* Get the waiting list and process it in FIFO order. */
    1696     PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);
    1697 
    1698     /* Reverse it. */
    1699     PVDIOCTX pCur = pIoCtxHead;
    1700     pIoCtxHead = NULL;
    1701     while (pCur)
    1702     {
    1703         PVDIOCTX pInsert = pCur;
    1704         pCur = pCur->pIoCtxNext;
    1705         pInsert->pIoCtxNext = pIoCtxHead;
    1706         pIoCtxHead = pInsert;
    1707     }
    1708 
    1709     /* Process now. */
    1710     pCur = pIoCtxHead;
    1711     while (pCur)
    1712     {
    1713         int rc;
    1714         PVDIOCTX pTmp = pCur;
    1715 
                 /* Unlink before processing; pTmp may be freed further down. */
    1716         pCur = pCur->pIoCtxNext;
    1717         pTmp->pIoCtxNext = NULL;
    1718 
                 /* Only root contexts which are marked as blocked are expected here. */
    1719         Assert(!pTmp->pIoCtxParent);
    1720         Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);
    1721         pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
    1722 
    1723         rc = vdIoCtxProcessLocked(pTmp);
    1724         if (   rc == VINF_VD_ASYNC_IO_FINISHED
    1725             && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
    1726         {
    1727             LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
    1728             vdThreadFinishWrite(pDisk);
    1729             pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
    1730                                         pTmp->Type.Root.pvUser2,
    1731                                         pTmp->rcReq);
    1732             vdIoCtxFree(pDisk, pTmp);
    1733         }
    1734     }
    1735 
    1736     LogFlowFunc(("returns\n"));
    1737 }
    1738 
    1739 /**
    1740  * Processes the I/O context trying to lock the critical section.
    1741  * The context is deferred if the critical section is busy.
    1742  *
          * The context is always queued on pDisk->pIoCtxHead first. If the disk lock
          * flag (fLocked) could be taken, the lock is released again right away via
          * vdDiskUnlock() and its status is returned; otherwise the context simply
          * stays on the list (presumably to be picked up by the current lock holder
          * when it unlocks - verify against vdDiskUnlock()).
          *
    1743  * @returns VBox status code.
          * @retval  VERR_VD_ASYNC_IO_IN_PROGRESS if the lock is held by someone else.
    1744  * @param   pIoCtx    The I/O context to process.
    1745  */
    1746 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)
    1747 {
    1748     int rc = VINF_SUCCESS;
    1749     PVBOXHDD pDisk = pIoCtx->pDisk;
    1750 
    1751     LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
    1752 
    1753     /* Put it on the waiting list first. */
    1754     vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);
    1755 
    1756     if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
    1757     {
    1758         /* Leave it again, the context will be processed just before leaving the lock. */
    1759         LogFlowFunc(("Successfully acquired the lock\n"));
    1760         rc = vdDiskUnlock(pDisk, pIoCtx);
    1761     }
    1762     else
    1763     {
    1764         LogFlowFunc(("Lock is held\n"));
    1765         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1766     }
    1767 
    1768     return rc;
    1769 }
    1770 
    1771 /**
    1772  * Process the I/O context in a synchronous manner, waiting
    1773  * for it to complete.
    1774  *
          * If processing does not finish immediately, the function blocks on
          * pDisk->hEventSemSyncIo without a timeout and then returns pDisk->rcSync.
          * In every other case the context is freed here.
          *
    1775  * @returns VBox status code of the completed request.
    1776  * @param   pIoCtx    The sync I/O context.
    1777  */
    1778 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx)
    1779 {
    1780     int rc = VINF_SUCCESS;
    1781     PVBOXHDD pDisk = pIoCtx->pDisk;
    1782 
    1783     LogFlowFunc(("pIoCtx=%p\n", pIoCtx));
    1784 
    1785     AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC,
    1786               ("I/O context is not marked as synchronous\n"));
    1787 
    1788     rc = vdIoCtxProcessTryLockDefer(pIoCtx);
             /* "Finished" is an internal status, callers get plain success. */
    1789     if (rc == VINF_VD_ASYNC_IO_FINISHED)
    1790         rc = VINF_SUCCESS;
    1791 
    1792     if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
    1793     {
                 /* The status of the wait itself is only asserted; the request
                  * status is taken from pDisk->rcSync. The context is not freed on
                  * this path (presumably the completion path takes care of it -
                  * TODO confirm). */
    1794         rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT);
    1795         AssertRC(rc);
    1796 
    1797         rc = pDisk->rcSync;
    1798     }
    1799     else /* Success or error. */
    1800         vdIoCtxFree(pDisk, pIoCtx);
    1801 
    1802     return rc;
    1803 }
    1804 
    /**
     * Checks whether the given I/O context is the one recorded in
     * pDisk->pIoCtxLockOwner.
     *
     * @returns true if pIoCtx is the recorded lock owner, false otherwise.
     */
    1805 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1806 {
    1807     return pDisk->pIoCtxLockOwner == pIoCtx;
    1808 }
    1809 
    /**
     * Tries to make the given I/O context the lock owner of the disk.
     *
     * Ownership is taken by atomically replacing NIL_VDIOCTX in
     * pDisk->pIoCtxLockOwner with pIoCtx. If another context already owns the
     * disk, pIoCtx is deferred onto the blocked list via vdIoCtxDefer().
     *
     * @returns VINF_SUCCESS if ownership was taken,
     *          VERR_VD_ASYNC_IO_IN_PROGRESS if the context was deferred.
     * @param   pDisk   The disk structure.
     * @param   pIoCtx  The context requesting ownership.
     */
    1810 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1811 {
    1812     int rc = VINF_SUCCESS;
    1813 
    1814     VD_IS_LOCKED(pDisk);
    1815 
    1816     LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));
    1817 
    1818     if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))
    1819     {
    1820         Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */
    1821         vdIoCtxDefer(pDisk, pIoCtx);
    1822         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1823     }
    1824 
    1825     LogFlowFunc(("returns -> %Rrc\n", rc));
    1826     return rc;
    1827 }
    1828 
    /**
     * Gives up the disk lock ownership held by the given I/O context.
     *
     * Resets pDisk->pIoCtxLockOwner to NIL_VDIOCTX and optionally processes
     * the contexts which were deferred while the disk was owned.
     *
     * @param   pDisk                The disk structure.
     * @param   pIoCtx               The current owner; asserted to match
     *                               pDisk->pIoCtxLockOwner.
     * @param   fProcessBlockedReqs  Whether to run vdDiskProcessBlockedIoCtx()
     *                               after releasing ownership.
     */
    1829 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
    1830 {
    1831     LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
    1832                  pDisk, pIoCtx, fProcessBlockedReqs));
    1833 
    1834     VD_IS_LOCKED(pDisk);
    1835 
    1836     LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
    1837     Assert(pDisk->pIoCtxLockOwner == pIoCtx);
    1838     ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);
    1839 
    1840     if (fProcessBlockedReqs)
    1841     {
    1842         /* Process any blocked writes if the current request didn't cause another growing. */
    1843         vdDiskProcessBlockedIoCtx(pDisk);
    1844     }
    1845 
    1846     LogFlowFunc(("returns\n"));
    1847 }
    1848 
    1849 /**
    1850  * internal: read the specified amount of data in whatever blocks the backend
    1851  * will give us - async version.
    1852  *
          * Starting at Req.Io.pImageCur, each piece of the request is offered to the
          * backend of the current image; if that reports the block as free, the
          * pPrev chain of images is walked until one has data or the chain ends.
          * Ranges which no image has data for are zeroed in the context's buffer.
          * When the loop stops with VERR_VD_NOT_ENOUGH_METADATA or
          * VERR_VD_IOCTX_HALT the current position is saved in the context so a
          * later call continues from there.
          *
          * @returns VBox status code of the last backend read, with
          *          VERR_VD_BLOCK_FREE and VERR_VD_ASYNC_IO_IN_PROGRESS mapped to
          *          VINF_SUCCESS.
          * @param   pIoCtx    The I/O context describing the read.
    1853  */
    1854 static int vdReadHelperAsync(PVDIOCTX pIoCtx)
    1855 {
    1856     int rc;
    1857     size_t cbToRead     = pIoCtx->Req.Io.cbTransfer;
    1858     uint64_t uOffset    = pIoCtx->Req.Io.uOffset;
    1859     PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;;
    1860     size_t cbThisRead;
    1861 
    1862     /* Loop until all reads started or we have a backend which needs to read metadata. */
    1863     do
    1864     {
    1865         /* Search for image with allocated block. Do not attempt to read more
    1866          * than the previous reads marked as valid. Otherwise this would return
    1867          * stale data when different block sizes are used for the images. */
    1868         cbThisRead = cbToRead;
    1869 
    1870         /*
    1871          * Try to read from the given image.
    1872          * If the block is not allocated read from override chain if present.
    1873          */
    1874         rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    1875                                           uOffset, cbThisRead,
    1876                                           pIoCtx, &cbThisRead);
    1877 
    1878         if (rc == VERR_VD_BLOCK_FREE)
    1879         {
                     /* Walk towards the older images; cbThisRead is passed on as
                      * updated by the previous backend call. */
    1880             while (   pCurrImage->pPrev != NULL
    1881                    && rc == VERR_VD_BLOCK_FREE)
    1882             {
    1883                 pCurrImage =  pCurrImage->pPrev;
    1884                 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    1885                                                   uOffset, cbThisRead,
    1886                                                   pIoCtx, &cbThisRead);
    1887             }
    1888         }
    1889 
    1890         /* The task state will be updated on success already, don't do it here!. */
    1891         if (rc == VERR_VD_BLOCK_FREE)
    1892         {
    1893             /* No image in the chain contains the data for the block. */
    1894             vdIoCtxSet(pIoCtx, '\0', cbThisRead);
    1895             ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);
    1896             rc = VINF_SUCCESS;
    1897         }
    1898         else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
    1899             rc = VINF_SUCCESS;
    1900         else if (rc == VERR_VD_IOCTX_HALT)
    1901         {
                     /* Account for this piece before leaving the loop, so the state
                      * saved below starts after it (assuming VERR_VD_IOCTX_HALT is a
                      * failure status, the break below keeps the position from being
                      * advanced a second time). */
    1902             uOffset  += cbThisRead;
    1903             cbToRead -= cbThisRead;
    1904             pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
    1905         }
    1906 
    1907         if (RT_FAILURE(rc))
    1908             break;
    1909 
    1910         cbToRead -= cbThisRead;
    1911         uOffset  += cbThisRead;
    1912         pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
    1913     } while (cbToRead != 0 && RT_SUCCESS(rc));
    1914 
    1915     if (   rc == VERR_VD_NOT_ENOUGH_METADATA
    1916         || rc == VERR_VD_IOCTX_HALT)
    1917     {
    1918         /* Save the current state. */
    1919         pIoCtx->Req.Io.uOffset    = uOffset;
    1920         pIoCtx->Req.Io.cbTransfer = cbToRead;
    1921         pIoCtx->Req.Io.pImageCur  = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
    1922     }
    1923 
    1924     return rc;
    1925 }
    1925 
    1926 /**
    1927  * internal: parent image read wrapper for compacting.
    1928  *
          * Forwards the read to vdReadHelper() for the disk and image stored in the
          * parent state descriptor, with cache updating disabled.
          *
          * @returns The status code of vdReadHelper().
          * @param   pvUser   Pointer to the parent state (PVDPARENTSTATEDESC).
          * @param   uOffset  Offset to read from.
          * @param   pvBuf    Buffer receiving the data.
          * @param   cbRead   Number of bytes to read.
          */
    1929 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,
    1930                         size_t cbRead)
    1931 {
    1932     PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;
    1933     return vdReadHelper(pParentState->pDisk, pParentState->pImage, uOffset,
    1934                         pvBuf, cbRead, false /* fUpdateCache */);
    19351968}
    19361969
     
    19842017
    19852018/**
    1986  * internal: write a complete block (only used for diff images), taking the
    1987  * remaining data from parent images. This implementation does not optimize
    1988  * anything (except that it tries to read only that portions from parent
    1989  * images that are really needed).
    1990  *
          * The block is assembled in pvTmp in this order: cbPreRead bytes read from
          * the image chain, cbThisWrite bytes from pvBuf, then cbPostRead bytes which
          * are made up of further caller data (if cbWrite exceeds cbThisWrite), data
          * read from the image chain, and zero fill for the part beyond pDisk->cbSize.
          * The assembled block is then written in one backend pfnWrite call.
          *
          * @returns VBox status code; a failed read is returned as is, otherwise the
          *          status of the backend write.
          * @param   pDisk                 The disk structure.
          * @param   pImage                Image to write to.
          * @param   pImageParentOverride  Passed through to vdReadHelperEx().
          * @param   uOffset               Offset of the caller's data.
          * @param   cbWrite               Total number of bytes the caller wants written.
          * @param   cbThisWrite           Number of bytes of pvBuf belonging to this block.
          * @param   cbPreRead             Bytes of the block before uOffset.
          * @param   cbPostRead            Bytes of the block after the written range.
          * @param   pvBuf                 The caller's data.
          * @param   pvTmp                 Scratch buffer; the code assumes it holds at
          *                                least cbPreRead + cbThisWrite + cbPostRead bytes.
          */
    1991 static int vdWriteHelperStandard(PVBOXHDD pDisk, PVDIMAGE pImage,
    1992                                  PVDIMAGE pImageParentOverride,
    1993                                  uint64_t uOffset, size_t cbWrite,
    1994                                  size_t cbThisWrite, size_t cbPreRead,
    1995                                  size_t cbPostRead, const void *pvBuf,
    1996                                  void *pvTmp)
    1997 {
    1998     int rc = VINF_SUCCESS;
    1999 
    2000     LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",
    2001                  pDisk, pImage, pImageParentOverride, uOffset, cbWrite));
    2002 
    2003     /* Read the data that goes before the write to fill the block. */
    2004     if (cbPreRead)
    2005     {
    2006         /*
    2007          * Updating the cache doesn't make sense here because
    2008          * this will be done after the complete block was written.
    2009          */
    2010         rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,
    2011                             uOffset - cbPreRead, pvTmp, cbPreRead,
    2012                             true /* fZeroFreeBlocks*/,
    2013                             false /* fUpdateCache */, 0);
    2014         if (RT_FAILURE(rc))
    2015             return rc;
    2016     }
    2017 
    2018     /* Copy the data to the right place in the buffer. */
    2019     memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);
    2020 
    2021     /* Read the data that goes after the write to fill the block. */
    2022     if (cbPostRead)
    2023     {
    2024         /* If we have data to be written, use that instead of reading
    2025          * data from the image. */
    2026         size_t cbWriteCopy;
    2027         if (cbWrite > cbThisWrite)
    2028             cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
    2029         else
    2030             cbWriteCopy = 0;
    2031         /* Figure out how much we cannot read from the image, because
    2032          * the last block to write might exceed the nominal size of the
    2033          * image for technical reasons. */
    2034         size_t cbFill;
    2035         if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
    2036             cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
    2037         else
    2038             cbFill = 0;
    2039         /* The rest must be read from the image. */
                 /* NOTE(review): assumes cbWriteCopy + cbFill <= cbPostRead; the
                  * subtraction is unsigned and is not checked here. */
    2040         size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill;
    2041 
    2042         /* Now assemble the remaining data. */
    2043         if (cbWriteCopy)
    2044             memcpy((char *)pvTmp + cbPreRead + cbThisWrite,
    2045                    (char *)pvBuf + cbThisWrite, cbWriteCopy);
    2046         if (cbReadImage)
    2047             rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,
    2048                                 uOffset + cbThisWrite + cbWriteCopy,
    2049                                 (char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy,
    2050                                 cbReadImage, true /* fZeroFreeBlocks */,
    2051                                 false /* fUpdateCache */, 0);
    2052         if (RT_FAILURE(rc))
    2053             return rc;
    2054         /* Zero out the remainder of this block. Will never be visible, as this
    2055          * is beyond the limit of the image. */
    2056         if (cbFill)
    2057             memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,
    2058                    '\0', cbFill);
    2059     }
    2060 
    2061     /* Write the full block to the virtual disk. */
             /* The scratch buffer is wrapped in a single-segment S/G buffer and a
              * synchronous I/O context living on the stack. */
    2062     RTSGSEG SegmentBuf;
    2063     RTSGBUF SgBuf;
    2064     VDIOCTX IoCtx;
    2065 
    2066     SegmentBuf.pvSeg = pvTmp;
    2067     SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead;
    2068     RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    2069     vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
    2070                 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    2071     rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead,
    2072                                         cbPreRead + cbThisWrite + cbPostRead,
    2073                                         &IoCtx, NULL, &cbPreRead, &cbPostRead, 0);
             /* A write of the complete block is expected to need no further pre or
              * post reads and must not be reported as a free block. */
    2074     Assert(rc != VERR_VD_BLOCK_FREE);
    2075     Assert(cbPreRead == 0);
    2076     Assert(cbPostRead == 0);
    2077 
    2078     return rc;
    2079 }
    2080 
    2081 /**
    2082  * internal: write a complete block (only used for diff images), taking the
    2083  * remaining data from parent images. This implementation optimizes out writes
    2084  * that do not change the data relative to the state as of the parent images.
    2085  * All backends which support differential/growing images support this.
    2086  *
          * The current content of the whole block (minus the part beyond
          * pDisk->cbSize) is read into pvTmp first. If the caller's data equals what
          * is already there, nothing is written and VINF_SUCCESS is returned.
          * Otherwise the caller's data is merged into pvTmp and the block is written
          * in one backend pfnWrite call.
          *
          * @returns VBox status code; a failed read is returned as is.
          * @param   pDisk                 The disk structure.
          * @param   pImage                Image to write to.
          * @param   pImageParentOverride  Passed through to vdReadHelperEx().
          * @param   uOffset               Offset of the caller's data.
          * @param   cbWrite               Total number of bytes the caller wants written.
          * @param   cbThisWrite           Number of bytes of pvBuf belonging to this block.
          * @param   cbPreRead             Bytes of the block before uOffset.
          * @param   cbPostRead            Bytes of the block after the written range.
          * @param   pvBuf                 The caller's data.
          * @param   pvTmp                 Scratch buffer; the code assumes it holds at
          *                                least cbPreRead + cbThisWrite + cbPostRead bytes.
          * @param   cImagesRead           Passed through to vdReadHelperEx().
          */
    2087 static int vdWriteHelperOptimized(PVBOXHDD pDisk, PVDIMAGE pImage,
    2088                                   PVDIMAGE pImageParentOverride,
    2089                                   uint64_t uOffset, size_t cbWrite,
    2090                                   size_t cbThisWrite, size_t cbPreRead,
    2091                                   size_t cbPostRead, const void *pvBuf,
    2092                                   void *pvTmp, unsigned cImagesRead)
    2093 {
    2094     size_t cbFill = 0;
    2095     size_t cbWriteCopy = 0;
    2096     size_t cbReadImage = 0;
    2097     int rc;
    2098 
    2099     LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",
    2100                  pDisk, pImage, pImageParentOverride, uOffset, cbWrite));
    2101 
    2102     if (cbPostRead)
    2103     {
    2104         /* Figure out how much we cannot read from the image, because
    2105          * the last block to write might exceed the nominal size of the
    2106          * image for technical reasons. */
    2107         if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
    2108             cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
    2109 
    2110         /* If we have data to be written, use that instead of reading
    2111          * data from the image. */
    2112         if (cbWrite > cbThisWrite)
    2113             cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
    2114 
    2115         /* The rest must be read from the image. */
    2116         cbReadImage = cbPostRead - cbWriteCopy - cbFill;
    2117     }
    2118 
    2119     /* Read the entire data of the block so that we can compare whether it will
    2120      * be modified by the write or not. */
    2121     rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, uOffset - cbPreRead, pvTmp,
    2122                         cbPreRead + cbThisWrite + cbPostRead - cbFill,
    2123                         true /* fZeroFreeBlocks */, false /* fUpdateCache */,
    2124                         cImagesRead);
    2125     if (RT_FAILURE(rc))
    2126         return rc;
    2127 
    2128     /* Check if the write would modify anything in this block. */
             /* Both the cbThisWrite bytes and the cbWriteCopy bytes following them in
              * pvBuf are compared against what was just read. */
    2129     if (   !memcmp((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite)
    2130         && (!cbWriteCopy || !memcmp((char *)pvTmp + cbPreRead + cbThisWrite,
    2131                                     (char *)pvBuf + cbThisWrite, cbWriteCopy)))
    2132     {
    2133         /* Block is completely unchanged, so no need to write anything. */
    2134         return VINF_SUCCESS;
    2135     }
    2136 
    2137     /* Copy the data to the right place in the buffer. */
    2138     memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);
    2139 
    2140     /* Handle the data that goes after the write to fill the block. */
    2141     if (cbPostRead)
    2142     {
    2143         /* Now assemble the remaining data. */
    2144         if (cbWriteCopy)
    2145             memcpy((char *)pvTmp + cbPreRead + cbThisWrite,
    2146                    (char *)pvBuf + cbThisWrite, cbWriteCopy);
    2147         /* Zero out the remainder of this block. Will never be visible, as this
    2148          * is beyond the limit of the image. */
    2149         if (cbFill)
    2150             memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,
    2151                    '\0', cbFill);
    2152     }
    2153 
    2154     /* Write the full block to the virtual disk. */
             /* The scratch buffer is wrapped in a single-segment S/G buffer and a
              * synchronous I/O context living on the stack. */
    2155     RTSGSEG SegmentBuf;
    2156     RTSGBUF SgBuf;
    2157     VDIOCTX IoCtx;
    2158 
    2159     SegmentBuf.pvSeg = pvTmp;
    2160     SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead;
    2161     RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    2162     vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
    2163                 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    2164     rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead,
    2165                                         cbPreRead + cbThisWrite + cbPostRead,
    2166                                         &IoCtx, NULL, &cbPreRead, &cbPostRead, 0);
             /* A write of the complete block is expected to need no further pre or
              * post reads and must not be reported as a free block. */
    2167     Assert(rc != VERR_VD_BLOCK_FREE);
    2168     Assert(cbPreRead == 0);
    2169     Assert(cbPostRead == 0);
    2170 
    2171     return rc;
    2172 }
    2173 
    2174 /**
    21752019 * internal: write buffer to the image, taking care of block boundaries and
    21762020 * write optimizations.
     
    21812025                           bool fUpdateCache, unsigned cImagesRead)
    21822026{
    2183     int rc;
    2184     unsigned fWrite;
    2185     size_t cbThisWrite;
    2186     size_t cbPreRead, cbPostRead;
    2187     uint64_t uOffsetCur = uOffset;
    2188     size_t cbWriteCur = cbWrite;
    2189     const void *pcvBufCur = pvBuf;
    2190     RTSGSEG SegmentBuf;
     2027    uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
     2028    RTSGSEG Segment;
    21912029    RTSGBUF SgBuf;
    21922030    VDIOCTX IoCtx;
    21932031
    2194     /* Loop until all written. */
    2195     do
    2196     {
    2197         /* Try to write the possibly partial block to the last opened image.
    2198          * This works when the block is already allocated in this image or
    2199          * if it is a full-block write (and allocation isn't suppressed below).
    2200          * For image formats which don't support zero blocks, it's beneficial
    2201          * to avoid unnecessarily allocating unchanged blocks. This prevents
    2202          * unwanted expanding of images. VMDK is an example. */
    2203         cbThisWrite = cbWriteCur;
    2204         fWrite =   (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
    2205                  ? 0 : VD_WRITE_NO_ALLOC;
    2206 
    2207         SegmentBuf.pvSeg = (void *)pcvBufCur;
    2208         SegmentBuf.cbSeg = cbWrite;
    2209         RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    2210         vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
    2211                     &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    2212         rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffsetCur, cbThisWrite,
    2213                                             &IoCtx, &cbThisWrite, &cbPreRead,
    2214                                             &cbPostRead, fWrite);
    2215         if (rc == VERR_VD_BLOCK_FREE)
    2216         {
    2217             void *pvTmp = RTMemTmpAlloc(cbPreRead + cbThisWrite + cbPostRead);
    2218             AssertBreakStmt(VALID_PTR(pvTmp), rc = VERR_NO_MEMORY);
    2219 
    2220             if (!(pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME))
    2221             {
    2222                 /* Optimized write, suppress writing to a so far unallocated
    2223                  * block if the data is in fact not changed. */
    2224                 rc = vdWriteHelperOptimized(pDisk, pImage, pImageParentOverride,
    2225                                             uOffsetCur, cbWriteCur,
    2226                                             cbThisWrite, cbPreRead, cbPostRead,
    2227                                             pcvBufCur, pvTmp, cImagesRead);
    2228             }
    2229             else
    2230             {
    2231                 /* Normal write, not optimized in any way. The block will
    2232                  * be written no matter what. This will usually (unless the
    2233                  * backend has some further optimization enabled) cause the
    2234                  * block to be allocated. */
    2235                 rc = vdWriteHelperStandard(pDisk, pImage, pImageParentOverride,
    2236                                            uOffsetCur, cbWriteCur,
    2237                                            cbThisWrite, cbPreRead, cbPostRead,
    2238                                            pcvBufCur, pvTmp);
    2239             }
    2240             RTMemTmpFree(pvTmp);
    2241             if (RT_FAILURE(rc))
    2242                 break;
    2243         }
    2244 
    2245         cbWriteCur -= cbThisWrite;
    2246         uOffsetCur += cbThisWrite;
    2247         pcvBufCur = (char *)pcvBufCur + cbThisWrite;
    2248     } while (cbWriteCur != 0 && RT_SUCCESS(rc));
    2249 
    2250 #if 0 /** @todo: Soon removed when sync and async version of the write helper are merged. */
    2251     /* Update the cache on success */
    2252     if (   RT_SUCCESS(rc)
    2253         && pDisk->pCache
    2254         && fUpdateCache)
    2255         rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, cbWrite, NULL);
    2256 
    2257     if (RT_SUCCESS(rc))
    2258         rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite);
    2259 #endif
    2260 
    2261     return rc;
     2032    if (fUpdateCache)
     2033        fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE;
     2034
     2035    Segment.pvSeg = (void *)pvBuf;
     2036    Segment.cbSeg = cbWrite;
     2037    RTSgBufInit(&SgBuf, &Segment, 1);
     2038    vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf,
     2039                NULL, vdWriteHelperAsync, fFlags);
     2040
     2041    IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
     2042    IoCtx.Req.Io.cImagesRead = cImagesRead;
     2043    IoCtx.pIoCtxParent          = NULL;
     2044    IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
     2045    IoCtx.Type.Root.pvUser1     = pDisk;
     2046    IoCtx.Type.Root.pvUser2     = NULL;
     2047    return vdIoCtxProcessSync(&IoCtx);
    22622048}
    22632049
     
    24912277}
    24922278
    2493 /**
    2494  * internal: write a complete block (only used for diff images), taking the
    2495  * remaining data from parent images. This implementation does not optimize
    2496  * anything (except that it tries to read only those portions from parent
    2497  * images that are really needed) - async version.
          *
          * The whole algorithm below is compiled out by the preprocessor, so this
          * version is a stub: the only enabled statements initialise a local status
          * code and return VERR_NOT_IMPLEMENTED. The disabled code refers to names
          * (pIoCtxDst, pIoCtxSrc, cbPreRead, cbPostRead, ...) which this signature
          * does not declare, i.e. it is a sketch and not compilable as it stands.
          *
          * @returns VERR_NOT_IMPLEMENTED.
          * @param   pIoCtx    The I/O context; not referenced by the enabled code.
    2498  */
    2499 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
    2500 {
    2501     int rc = VINF_SUCCESS;
    2502 
    2503 #if 0
    2504 
    2505     /* Read the data that goes before the write to fill the block. */
    2506     if (cbPreRead)
    2507     {
    2508         rc = vdReadHelperAsync(pIoCtxDst);
    2509         if (RT_FAILURE(rc))
    2510             return rc;
    2511     }
    2512 
    2513     /* Copy the data to the right place in the buffer. */
    2514     vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbThisWrite);
    2515 
    2516     /* Read the data that goes after the write to fill the block. */
    2517     if (cbPostRead)
    2518     {
    2519         /* If we have data to be written, use that instead of reading
    2520          * data from the image. */
    2521         size_t cbWriteCopy;
    2522         if (cbWrite > cbThisWrite)
    2523             cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
    2524         else
    2525             cbWriteCopy = 0;
    2526         /* Figure out how much we cannot read from the image, because
    2527          * the last block to write might exceed the nominal size of the
    2528          * image for technical reasons. */
    2529         size_t cbFill;
    2530         if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
    2531             cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
    2532         else
    2533             cbFill = 0;
    2534         /* The rest must be read from the image. */
    2535         size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill;
    2536 
    2537         /* Now assemble the remaining data. */
    2538         if (cbWriteCopy)
    2539         {
    2540             vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbWriteCopy);
    2541             ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbWriteCopy);
    2542         }
    2543 
    2544         if (cbReadImage)
    2545             rc = vdReadHelperAsync(pDisk, pImage, pImageParentOverride, pIoCtxDst,
    2546                                    uOffset + cbThisWrite + cbWriteCopy,
    2547                                    cbReadImage);
    2548         if (RT_FAILURE(rc))
    2549             return rc;
    2550         /* Zero out the remainder of this block. Will never be visible, as this
    2551          * is beyond the limit of the image. */
    2552         if (cbFill)
    2553         {
    2554             vdIoCtxSet(pIoCtxDst, '\0', cbFill);
    2555             ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbFill);
    2556         }
    2557     }
    2558 
    2559     if (   !pIoCtxDst->cbTransferLeft
    2560         && !pIoCtxDst->cMetaTransfersPending
    2561         && ASMAtomicCmpXchgBool(&pIoCtxDst->fComplete, true, false))
    2562     {
    2563         /* Write the full block to the virtual disk. */
    2564         vdIoCtxChildReset(pIoCtxDst);
    2565         rc = pImage->Backend->pfnWrite(pImage->pBackendData,
    2566                                             uOffset - cbPreRead,
    2567                                             cbPreRead + cbThisWrite + cbPostRead,
    2568                                             pIoCtxDst,
    2569                                             NULL, &cbPreRead, &cbPostRead, 0);
    2570         Assert(rc != VERR_VD_BLOCK_FREE);
    2571         Assert(cbPreRead == 0);
    2572         Assert(cbPostRead == 0);
    2573     }
    2574     else
    2575     {
    2576         LogFlow(("cbTransferLeft=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
    2577                  pIoCtxDst->cbTransferLeft, pIoCtxDst->cMetaTransfersPending,
    2578                  pIoCtxDst->fComplete));
    2579         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    2580     }
    2581 
    2582     return rc;
    2583 #endif
    2584     return VERR_NOT_IMPLEMENTED;
    2585 }
    2586 
    2587 static int vdWriteHelperOptimizedCommitAsync(PVDIOCTX pIoCtx)
     2279static int vdWriteHelperCommitAsync(PVDIOCTX pIoCtx)
    25882280{
    25892281    int rc             = VINF_SUCCESS;
     
    25952287    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
    25962288    rc = pImage->Backend->pfnWrite(pImage->pBackendData,
    2597                                         pIoCtx->Req.Io.uOffset - cbPreRead,
    2598                                         cbPreRead + cbThisWrite + cbPostRead,
    2599                                         pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);
     2289                                   pIoCtx->Req.Io.uOffset - cbPreRead,
     2290                                   cbPreRead + cbThisWrite + cbPostRead,
     2291                                   pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);
    26002292    Assert(rc != VERR_VD_BLOCK_FREE);
    26012293    Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0);
     
    26862378    /* Write the full block to the virtual disk. */
    26872379    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    2688     pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedCommitAsync;
     2380    pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
    26892381
    26902382    return rc;
     
    26962388
    26972389    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2390
     2391    pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
    26982392
    26992393    if (pIoCtx->Req.Io.cbTransferLeft)
     
    27622456    /* Next step */
    27632457    pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync;
     2458    return VINF_SUCCESS;
     2459}
     2460
     2461static int vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx)
     2462{
     2463    int rc = VINF_SUCCESS;
     2464    size_t cbPostRead  = pIoCtx->Type.Child.cbPostRead;
     2465    size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
     2466    PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;
     2467
     2468    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2469
     2470    vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);
     2471    if (cbPostRead)
     2472    {
     2473        size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
     2474        size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
     2475        size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;
     2476
     2477        /* Now assemble the remaining data. */
     2478        if (cbWriteCopy)
     2479        {
     2480            /*
     2481             * The S/G buffer of the parent needs to be cloned because
     2482             * it is not allowed to modify the state.
     2483             */
     2484            RTSGBUF SgBufParentTmp;
     2485
     2486            RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
     2487            RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
     2488        }
     2489
     2490        /* Zero out the remainder of this block. Will never be visible, as this
     2491         * is beyond the limit of the image. */
     2492        if (cbFill)
     2493        {
     2494            RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage);
     2495            vdIoCtxSet(pIoCtx, '\0', cbFill);
     2496        }
     2497
     2498        if (cbReadImage)
     2499        {
     2500            /* Read remaining data. */
     2501        }
     2502        else
     2503        {
     2504            /* Write the full block to the virtual disk. */
     2505            RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
     2506            pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
     2507        }
     2508    }
     2509    else
     2510    {
     2511        /* Write the full block to the virtual disk. */
     2512        RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
     2513        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
     2514    }
     2515
     2516    return rc;
     2517}
     2518
     2519static int vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx)
     2520{
     2521    int rc = VINF_SUCCESS;
     2522
     2523    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2524
     2525    pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
     2526
     2527    if (pIoCtx->Req.Io.cbTransferLeft)
     2528        rc = vdReadHelperAsync(pIoCtx);
     2529
     2530    if (   RT_SUCCESS(rc)
     2531        && (   pIoCtx->Req.Io.cbTransferLeft
     2532            || pIoCtx->cMetaTransfersPending))
     2533        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     2534     else
     2535        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
     2536
     2537    return rc;
     2538}
     2539
     2540static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
     2541{
     2542    PVBOXHDD pDisk = pIoCtx->pDisk;
     2543    uint64_t uOffset   = pIoCtx->Type.Child.uOffsetSaved;
     2544    size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
     2545    size_t cbPreRead   = pIoCtx->Type.Child.cbPreRead;
     2546    size_t cbPostRead  = pIoCtx->Type.Child.cbPostRead;
     2547    size_t cbWrite     = pIoCtx->Type.Child.cbWriteParent;
     2548    size_t cbFill = 0;
     2549    size_t cbWriteCopy = 0;
     2550    size_t cbReadImage = 0;
     2551
     2552    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2553
     2554    AssertPtr(pIoCtx->pIoCtxParent);
     2555    Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);
     2556
     2557    /* Calculate the amount of data to read that goes after the write to fill the block. */
     2558    if (cbPostRead)
     2559    {
     2560        /* If we have data to be written, use that instead of reading
     2561         * data from the image. */
     2562        cbWriteCopy;
     2563        if (cbWrite > cbThisWrite)
     2564            cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
     2565
     2566        /* Figure out how much we cannot read from the image, because
     2567         * the last block to write might exceed the nominal size of the
     2568         * image for technical reasons. */
     2569        if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
     2570            cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
     2571
     2572        /* The rest must be read from the image. */
     2573        cbReadImage = cbPostRead - cbWriteCopy - cbFill;
     2574    }
     2575
     2576    pIoCtx->Type.Child.Write.Optimized.cbFill      = cbFill;
     2577    pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
     2578    pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;
     2579
     2580    /* Next step */
     2581    if (cbPreRead)
     2582    {
     2583        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync;
     2584
     2585        /* Read the data that goes before the write to fill the block. */
     2586        pIoCtx->Req.Io.cbTransferLeft = cbPreRead;
     2587        pIoCtx->Req.Io.cbTransfer     = pIoCtx->Req.Io.cbTransferLeft;
     2588        pIoCtx->Req.Io.uOffset       -= cbPreRead;
     2589    }
     2590    else
     2591        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
     2592
    27642593    return VINF_SUCCESS;
    27652594}
     
    38983727    PVDIOTASK pHead = ASMAtomicXchgPtrT(&pDisk->pIoTasksPendingHead, NULL, PVDIOTASK);
    38993728
     3729    Log(("I/O task list cleared\n"));
     3730
    39003731    /* Reverse order. */
    39013732    PVDIOTASK pCur = pHead;
     
    40273858    PVBOXHDD pDisk = pIoStorage->pVDIo->pDisk;
    40283859
     3860    Log(("Deferring I/O task pIoTask=%p\n", pIoTask));
     3861
    40293862    /* Put it on the waiting list. */
    40303863    PVDIOTASK pNext = ASMAtomicUoReadPtrT(&pDisk->pIoTasksPendingHead, PVDIOTASK);
     
    42264059
    42274060            void *pvTask;
     4061            Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx));
    42284062            rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser,
    42294063                                                   pIoStorage->pStorage, uOffset,
     
    43194153
    43204154            void *pvTask;
     4155            Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx));
    43214156            rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser,
    43224157                                                    pIoStorage->pStorage,
     
    96379472                                  pfnComplete, pvUser1, pvUser2,
    96389473                                  NULL, vdReadHelperAsync,
    9639                                   VDIOCTX_FLAGS_DEFAULT);
     9474                                  VDIOCTX_FLAGS_ZERO_FREE_BLOCKS);
    96409475        if (!pIoCtx)
    96419476        {
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette