VirtualBox

Changeset 44794 in vbox for trunk/src/VBox/Storage


Ignore:
Timestamp:
Feb 21, 2013 7:42:59 PM (12 years ago)
Author:
vboxsync
Message:

Storage: Another try to unify sync and async I/O code

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Storage/VD.cpp

    r44628 r44794  
    355355            /** S/G buffer */
    356356            RTSGBUF              SgBuf;
     357            /** Number of bytes to clear in the buffer before the current read. */
     358            size_t               cbBufClear;
     359            /** Number of images to read. */
     360            unsigned             cImagesRead;
     361            /** Override for the parent image to start reading from. */
     362            PVDIMAGE             pImageParentOverride;
    357363        } Io;
    358364        /** Discard requests. */
     
    424430
    425431/** Default flags for an I/O context, i.e. unblocked and async. */
    426 #define VDIOCTX_FLAGS_DEFAULT (0)
     432#define VDIOCTX_FLAGS_DEFAULT                   (0)
    427433/** Flag whether the context is blocked. */
    428 #define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0)
     434#define VDIOCTX_FLAGS_BLOCKED          RT_BIT_32(0)
    429435/** Flag whether the I/O context is using synchronous I/O. */
    430 #define VDIOCTX_FLAGS_SYNC    RT_BIT_32(1)
     436#define VDIOCTX_FLAGS_SYNC             RT_BIT_32(1)
     437/** Flag whether the read should update the cache. */
     438#define VDIOCTX_FLAGS_READ_UDATE_CACHE RT_BIT_32(2)
     439/** Flag whether free blocks should be zeroed.
     440 * If false and no image has data for the specified
     441 * range VERR_VD_BLOCK_FREE is returned for the I/O context.
     442 * Note that unallocated blocks are still zeroed
     443 * if at least one image has valid data for a part
     444 * of the range.
     445 */
     446#define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS RT_BIT_32(3)
     447/** Don't free the I/O context when complete because
     448 * it was allocated elsewhere (stack, ...). */
     449#define VDIOCTX_FLAGS_DONT_FREE        RT_BIT_32(4)
    431450
    432451/** NIL I/O context pointer value. */
     
    577596/** Forward declaration of the async discard helper. */
    578597static int vdDiscardHelperAsync(PVDIOCTX pIoCtx);
     598static int vdWriteHelperAsync(PVDIOCTX pIoCtx);
    579599static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk);
    580600static int vdDiskUnlock(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc);
     601static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq);
    581602
    582603/**
     
    809830    pIoCtx->Req.Io.pImageStart    = pImageStart;
    810831    pIoCtx->Req.Io.pImageCur      = pImageStart;
     832    pIoCtx->Req.Io.cbBufClear     = 0;
     833    pIoCtx->Req.Io.pImageParentOverride = NULL;
    811834    pIoCtx->cDataTransfersPending = 0;
    812835    pIoCtx->cMetaTransfersPending = 0;
     
    817840    pIoCtx->pfnIoCtxTransferNext  = NULL;
    818841    pIoCtx->rcReq                 = VINF_SUCCESS;
     842    pIoCtx->pIoCtxParent          = NULL;
    819843
    820844    /* There is no S/G list for a flush request. */
     
    845869 */
    846870static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset,
    847                              PVDIOCTX pIoCtx, size_t cbRead, size_t *pcbRead)
     871                             size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead)
    848872{
    849873    int rc = VINF_SUCCESS;
     
    908932
    909933/**
     934 * Creates a new empty discard state.
     935 *
     936 * @returns Pointer to the new discard state or NULL if out of memory.
     937 */
     938static PVDDISCARDSTATE vdDiscardStateCreate(void)
     939{
     940    PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));
     941
     942    if (pDiscard)
     943    {
     944        RTListInit(&pDiscard->ListLru);
     945        pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
     946        if (!pDiscard->pTreeBlocks)
     947        {
     948            RTMemFree(pDiscard);
     949            pDiscard = NULL;
     950        }
     951    }
     952
     953    return pDiscard;
     954}
     955
     956/**
     957 * Removes the least recently used blocks from the waiting list until
     958 * the new value is reached.
     959 *
     960 * @returns VBox status code.
     961 * @param   pDisk              VD disk container.
     962 * @param   pDiscard           The discard state.
     963 * @param   cbDiscardingNew    How many bytes should be waiting on success.
     964 *                             The number of bytes waiting can be less.
     965 */
     966static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)
     967{
     968    int rc = VINF_SUCCESS;
     969
     970    LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
     971                 pDisk, pDiscard, cbDiscardingNew));
     972
     973    while (pDiscard->cbDiscarding > cbDiscardingNew)
     974    {
     975        PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
     976
     977        Assert(!RTListIsEmpty(&pDiscard->ListLru));
     978
     979        /* Go over the allocation bitmap and mark all discarded sectors as unused. */
     980        uint64_t offStart = pBlock->Core.Key;
     981        uint32_t idxStart = 0;
     982        size_t cbLeft = pBlock->cbDiscard;
     983        bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
     984        uint32_t cSectors = pBlock->cbDiscard / 512;
     985
     986        while (cbLeft > 0)
     987        {
     988            int32_t idxEnd;
     989            size_t cbThis = cbLeft;
     990
     991            if (fAllocated)
     992            {
     993                /* Check for the first unallocated bit. */
     994                idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
     995                if (idxEnd != -1)
     996                {
     997                    cbThis = (idxEnd - idxStart) * 512;
     998                    fAllocated = false;
     999                }
     1000            }
     1001            else
     1002            {
     1003                /* Mark as unused and check for the first set bit. */
     1004                idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
     1005                if (idxEnd != -1)
     1006                    cbThis = (idxEnd - idxStart) * 512;
     1007
     1008
     1009                VDIOCTX IoCtx;
     1010                vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,
     1011                            NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
     1012                rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,
     1013                                                            &IoCtx, offStart, cbThis, NULL,
     1014                                                            NULL, &cbThis, NULL,
     1015                                                            VD_DISCARD_MARK_UNUSED);
     1016                if (RT_FAILURE(rc))
     1017                    break;
     1018
     1019                fAllocated = true;
     1020            }
     1021
     1022            idxStart  = idxEnd;
     1023            offStart += cbThis;
     1024            cbLeft   -= cbThis;
     1025        }
     1026
     1027        if (RT_FAILURE(rc))
     1028            break;
     1029
     1030        PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
     1031        Assert(pBlockRemove == pBlock);
     1032        RTListNodeRemove(&pBlock->NodeLru);
     1033
     1034        pDiscard->cbDiscarding -= pBlock->cbDiscard;
     1035        RTMemFree(pBlock->pbmAllocated);
     1036        RTMemFree(pBlock);
     1037    }
     1038
     1039    Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
     1040
     1041    LogFlowFunc(("returns rc=%Rrc\n", rc));
     1042    return rc;
     1043}
     1044
     1045/**
     1046 * Destroys the current discard state, writing any waiting blocks to the image.
     1047 *
     1048 * @returns VBox status code.
     1049 * @param   pDisk    VD disk container.
     1050 */
     1051static int vdDiscardStateDestroy(PVBOXHDD pDisk)
     1052{
     1053    int rc = VINF_SUCCESS;
     1054
     1055    if (pDisk->pDiscard)
     1056    {
     1057        rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);
     1058        AssertRC(rc);
     1059        RTMemFree(pDisk->pDiscard->pTreeBlocks);
     1060        RTMemFree(pDisk->pDiscard);
     1061        pDisk->pDiscard = NULL;
     1062    }
     1063
     1064    return rc;
     1065}
     1066
     1067/**
     1068 * Marks the given range as allocated in the image.
     1069 * Required if there are discards in progress and a write to a block which can get discarded
     1070 * is written to.
     1071 *
     1072 * @returns VBox status code.
     1073 * @param   pDisk    VD container data.
     1074 * @param   uOffset  First byte to mark as allocated.
     1075 * @param   cbRange  Number of bytes to mark as allocated.
     1076 */
     1077static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange)
     1078{
     1079    PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
     1080    int rc = VINF_SUCCESS;
     1081
     1082    if (pDiscard)
     1083    {
     1084        do
     1085        {
     1086            size_t cbThisRange = cbRange;
     1087            PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);
     1088
     1089            if (pBlock)
     1090            {
     1091                int32_t idxStart, idxEnd;
     1092
     1093                Assert(!(cbThisRange % 512));
     1094                Assert(!((uOffset - pBlock->Core.Key) % 512));
     1095
     1096                cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);
     1097
     1098                idxStart = (uOffset - pBlock->Core.Key) / 512;
     1099                idxEnd = idxStart + (cbThisRange / 512);
     1100                ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);
     1101            }
     1102            else
     1103            {
     1104                pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);
     1105                if (pBlock)
     1106                    cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);
     1107            }
     1108
     1109            Assert(cbRange >= cbThisRange);
     1110
     1111            uOffset += cbThisRange;
     1112            cbRange -= cbThisRange;
     1113        } while (cbRange != 0);
     1114    }
     1115
     1116    return rc;
     1117}
     1118
     1119DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
     1120                                  uint64_t uOffset, size_t cbTransfer,
     1121                                  PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,
     1122                                  void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
     1123                                  uint32_t fFlags)
     1124{
     1125    PVDIOCTX pIoCtx = NULL;
     1126
     1127    pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
     1128    if (RT_LIKELY(pIoCtx))
     1129    {
     1130        vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
     1131                    pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
     1132    }
     1133
     1134    return pIoCtx;
     1135}
     1136
     1137DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
     1138                                      uint64_t uOffset, size_t cbTransfer,
     1139                                      PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
     1140                                      PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
     1141                                      void *pvUser1, void *pvUser2,
     1142                                      void *pvAllocation,
     1143                                      PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
     1144                                      uint32_t fFlags)
     1145{
     1146    PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
     1147                                   pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
     1148
     1149    if (RT_LIKELY(pIoCtx))
     1150    {
     1151        pIoCtx->pIoCtxParent          = NULL;
     1152        pIoCtx->Type.Root.pfnComplete = pfnComplete;
     1153        pIoCtx->Type.Root.pvUser1     = pvUser1;
     1154        pIoCtx->Type.Root.pvUser2     = pvUser2;
     1155    }
     1156
     1157    LogFlow(("Allocated root I/O context %#p\n", pIoCtx));
     1158    return pIoCtx;
     1159}
     1160
     1161DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges,
     1162                                         unsigned cRanges,
     1163                                         PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
     1164                                         void *pvUser1, void *pvUser2,
     1165                                         void *pvAllocation,
     1166                                         PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
     1167                                         uint32_t fFlags)
     1168{
     1169    PVDIOCTX pIoCtx = NULL;
     1170
     1171    pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
     1172    if (RT_LIKELY(pIoCtx))
     1173    {
     1174        pIoCtx->pIoCtxNext                = NULL;
     1175        pIoCtx->pDisk                     = pDisk;
     1176        pIoCtx->enmTxDir                  = VDIOCTXTXDIR_DISCARD;
     1177        pIoCtx->cDataTransfersPending     = 0;
     1178        pIoCtx->cMetaTransfersPending     = 0;
     1179        pIoCtx->fComplete                 = false;
     1180        pIoCtx->fFlags                    = fFlags;
     1181        pIoCtx->pvAllocation              = pvAllocation;
     1182        pIoCtx->pfnIoCtxTransfer          = pfnIoCtxTransfer;
     1183        pIoCtx->pfnIoCtxTransferNext      = NULL;
     1184        pIoCtx->rcReq                     = VINF_SUCCESS;
     1185        pIoCtx->Req.Discard.paRanges      = paRanges;
     1186        pIoCtx->Req.Discard.cRanges       = cRanges;
     1187        pIoCtx->Req.Discard.idxRange      = 0;
     1188        pIoCtx->Req.Discard.cbDiscardLeft = 0;
     1189        pIoCtx->Req.Discard.offCur        = 0;
     1190        pIoCtx->Req.Discard.cbThisDiscard = 0;
     1191
     1192        pIoCtx->pIoCtxParent          = NULL;
     1193        pIoCtx->Type.Root.pfnComplete = pfnComplete;
     1194        pIoCtx->Type.Root.pvUser1     = pvUser1;
     1195        pIoCtx->Type.Root.pvUser2     = pvUser2;
     1196    }
     1197
     1198    LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));
     1199    return pIoCtx;
     1200}
     1201
     1202DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
     1203                                       uint64_t uOffset, size_t cbTransfer,
     1204                                       PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
     1205                                       PVDIOCTX pIoCtxParent, size_t cbTransferParent,
     1206                                       size_t cbWriteParent, void *pvAllocation,
     1207                                       PFNVDIOCTXTRANSFER pfnIoCtxTransfer)
     1208{
     1209    PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
     1210                                   pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0);
     1211
     1212    AssertPtr(pIoCtxParent);
     1213    Assert(!pIoCtxParent->pIoCtxParent);
     1214
     1215    if (RT_LIKELY(pIoCtx))
     1216    {
     1217        pIoCtx->pIoCtxParent                   = pIoCtxParent;
     1218        pIoCtx->Type.Child.uOffsetSaved        = uOffset;
     1219        pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;
     1220        pIoCtx->Type.Child.cbTransferParent    = cbTransferParent;
     1221        pIoCtx->Type.Child.cbWriteParent       = cbWriteParent;
     1222    }
     1223
     1224    LogFlow(("Allocated child I/O context %#p\n", pIoCtx));
     1225    return pIoCtx;
     1226}
     1227
     1228DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)
     1229{
     1230    PVDIOTASK pIoTask = NULL;
     1231
     1232    pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
     1233    if (pIoTask)
     1234    {
     1235        pIoTask->pIoStorage           = pIoStorage;
     1236        pIoTask->pfnComplete          = pfnComplete;
     1237        pIoTask->pvUser               = pvUser;
     1238        pIoTask->fMeta                = false;
     1239        pIoTask->Type.User.cbTransfer = cbTransfer;
     1240        pIoTask->Type.User.pIoCtx     = pIoCtx;
     1241    }
     1242
     1243    return pIoTask;
     1244}
     1245
     1246DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)
     1247{
     1248    PVDIOTASK pIoTask = NULL;
     1249
     1250    pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
     1251    if (pIoTask)
     1252    {
     1253        pIoTask->pIoStorage          = pIoStorage;
     1254        pIoTask->pfnComplete         = pfnComplete;
     1255        pIoTask->pvUser              = pvUser;
     1256        pIoTask->fMeta               = true;
     1257        pIoTask->Type.Meta.pMetaXfer = pMetaXfer;
     1258    }
     1259
     1260    return pIoTask;
     1261}
     1262
     1263DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1264{
     1265    LogFlow(("Freeing I/O context %#p\n", pIoCtx));
     1266
     1267    if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE))
     1268    {
     1269        if (pIoCtx->pvAllocation)
     1270            RTMemFree(pIoCtx->pvAllocation);
     1271#ifdef DEBUG
     1272        memset(pIoCtx, 0xff, sizeof(VDIOCTX));
     1273#endif
     1274        RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);
     1275    }
     1276}
     1277
     1278DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask)
     1279{
     1280    RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);
     1281}
     1282
     1283DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)
     1284{
     1285    AssertPtr(pIoCtx->pIoCtxParent);
     1286
     1287    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
     1288    pIoCtx->Req.Io.uOffset        = pIoCtx->Type.Child.uOffsetSaved;
     1289    pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved;
     1290}
     1291
     1292DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)
     1293{
     1294    PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));
     1295
     1296    if (RT_LIKELY(pMetaXfer))
     1297    {
     1298        pMetaXfer->Core.Key     = uOffset;
     1299        pMetaXfer->Core.KeyLast = uOffset + cb - 1;
     1300        pMetaXfer->fFlags       = VDMETAXFER_TXDIR_NONE;
     1301        pMetaXfer->cbMeta       = cb;
     1302        pMetaXfer->pIoStorage   = pIoStorage;
     1303        pMetaXfer->cRefs        = 0;
     1304        RTListInit(&pMetaXfer->ListIoCtxWaiting);
     1305    }
     1306    return pMetaXfer;
     1307}
     1308
     /**
      * Pushes an I/O context onto the head of a singly linked list using an
      * atomic compare-and-exchange loop, so no lock is taken here.
      *
      * @param   ppList    Pointer to the list head.
      * @param   pIoCtx    The I/O context to insert; its pIoCtxNext member is
      *                    overwritten.
      */
     1309DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)
     1310{
     1311    /* Put it on the waiting list. */
     1312    PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);
     1313    PVDIOCTX pHeadOld;
     1314    pIoCtx->pIoCtxNext = pNext;
             /* Retry until the head could be swapped from pNext to pIoCtx. */
     1315    while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))
     1316    {
                 /* The head changed in the meantime; link to the head reported back and try again. */
     1317        pNext = pHeadOld;
     1318        Assert(pNext != pIoCtx);
     1319        pIoCtx->pIoCtxNext = pNext;
     1320        ASMNopPause();
     1321    }
     1322}
     1323
     1324DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1325{
     1326    LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx));
     1327
     1328    Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));
     1329    pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
     1330    vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);
     1331}
     1332
     1333static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)
     1334{
     1335    return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);
     1336}
     1337
     1338static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)
     1339{
     1340    return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);
     1341}
     1342
     1343static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)
     1344{
     1345    return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
     1346}
     1347
     1348static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)
     1349{
     1350    return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
     1351}
     1352
     1353static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)
     1354{
     1355    return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);
     1356}
     1357
     1358/**
     1359 * Process the I/O context, core method which assumes that the I/O context
     1360 * acquired the lock.
     1361 *
     1362 * @returns VBox status code.
     1363 * @param   pIoCtx    I/O context to process.
     1364 */
     /*
      * Every path through this function ends with either
      * VINF_VD_ASYNC_IO_FINISHED or VERR_VD_ASYNC_IO_IN_PROGRESS. A real
      * failure of a transfer function is not returned directly but stored in
      * pIoCtx->rcReq.
      */
     1365static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)
     1366{
     1367    int rc = VINF_SUCCESS;
     1368
     1369    VD_IS_LOCKED(pIoCtx->pDisk);
     1370
     1371    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     1372
             /* Nothing pending and no transfer function left: the context is done. */
     1373    if (   !pIoCtx->cMetaTransfersPending
     1374        && !pIoCtx->cDataTransfersPending
     1375        && !pIoCtx->pfnIoCtxTransfer)
     1376    {
     1377        rc = VINF_VD_ASYNC_IO_FINISHED;
     1378        goto out;
     1379    }
     1380
     1381    /*
     1382     * We complete the I/O context in case of an error
     1383     * if there is no I/O task pending.
     1384     */
     1385    if (   RT_FAILURE(pIoCtx->rcReq)
     1386        && !pIoCtx->cMetaTransfersPending
     1387        && !pIoCtx->cDataTransfersPending)
     1388    {
     1389        rc = VINF_VD_ASYNC_IO_FINISHED;
     1390        goto out;
     1391    }
     1392
     1393    /* Don't change anything if there is a metadata transfer pending or we are blocked. */
     1394    if (   pIoCtx->cMetaTransfersPending
     1395        || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
     1396    {
     1397        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1398        goto out;
     1399    }
     1400
     1401    if (pIoCtx->pfnIoCtxTransfer)
     1402    {
     1403        /* Call the transfer function advancing to the next while there is no error. */
     1404        while (   pIoCtx->pfnIoCtxTransfer
     1405               && !pIoCtx->cMetaTransfersPending
     1406               && RT_SUCCESS(rc))
     1407        {
     1408            LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));
     1409            rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);
     1410
     1411            /* Advance to the next part of the transfer if the current one succeeded. */
     1412            if (RT_SUCCESS(rc))
     1413            {
     1414                pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
     1415                pIoCtx->pfnIoCtxTransferNext = NULL;
     1416            }
     1417        }
     1418    }
     1419
             /*
              * Translate the transfer result. Success with nothing pending means
              * finished. Success with transfers pending, VERR_VD_NOT_ENOUGH_METADATA
              * and VERR_VD_IOCTX_HALT are all reported as still in progress.
              */
     1420    if (   RT_SUCCESS(rc)
     1421        && !pIoCtx->cMetaTransfersPending
     1422        && !pIoCtx->cDataTransfersPending)
     1423        rc = VINF_VD_ASYNC_IO_FINISHED;
     1424    else if (   RT_SUCCESS(rc)
     1425             || rc == VERR_VD_NOT_ENOUGH_METADATA
     1426             || rc == VERR_VD_IOCTX_HALT)
     1427        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1428    else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
     1429    {
                 /* Store the error only if rcReq still holds VINF_SUCCESS, i.e. keep the first one. */
     1430        ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);
     1431        /*
     1432         * The I/O context completed if we have an error and there is no data
     1433         * or meta data transfer pending.
     1434         */
     1435        if (   !pIoCtx->cMetaTransfersPending
     1436            && !pIoCtx->cDataTransfersPending)
     1437            rc = VINF_VD_ASYNC_IO_FINISHED;
     1438        else
     1439            rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1440    }
     1441
     1442out:
     1443    LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
     1444                 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,
     1445                 pIoCtx->fComplete));
     1446
     1447    return rc;
     1448}
     1449
     1450/**
     1451 * Processes the list of waiting I/O contexts.
     1452 *
     1453 * @returns VBox status code.
     1454 * @param   pDisk    The disk structure.
     1455 * @param   pIoCtxRc An I/O context handle which waits on the list. When processed
     1456 *                   The status code is returned. NULL if there is no I/O context
     1457 *                   to return the status code for.
     1458 */
     /*
      * The context given in pIoCtxRc is neither completed nor freed here; only
      * its processing status is handed back. If it is not found on the list
      * the function returns VINF_SUCCESS.
      */
     1459static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)
     1460{
     1461    int rc = VINF_SUCCESS;
     1462
     1463    LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));
     1464
     1465    VD_IS_LOCKED(pDisk);
     1466
     1467    /* Get the waiting list and process it in FIFO order. */
     1468    PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);
     1469
     1470    /* Reverse it. */
             /* New entries are pushed at the head, so the detached list is newest first. */
     1471    PVDIOCTX pCur = pIoCtxHead;
     1472    pIoCtxHead = NULL;
     1473    while (pCur)
     1474    {
     1475        PVDIOCTX pInsert = pCur;
     1476        pCur = pCur->pIoCtxNext;
     1477        pInsert->pIoCtxNext = pIoCtxHead;
     1478        pIoCtxHead = pInsert;
     1479    }
     1480
     1481    /* Process now. */
     1482    pCur = pIoCtxHead;
     1483    while (pCur)
     1484    {
     1485        int rcTmp;
     1486        PVDIOCTX pTmp = pCur;
     1487
                 /* Unlink before processing; pTmp may be freed further down. */
     1488        pCur = pCur->pIoCtxNext;
     1489        pTmp->pIoCtxNext = NULL;
     1490
     1491        /*
     1492         * Need to clear the sync flag here if there is a new I/O context
     1493         * with it set and the context is not given in pIoCtxRc.
     1494         * This happens most likely on a different thread and that one shouldn't
     1495         * process the context synchronously.
     1496         *
     1497         * The thread who issued the context will wait on the event semaphore
     1498         * anyway which is signalled when the completion handler is called.
     1499         */
     1500        if (   pTmp->fFlags & VDIOCTX_FLAGS_SYNC
     1501            && pTmp != pIoCtxRc)
     1502            pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC;
     1503
     1504        rcTmp = vdIoCtxProcessLocked(pTmp);
     1505        if (pTmp == pIoCtxRc)
     1506        {
     1507            /* The given I/O context was processed, pass the return code to the caller. */
     1508            rc = rcTmp;
     1509        }
                 /* fComplete is flipped atomically so the completion callback runs only once. */
     1510        else if (   rcTmp == VINF_VD_ASYNC_IO_FINISHED
     1511                 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
     1512        {
     1513            LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
     1514            vdThreadFinishWrite(pDisk);
     1515            pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
     1516                                        pTmp->Type.Root.pvUser2,
     1517                                        pTmp->rcReq);
     1518            vdIoCtxFree(pDisk, pTmp);
     1519        }
     1520    }
     1521
     1522    LogFlowFunc(("returns rc=%Rrc\n", rc));
     1523    return rc;
     1524}
     1525
     1526/**
     1527 * Processes the list of blocked I/O contexts.
     1528 *
     1529 * @returns nothing.
     1530 * @param   pDisk    The disk structure.
     1531 */
     /*
      * Each context has its blocked flag cleared and is processed again.
      * Contexts that finish are completed through their root completion
      * callback and freed.
      */
     1532static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)
     1533{
     1534    LogFlowFunc(("pDisk=%#p\n", pDisk));
     1535
     1536    VD_IS_LOCKED(pDisk);
     1537
     1538    /* Get the waiting list and process it in FIFO order. */
     1539    PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);
     1540
     1541    /* Reverse it. */
     1542    PVDIOCTX pCur = pIoCtxHead;
     1543    pIoCtxHead = NULL;
     1544    while (pCur)
     1545    {
     1546        PVDIOCTX pInsert = pCur;
     1547        pCur = pCur->pIoCtxNext;
     1548        pInsert->pIoCtxNext = pIoCtxHead;
     1549        pIoCtxHead = pInsert;
     1550    }
     1551
     1552    /* Process now. */
     1553    pCur = pIoCtxHead;
     1554    while (pCur)
     1555    {
     1556        int rc;
     1557        PVDIOCTX pTmp = pCur;
     1558
                 /* Unlink before processing; pTmp may be freed further down. */
     1559        pCur = pCur->pIoCtxNext;
     1560        pTmp->pIoCtxNext = NULL;
     1561
                 /* Only root contexts carrying the blocked flag are expected on this list. */
     1562        Assert(!pTmp->pIoCtxParent);
     1563        Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);
     1564        pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
     1565
     1566        rc = vdIoCtxProcessLocked(pTmp);
                 /* fComplete is flipped atomically so the completion callback runs only once. */
     1567        if (   rc == VINF_VD_ASYNC_IO_FINISHED
     1568            && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
     1569        {
     1570            LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
     1571            vdThreadFinishWrite(pDisk);
     1572            pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
     1573                                        pTmp->Type.Root.pvUser2,
     1574                                        pTmp->rcReq);
     1575            vdIoCtxFree(pDisk, pTmp);
     1576        }
     1577    }
     1578
     1579    LogFlowFunc(("returns\n"));
     1580}
     1581
     1582/**
     1583 * Processes the I/O context trying to lock the criticial section.
     1584 * The context is deferred if the critical section is busy.
     1585 *
     1586 * @returns VBox status code.
     1587 * @param   pIoCtx    The I/O context to process.
     1588 */
     1589static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)
     1590{
     1591    int rc = VINF_SUCCESS;
     1592    PVBOXHDD pDisk = pIoCtx->pDisk;
     1593
     1594    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     1595
     1596    /* Put it on the waiting list first. */
     1597    vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);
     1598
     1599    if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
     1600    {
     1601        /* Leave it again, the context will be processed just before leaving the lock. */
     1602        LogFlowFunc(("Successfully acquired the lock\n"));
     1603        rc = vdDiskUnlock(pDisk, pIoCtx);
     1604    }
     1605    else
     1606    {
     1607        LogFlowFunc(("Lock is held\n"));
     1608        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1609    }
     1610
     1611    return rc;
     1612}
     1613
     1614/**
     1615 * Process the I/O context in a synchronous manner, waiting
     1616 * for it to complete.
     1617 *
     1618 * @returns VBox status code of the completed request.
     1619 * @param   pIoCtx    The sync I/O context.
     1620 */
     1621static int vdIoCtxProcessSync(PVDIOCTX pIoCtx)
     1622{
     1623    int rc = VINF_SUCCESS;
     1624    PVBOXHDD pDisk = pIoCtx->pDisk;
     1625
     1626    LogFlowFunc(("pIoCtx=%p\n", pIoCtx));
     1627
     1628    AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC,
     1629              ("I/O context is not marked as synchronous\n"));
     1630
     1631    rc = vdIoCtxProcessTryLockDefer(pIoCtx);
     1632    if (rc == VINF_VD_ASYNC_IO_FINISHED)
     1633        rc = VINF_SUCCESS;
     1634
     1635    if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
     1636    {
     1637        rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT);
     1638        AssertRC(rc);
     1639
     1640        rc = pDisk->rcSync;
     1641    }
     1642    else /* Success or error. */
     1643        vdIoCtxFree(pDisk, pIoCtx);
     1644
     1645    return rc;
     1646}
     1647
     1648DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1649{
     1650    return pDisk->pIoCtxLockOwner == pIoCtx;
     1651}
     1652
     1653static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1654{
     1655    int rc = VINF_SUCCESS;
     1656
     1657    VD_IS_LOCKED(pDisk);
     1658
     1659    LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));
     1660
     1661    if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))
     1662    {
     1663        Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */
     1664        vdIoCtxDefer(pDisk, pIoCtx);
     1665        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1666    }
     1667
     1668    LogFlowFunc(("returns -> %Rrc\n", rc));
     1669    return rc;
     1670}
     1671
     1672static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
     1673{
     1674    LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
     1675                 pDisk, pIoCtx, fProcessBlockedReqs));
     1676
     1677    VD_IS_LOCKED(pDisk);
     1678
     1679    LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
     1680    Assert(pDisk->pIoCtxLockOwner == pIoCtx);
     1681    ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);
     1682
     1683    if (fProcessBlockedReqs)
     1684    {
     1685        /* Process any blocked writes if the current request didn't caused another growing. */
     1686        vdDiskProcessBlockedIoCtx(pDisk);
     1687    }
     1688
     1689    LogFlowFunc(("returns\n"));
     1690}
     1691
     1692/**
    9101693 * Internal: Reads a given amount of data from the image chain of the disk.
    9111694 **/
    9121695static int vdDiskReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride,
    913                             uint64_t uOffset, void *pvBuf, size_t cbRead, size_t *pcbThisRead)
     1696                            uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead)
    9141697{
    9151698    int rc = VINF_SUCCESS;
    9161699    size_t cbThisRead = cbRead;
    917     RTSGSEG SegmentBuf;
    918     RTSGBUF SgBuf;
    919     VDIOCTX IoCtx;
    9201700
    9211701    AssertPtr(pcbThisRead);
    9221702
    9231703    *pcbThisRead = 0;
    924 
    925     SegmentBuf.pvSeg = pvBuf;
    926     SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE;
    927     RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    928     vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL,
    929                 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    9301704
    9311705    /*
     
    9341708     */
    9351709    rc = pImage->Backend->pfnRead(pImage->pBackendData,
    936                                        uOffset, cbThisRead, &IoCtx,
    937                                        &cbThisRead);
     1710                                  uOffset, cbThisRead, pIoCtx,
     1711                                  &cbThisRead);
    9381712
    9391713    if (rc == VERR_VD_BLOCK_FREE)
     
    9441718        {
    9451719            rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    946                                                    uOffset, cbThisRead, &IoCtx,
    947                                                    &cbThisRead);
     1720                                              uOffset, cbThisRead, pIoCtx,
     1721                                              &cbThisRead);
    9481722        }
    9491723    }
     
    9521726        *pcbThisRead = cbThisRead;
    9531727
     1728    return rc;
     1729}
     1730
     1731/**
     1732 * internal: read the specified amount of data in whatever blocks the backend
     1733 * will give us - async version.
     1734 */
     1735static int vdReadHelperAsync(PVDIOCTX pIoCtx)
     1736{
     1737    int rc;
     1738    PVBOXHDD pDisk                = pIoCtx->pDisk;
     1739    size_t cbToRead               = pIoCtx->Req.Io.cbTransfer;
     1740    uint64_t uOffset              = pIoCtx->Req.Io.uOffset;
     1741    PVDIMAGE pCurrImage           = pIoCtx->Req.Io.pImageCur;
     1742    PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;
     1743    unsigned cImagesRead          = pIoCtx->Req.Io.cImagesRead;
     1744    size_t cbThisRead;
     1745
     1746    /* Loop until all reads started or we have a backend which needs to read metadata. */
     1747    do
     1748    {
     1749        /* Search for image with allocated block. Do not attempt to read more
     1750         * than the previous reads marked as valid. Otherwise this would return
     1751         * stale data when different block sizes are used for the images. */
     1752        cbThisRead = cbToRead;
     1753
     1754        if (   pDisk->pCache
     1755            && !pImageParentOverride)
     1756        {
     1757            rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead,
     1758                                   pIoCtx, &cbThisRead);
     1759            if (rc == VERR_VD_BLOCK_FREE)
     1760            {
     1761                rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead,
     1762                                      pIoCtx, &cbThisRead);
     1763
     1764                /* If the read was successful, write the data back into the cache. */
     1765                if (   RT_SUCCESS(rc)
     1766                    && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UDATE_CACHE)
     1767                {
     1768                    rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead,
     1769                                            pIoCtx, NULL);
     1770                }
     1771            }
     1772        }
     1773        else
     1774        {
     1775
     1776            /*
     1777             * Try to read from the given image.
     1778             * If the block is not allocated read from override chain if present.
     1779             */
     1780            rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
     1781                                              uOffset, cbThisRead, pIoCtx,
     1782                                              &cbThisRead);
     1783
     1784            if (   rc == VERR_VD_BLOCK_FREE
     1785                && cImagesRead != 1)
     1786            {
     1787                unsigned cImagesToProcess = cImagesRead;
     1788
     1789                pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev;
     1790                pIoCtx->Req.Io.pImageParentOverride = NULL;
     1791
     1792                while (pCurrImage && rc == VERR_VD_BLOCK_FREE)
     1793                {
     1794                    rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
     1795                                                      uOffset, cbThisRead,
     1796                                                      pIoCtx, &cbThisRead);
     1797                    if (cImagesToProcess == 1)
     1798                        break;
     1799                    else if (cImagesToProcess > 0)
     1800                        cImagesToProcess--;
     1801
     1802                    if (rc == VERR_VD_BLOCK_FREE)
     1803                        pCurrImage = pCurrImage->pPrev;
     1804                }
     1805            }
     1806        }
     1807
     1808        /* The task state will be updated on success already, don't do it here!. */
     1809        if (rc == VERR_VD_BLOCK_FREE)
     1810        {
     1811            /* No image in the chain contains the data for the block. */
     1812            ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);
     1813
     1814            /* Fill the free space with 0 if we are told to do so
     1815             * or a previous read returned valid data. */
     1816            if (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)
     1817                vdIoCtxSet(pIoCtx, '\0', cbThisRead);
     1818            else
     1819                pIoCtx->Req.Io.cbBufClear += cbThisRead;
     1820
     1821            if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)
     1822                rc = VINF_VD_NEW_ZEROED_BLOCK;
     1823            else
     1824                rc = VINF_SUCCESS;
     1825        }
     1826        else if (rc == VERR_VD_IOCTX_HALT)
     1827        {
     1828            uOffset  += cbThisRead;
     1829            cbToRead -= cbThisRead;
     1830            pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
     1831        }
     1832        else if (   RT_SUCCESS(rc)
     1833                 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
     1834        {
     1835            /* First not free block, fill the space before with 0. */
     1836            if (   pIoCtx->Req.Io.cbBufClear
     1837                && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
     1838            {
     1839                RTSGBUF SgBuf;
     1840                RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf);
     1841                RTSgBufReset(&SgBuf);
     1842                RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear);
     1843                pIoCtx->Req.Io.cbBufClear = 0;
     1844                pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
     1845            }
     1846            rc = VINF_SUCCESS;
     1847        }
     1848
     1849        if (RT_FAILURE(rc))
     1850            break;
     1851
     1852        cbToRead -= cbThisRead;
     1853        uOffset  += cbThisRead;
     1854        pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
     1855    } while (cbToRead != 0 && RT_SUCCESS(rc));
     1856
     1857    if (   rc == VERR_VD_NOT_ENOUGH_METADATA
     1858        || rc == VERR_VD_IOCTX_HALT)
     1859    {
     1860        /* Save the current state. */
     1861        pIoCtx->Req.Io.uOffset    = uOffset;
     1862        pIoCtx->Req.Io.cbTransfer = cbToRead;
     1863        pIoCtx->Req.Io.pImageCur  = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
     1864    }
     1865
     1866    return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
     1867           ? VERR_VD_BLOCK_FREE
     1868           : rc;
     1869}
     1870
     1871/**
     1872 * internal: parent image read wrapper for compacting.
     1873 */
     1874static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,
     1875                        size_t cbRead)
     1876{
     1877    PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;
     1878
     1879    /** @todo
     1880     * Only used for compaction so far which is not possible to mix with async I/O.
     1881     * Needs to be changed if we want to support online compaction of images.
     1882     */
     1883    bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true);
     1884    AssertMsgReturn(!fLocked,
     1885                    ("Calling synchronous parent read while another thread holds the disk lock\n"),
     1886                    VERR_VD_INVALID_STATE);
     1887
     1888    /* Fake an I/O context. */
     1889    RTSGSEG Segment;
     1890    RTSGBUF SgBuf;
     1891    VDIOCTX IoCtx;
     1892
     1893    Segment.pvSeg = pvBuf;
     1894    Segment.cbSeg = cbRead;
     1895    RTSgBufInit(&SgBuf, &Segment, 1);
     1896    vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage,
     1897                &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
     1898    int rc = vdReadHelperAsync(&IoCtx);
     1899    ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false);
    9541900    return rc;
    9551901}
     
    9841930                          bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead)
    9851931{
    986     int rc = VINF_SUCCESS;
    987     size_t cbThisRead;
    988     bool fAllFree = true;
    989     size_t cbBufClear = 0;
    990 
    991     /* Loop until all read. */
    992     do
    993     {
    994         /* Search for image with allocated block. Do not attempt to read more
    995          * than the previous reads marked as valid. Otherwise this would return
    996          * stale data when different block sizes are used for the images. */
    997         cbThisRead = cbRead;
    998 
    999         if (   pDisk->pCache
    1000             && !pImageParentOverride)
    1001         {
    1002 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */
    1003             rc = vdCacheReadHelper(pDisk->pCache, uOffset, pvBuf,
    1004                                    cbThisRead, &cbThisRead);
    1005 #endif
    1006             if (rc == VERR_VD_BLOCK_FREE)
    1007             {
    1008                 rc = vdDiskReadHelper(pDisk, pImage, NULL, uOffset, pvBuf, cbThisRead,
    1009                                       &cbThisRead);
    1010 
    1011                 /* If the read was successful, write the data back into the cache. */
    1012                 if (   RT_SUCCESS(rc)
    1013                     && fUpdateCache)
    1014                 {
    1015 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */
    1016                     rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf,
    1017                                             cbThisRead, NULL);
    1018 #endif
    1019                 }
    1020             }
    1021         }
    1022         else
    1023         {
    1024             RTSGSEG SegmentBuf;
    1025             RTSGBUF SgBuf;
    1026             VDIOCTX IoCtx;
    1027 
    1028             SegmentBuf.pvSeg = pvBuf;
    1029             SegmentBuf.cbSeg = cbThisRead;
    1030             RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    1031             vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL,
    1032                         &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    1033 
    1034             /*
    1035              * Try to read from the given image.
    1036              * If the block is not allocated read from override chain if present.
    1037              */
    1038             rc = pImage->Backend->pfnRead(pImage->pBackendData,
    1039                                                uOffset, cbThisRead, &IoCtx,
    1040                                                &cbThisRead);
    1041 
    1042             if (   rc == VERR_VD_BLOCK_FREE
    1043                 && cImagesRead != 1)
    1044             {
    1045                 unsigned cImagesToProcess = cImagesRead;
    1046 
    1047                 for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev;
    1048                      pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE;
    1049                      pCurrImage = pCurrImage->pPrev)
    1050                 {
    1051                     rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    1052                                                            uOffset, cbThisRead,
    1053                                                            &IoCtx, &cbThisRead);
    1054                     if (cImagesToProcess == 1)
    1055                         break;
    1056                     else if (cImagesToProcess > 0)
    1057                         cImagesToProcess--;
    1058                 }
    1059             }
    1060         }
    1061 
    1062         /* No image in the chain contains the data for the block. */
    1063         if (rc == VERR_VD_BLOCK_FREE)
    1064         {
    1065             /* Fill the free space with 0 if we are told to do so
    1066              * or a previous read returned valid data. */
    1067             if (fZeroFreeBlocks || !fAllFree)
    1068                 memset(pvBuf, '\0', cbThisRead);
    1069             else
    1070                 cbBufClear += cbThisRead;
    1071 
    1072             if (pImage->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)
    1073                 rc = VINF_VD_NEW_ZEROED_BLOCK;
    1074             else
    1075                 rc = VINF_SUCCESS;
    1076         }
    1077         else if (RT_SUCCESS(rc))
    1078         {
    1079             /* First not free block, fill the space before with 0. */
    1080             if (!fZeroFreeBlocks)
    1081             {
    1082                 memset((char *)pvBuf - cbBufClear, '\0', cbBufClear);
    1083                 cbBufClear = 0;
    1084                 fAllFree = false;
    1085             }
    1086         }
    1087 
    1088         cbRead -= cbThisRead;
    1089         uOffset += cbThisRead;
    1090         pvBuf = (char *)pvBuf + cbThisRead;
    1091     } while (cbRead != 0 && RT_SUCCESS(rc));
    1092 
    1093     return (!fZeroFreeBlocks && fAllFree) ? VERR_VD_BLOCK_FREE : rc;
     1932    uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
     1933    RTSGSEG Segment;
     1934    RTSGBUF SgBuf;
     1935    VDIOCTX IoCtx;
     1936
     1937    if (fZeroFreeBlocks)
     1938        fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
     1939    if (fUpdateCache)
     1940        fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE;
     1941
     1942    Segment.pvSeg = pvBuf;
     1943    Segment.cbSeg = cbRead;
     1944    RTSgBufInit(&SgBuf, &Segment, 1);
     1945    vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf,
     1946                NULL, vdReadHelperAsync, fFlags);
     1947
     1948    IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
     1949    IoCtx.Req.Io.cImagesRead = cImagesRead;
     1950    IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
     1951    IoCtx.Type.Root.pvUser1     = pDisk;
     1952    IoCtx.Type.Root.pvUser2     = NULL;
     1953    return vdIoCtxProcessSync(&IoCtx);
    10941954}
    10951955
     
    11031963    return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead,
    11041964                          true /* fZeroFreeBlocks */, fUpdateCache, 0);
    1105 }
    1106 
    1107 /**
    1108  * Creates a new empty discard state.
    1109  *
    1110  * @returns Pointer to the new discard state or NULL if out of memory.
    1111  */
    1112 static PVDDISCARDSTATE vdDiscardStateCreate(void)
    1113 {
    1114     PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));
    1115 
    1116     if (pDiscard)
    1117     {
    1118         RTListInit(&pDiscard->ListLru);
    1119         pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
    1120         if (!pDiscard->pTreeBlocks)
    1121         {
    1122             RTMemFree(pDiscard);
    1123             pDiscard = NULL;
    1124         }
    1125     }
    1126 
    1127     return pDiscard;
    1128 }
    1129 
    1130 /**
    1131  * Removes the least recently used blocks from the waiting list until
    1132  * the new value is reached.
    1133  *
    1134  * @returns VBox status code.
    1135  * @param   pDisk              VD disk container.
    1136  * @param   pDiscard           The discard state.
    1137  * @param   cbDiscardingNew    How many bytes should be waiting on success.
    1138  *                             The number of bytes waiting can be less.
    1139  */
    1140 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)
    1141 {
    1142     int rc = VINF_SUCCESS;
    1143 
    1144     LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
    1145                  pDisk, pDiscard, cbDiscardingNew));
    1146 
    1147     while (pDiscard->cbDiscarding > cbDiscardingNew)
    1148     {
    1149         PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
    1150 
    1151         Assert(!RTListIsEmpty(&pDiscard->ListLru));
    1152 
    1153         /* Go over the allocation bitmap and mark all discarded sectors as unused. */
    1154         uint64_t offStart = pBlock->Core.Key;
    1155         uint32_t idxStart = 0;
    1156         size_t cbLeft = pBlock->cbDiscard;
    1157         bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
    1158         uint32_t cSectors = pBlock->cbDiscard / 512;
    1159 
    1160         while (cbLeft > 0)
    1161         {
    1162             int32_t idxEnd;
    1163             size_t cbThis = cbLeft;
    1164 
    1165             if (fAllocated)
    1166             {
    1167                 /* Check for the first unallocated bit. */
    1168                 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
    1169                 if (idxEnd != -1)
    1170                 {
    1171                     cbThis = (idxEnd - idxStart) * 512;
    1172                     fAllocated = false;
    1173                 }
    1174             }
    1175             else
    1176             {
    1177                 /* Mark as unused and check for the first set bit. */
    1178                 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
    1179                 if (idxEnd != -1)
    1180                     cbThis = (idxEnd - idxStart) * 512;
    1181 
    1182 
    1183                 VDIOCTX IoCtx;
    1184                 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,
    1185                             NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    1186                 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,
    1187                                                             &IoCtx, offStart, cbThis, NULL,
    1188                                                             NULL, &cbThis, NULL,
    1189                                                             VD_DISCARD_MARK_UNUSED);
    1190                 if (RT_FAILURE(rc))
    1191                     break;
    1192 
    1193                 fAllocated = true;
    1194             }
    1195 
    1196             idxStart  = idxEnd;
    1197             offStart += cbThis;
    1198             cbLeft   -= cbThis;
    1199         }
    1200 
    1201         if (RT_FAILURE(rc))
    1202             break;
    1203 
    1204         PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
    1205         Assert(pBlockRemove == pBlock);
    1206         RTListNodeRemove(&pBlock->NodeLru);
    1207 
    1208         pDiscard->cbDiscarding -= pBlock->cbDiscard;
    1209         RTMemFree(pBlock->pbmAllocated);
    1210         RTMemFree(pBlock);
    1211     }
    1212 
    1213     Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
    1214 
    1215     LogFlowFunc(("returns rc=%Rrc\n", rc));
    1216     return rc;
    1217 }
    1218 
    1219 /**
    1220  * Destroys the current discard state, writing any waiting blocks to the image.
    1221  *
    1222  * @returns VBox status code.
    1223  * @param   pDisk    VD disk container.
    1224  */
    1225 static int vdDiscardStateDestroy(PVBOXHDD pDisk)
    1226 {
    1227     int rc = VINF_SUCCESS;
    1228 
    1229     if (pDisk->pDiscard)
    1230     {
    1231         rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);
    1232         AssertRC(rc);
    1233         RTMemFree(pDisk->pDiscard->pTreeBlocks);
    1234         RTMemFree(pDisk->pDiscard);
    1235         pDisk->pDiscard = NULL;
    1236     }
    1237 
    1238     return rc;
    1239 }
    1240 
    1241 /**
    1242  * Marks the given range as allocated in the image.
    1243  * Required if there are discards in progress and a write to a block which can get discarded
    1244  * is written to.
    1245  *
    1246  * @returns VBox status code.
    1247  * @param   pDisk    VD container data.
    1248  * @param   uOffset  First byte to mark as allocated.
    1249  * @param   cbRange  Number of bytes to mark as allocated.
    1250  */
    1251 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange)
    1252 {
    1253     PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
    1254     int rc = VINF_SUCCESS;
    1255 
    1256     if (pDiscard)
    1257     {
    1258         do
    1259         {
    1260             size_t cbThisRange = cbRange;
    1261             PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);
    1262 
    1263             if (pBlock)
    1264             {
    1265                 int32_t idxStart, idxEnd;
    1266 
    1267                 Assert(!(cbThisRange % 512));
    1268                 Assert(!((uOffset - pBlock->Core.Key) % 512));
    1269 
    1270                 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);
    1271 
    1272                 idxStart = (uOffset - pBlock->Core.Key) / 512;
    1273                 idxEnd = idxStart + (cbThisRange / 512);
    1274                 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);
    1275             }
    1276             else
    1277             {
    1278                 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);
    1279                 if (pBlock)
    1280                     cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);
    1281             }
    1282 
    1283             Assert(cbRange >= cbThisRange);
    1284 
    1285             uOffset += cbThisRange;
    1286             cbRange -= cbThisRange;
    1287         } while (cbRange != 0);
    1288     }
    1289 
    1290     return rc;
    1291 }
    1292 
    1293 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
    1294                                   uint64_t uOffset, size_t cbTransfer,
    1295                                   PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,
    1296                                   void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
    1297                                   uint32_t fFlags)
    1298 {
    1299     PVDIOCTX pIoCtx = NULL;
    1300 
    1301     pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
    1302     if (RT_LIKELY(pIoCtx))
    1303     {
    1304         vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
    1305                     pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
    1306     }
    1307 
    1308     return pIoCtx;
    1309 }
    1310 
    1311 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
    1312                                       uint64_t uOffset, size_t cbTransfer,
    1313                                       PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
    1314                                       PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
    1315                                       void *pvUser1, void *pvUser2,
    1316                                       void *pvAllocation,
    1317                                       PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
    1318                                       uint32_t fFlags)
    1319 {
    1320     PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
    1321                                    pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
    1322 
    1323     if (RT_LIKELY(pIoCtx))
    1324     {
    1325         pIoCtx->pIoCtxParent          = NULL;
    1326         pIoCtx->Type.Root.pfnComplete = pfnComplete;
    1327         pIoCtx->Type.Root.pvUser1     = pvUser1;
    1328         pIoCtx->Type.Root.pvUser2     = pvUser2;
    1329     }
    1330 
    1331     LogFlow(("Allocated root I/O context %#p\n", pIoCtx));
    1332     return pIoCtx;
    1333 }
    1334 
    1335 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges,
    1336                                          unsigned cRanges,
    1337                                          PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
    1338                                          void *pvUser1, void *pvUser2,
    1339                                          void *pvAllocation,
    1340                                          PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
    1341                                          uint32_t fFlags)
    1342 {
    1343     PVDIOCTX pIoCtx = NULL;
    1344 
    1345     pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
    1346     if (RT_LIKELY(pIoCtx))
    1347     {
    1348         pIoCtx->pIoCtxNext                = NULL;
    1349         pIoCtx->pDisk                     = pDisk;
    1350         pIoCtx->enmTxDir                  = VDIOCTXTXDIR_DISCARD;
    1351         pIoCtx->cDataTransfersPending     = 0;
    1352         pIoCtx->cMetaTransfersPending     = 0;
    1353         pIoCtx->fComplete                 = false;
    1354         pIoCtx->fFlags                    = fFlags;
    1355         pIoCtx->pvAllocation              = pvAllocation;
    1356         pIoCtx->pfnIoCtxTransfer          = pfnIoCtxTransfer;
    1357         pIoCtx->pfnIoCtxTransferNext      = NULL;
    1358         pIoCtx->rcReq                     = VINF_SUCCESS;
    1359         pIoCtx->Req.Discard.paRanges      = paRanges;
    1360         pIoCtx->Req.Discard.cRanges       = cRanges;
    1361         pIoCtx->Req.Discard.idxRange      = 0;
    1362         pIoCtx->Req.Discard.cbDiscardLeft = 0;
    1363         pIoCtx->Req.Discard.offCur        = 0;
    1364         pIoCtx->Req.Discard.cbThisDiscard = 0;
    1365 
    1366         pIoCtx->pIoCtxParent          = NULL;
    1367         pIoCtx->Type.Root.pfnComplete = pfnComplete;
    1368         pIoCtx->Type.Root.pvUser1     = pvUser1;
    1369         pIoCtx->Type.Root.pvUser2     = pvUser2;
    1370     }
    1371 
    1372     LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));
    1373     return pIoCtx;
    1374 }
    1375 
    1376 DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
    1377                                        uint64_t uOffset, size_t cbTransfer,
    1378                                        PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
    1379                                        PVDIOCTX pIoCtxParent, size_t cbTransferParent,
    1380                                        size_t cbWriteParent, void *pvAllocation,
    1381                                        PFNVDIOCTXTRANSFER pfnIoCtxTransfer)
    1382 {
    1383     PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
    1384                                    pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0);
    1385 
    1386     AssertPtr(pIoCtxParent);
    1387     Assert(!pIoCtxParent->pIoCtxParent);
    1388 
    1389     if (RT_LIKELY(pIoCtx))
    1390     {
    1391         pIoCtx->pIoCtxParent                   = pIoCtxParent;
    1392         pIoCtx->Type.Child.uOffsetSaved        = uOffset;
    1393         pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;
    1394         pIoCtx->Type.Child.cbTransferParent    = cbTransferParent;
    1395         pIoCtx->Type.Child.cbWriteParent       = cbWriteParent;
    1396     }
    1397 
    1398     LogFlow(("Allocated child I/O context %#p\n", pIoCtx));
    1399     return pIoCtx;
    1400 }
    1401 
    1402 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)
    1403 {
    1404     PVDIOTASK pIoTask = NULL;
    1405 
    1406     pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
    1407     if (pIoTask)
    1408     {
    1409         pIoTask->pIoStorage           = pIoStorage;
    1410         pIoTask->pfnComplete          = pfnComplete;
    1411         pIoTask->pvUser               = pvUser;
    1412         pIoTask->fMeta                = false;
    1413         pIoTask->Type.User.cbTransfer = cbTransfer;
    1414         pIoTask->Type.User.pIoCtx     = pIoCtx;
    1415     }
    1416 
    1417     return pIoTask;
    1418 }
    1419 
    1420 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)
    1421 {
    1422     PVDIOTASK pIoTask = NULL;
    1423 
    1424     pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
    1425     if (pIoTask)
    1426     {
    1427         pIoTask->pIoStorage          = pIoStorage;
    1428         pIoTask->pfnComplete         = pfnComplete;
    1429         pIoTask->pvUser              = pvUser;
    1430         pIoTask->fMeta               = true;
    1431         pIoTask->Type.Meta.pMetaXfer = pMetaXfer;
    1432     }
    1433 
    1434     return pIoTask;
    1435 }
    1436 
    1437 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1438 {
    1439     LogFlow(("Freeing I/O context %#p\n", pIoCtx));
    1440     if (pIoCtx->pvAllocation)
    1441         RTMemFree(pIoCtx->pvAllocation);
    1442 #ifdef DEBUG
    1443     memset(pIoCtx, 0xff, sizeof(VDIOCTX));
    1444 #endif
    1445     RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);
    1446 }
    1447 
    1448 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask)
    1449 {
    1450     RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);
    1451 }
    1452 
    1453 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)
    1454 {
    1455     AssertPtr(pIoCtx->pIoCtxParent);
    1456 
    1457     RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    1458     pIoCtx->Req.Io.uOffset        = pIoCtx->Type.Child.uOffsetSaved;
    1459     pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved;
    1460 }
    1461 
    1462 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)
    1463 {
    1464     PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));
    1465 
    1466     if (RT_LIKELY(pMetaXfer))
    1467     {
    1468         pMetaXfer->Core.Key     = uOffset;
    1469         pMetaXfer->Core.KeyLast = uOffset + cb - 1;
    1470         pMetaXfer->fFlags       = VDMETAXFER_TXDIR_NONE;
    1471         pMetaXfer->cbMeta       = cb;
    1472         pMetaXfer->pIoStorage   = pIoStorage;
    1473         pMetaXfer->cRefs        = 0;
    1474         RTListInit(&pMetaXfer->ListIoCtxWaiting);
    1475     }
    1476     return pMetaXfer;
    1477 }
    1478 
    /**
     * Pushes an I/O context onto the front of a singly linked list of contexts
     * using an atomic compare-and-exchange on the list head.
     *
     * New entries are always inserted at the head, so the list is in LIFO order.
     *
     * @param   ppList   Pointer to the list head to insert into.
     * @param   pIoCtx   The I/O context to insert; its pIoCtxNext member is overwritten.
     */
    1479 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)
    1480 {
    1481     /* Put it on the waiting list. */
    1482     PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);
    1483     PVDIOCTX pHeadOld;
    1484     pIoCtx->pIoCtxNext = pNext;
             /* Try to swing the head from pNext to pIoCtx; retry with the head
              * reported back in pHeadOld whenever the exchange did not succeed. */
    1485     while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))
    1486     {
    1487         pNext = pHeadOld;
                 /* The context must not already be the head of the list. */
    1488         Assert(pNext != pIoCtx);
    1489         pIoCtx->pIoCtxNext = pNext;
    1490         ASMNopPause();
    1491     }
    1492 }
    1493 
    1494 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1495 {
    1496     LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx));
    1497 
    1498     Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));
    1499     pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
    1500     vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);
    1501 }
    1502 
    1503 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)
    1504 {
    1505     return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);
    1506 }
    1507 
    1508 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)
    1509 {
    1510     return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);
    1511 }
    1512 
    1513 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)
    1514 {
    1515     return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
    1516 }
    1517 
    1518 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)
    1519 {
    1520     return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
    1521 }
    1522 
    1523 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)
    1524 {
    1525     return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);
    1526 }
    1527 
    1528 /**
    1529  * Process the I/O context, core method which assumes that the I/O context
    1530  * acquired the lock.
    1531  *
          * Runs the chain of transfer functions (pfnIoCtxTransfer followed by
          * pfnIoCtxTransferNext) until one of them fails, a metadata transfer becomes
          * pending or no function is left, and then folds the result into one of two
          * status codes. The first hard error is remembered in pIoCtx->rcReq.
          *
    1532  * @returns VINF_VD_ASYNC_IO_FINISHED if no transfer is pending anymore,
          *          VERR_VD_ASYNC_IO_IN_PROGRESS otherwise (transfers pending,
          *          context blocked or halted, or metadata missing).
    1533  * @param   pIoCtx    I/O context to process.
    1534  */
    1535 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)
    1536 {
    1537     int rc = VINF_SUCCESS;
    1538 
    1539     VD_IS_LOCKED(pIoCtx->pDisk);
    1540 
    1541     LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
    1542 
             /* Nothing pending and no transfer function left: the context is done. */
    1543     if (   !pIoCtx->cMetaTransfersPending
    1544         && !pIoCtx->cDataTransfersPending
    1545         && !pIoCtx->pfnIoCtxTransfer)
    1546     {
    1547         rc = VINF_VD_ASYNC_IO_FINISHED;
    1548         goto out;
    1549     }
    1550 
    1551     /*
    1552      * We complete the I/O context in case of an error
    1553      * if there is no I/O task pending.
    1554      */
    1555     if (   RT_FAILURE(pIoCtx->rcReq)
    1556         && !pIoCtx->cMetaTransfersPending
    1557         && !pIoCtx->cDataTransfersPending)
    1558     {
    1559         rc = VINF_VD_ASYNC_IO_FINISHED;
    1560         goto out;
    1561     }
    1562 
    1563     /* Don't change anything if there is a metadata transfer pending or we are blocked. */
    1564     if (   pIoCtx->cMetaTransfersPending
    1565         || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
    1566     {
    1567         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1568         goto out;
    1569     }
    1570 
             /* pfnIoCtxTransfer can be NULL here when only data transfers are pending. */
    1571     if (pIoCtx->pfnIoCtxTransfer)
    1572     {
    1573         /* Call the transfer function advancing to the next while there is no error. */
    1574         while (   pIoCtx->pfnIoCtxTransfer
    1575                && !pIoCtx->cMetaTransfersPending
    1576                && RT_SUCCESS(rc))
    1577         {
    1578             LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));
    1579             rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);
    1580 
    1581             /* Advance to the next part of the transfer if the current one succeeded. */
    1582             if (RT_SUCCESS(rc))
    1583             {
    1584                 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
    1585                 pIoCtx->pfnIoCtxTransferNext = NULL;
    1586             }
    1587         }
    1588     }
    1589 
             /*
              * Map the outcome to the two status codes returned to the caller:
              * success with nothing pending means finished; success with pending
              * transfers, missing metadata or a halted context means in progress.
              */
    1590     if (   RT_SUCCESS(rc)
    1591         && !pIoCtx->cMetaTransfersPending
    1592         && !pIoCtx->cDataTransfersPending)
    1593         rc = VINF_VD_ASYNC_IO_FINISHED;
    1594     else if (   RT_SUCCESS(rc)
    1595              || rc == VERR_VD_NOT_ENOUGH_METADATA
    1596              || rc == VERR_VD_IOCTX_HALT)
    1597         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1598     else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
    1599     {
                 /* Only stored while rcReq is still VINF_SUCCESS, i.e. the first error wins. */
    1600         ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);
    1601         /*
    1602          * The I/O context completed if we have an error and there is no data
    1603          * or meta data transfer pending.
    1604          */
    1605         if (   !pIoCtx->cMetaTransfersPending
    1606             && !pIoCtx->cDataTransfersPending)
    1607             rc = VINF_VD_ASYNC_IO_FINISHED;
    1608         else
    1609             rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1610     }
    1611 
    1612 out:
    1613     LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
    1614                  pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,
    1615                  pIoCtx->fComplete));
    1616 
    1617     return rc;
    1618 }
    1619 
    1620 /**
    1621  * Processes the list of waiting I/O contexts.
    1622  *
    1623  * @returns VBox status code.
    1624  * @param   pDisk    The disk structure.
    1625  * @param   pIoCtxRc An I/O context handle which waits on the list. When processed
    1626  *                   The status code is returned. NULL if there is no I/O context
    1627  *                   to return the status code for.
    1628  */
    1629 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)
    1630 {
    1631     int rc = VINF_SUCCESS;
    1632 
    1633     LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));
    1634 
    1635     VD_IS_LOCKED(pDisk);
    1636 
    1637     /* Get the waiting list and process it in FIFO order. */
    1638     PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);
    1639 
    1640     /* Reverse it. */
    1641     PVDIOCTX pCur = pIoCtxHead;
    1642     pIoCtxHead = NULL;
    1643     while (pCur)
    1644     {
    1645         PVDIOCTX pInsert = pCur;
    1646         pCur = pCur->pIoCtxNext;
    1647         pInsert->pIoCtxNext = pIoCtxHead;
    1648         pIoCtxHead = pInsert;
    1649     }
    1650 
    1651     /* Process now. */
    1652     pCur = pIoCtxHead;
    1653     while (pCur)
    1654     {
    1655         int rcTmp;
    1656         PVDIOCTX pTmp = pCur;
    1657 
    1658         pCur = pCur->pIoCtxNext;
    1659         pTmp->pIoCtxNext = NULL;
    1660 
    1661         rcTmp = vdIoCtxProcessLocked(pTmp);
    1662         if (pTmp == pIoCtxRc)
    1663         {
    1664             /* The given I/O context was processed, pass the return code to the caller. */
    1665             rc = rcTmp;
    1666         }
    1667         else if (   rcTmp == VINF_VD_ASYNC_IO_FINISHED
    1668                  && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
    1669         {
    1670             LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
    1671             vdThreadFinishWrite(pDisk);
    1672             pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
    1673                                         pTmp->Type.Root.pvUser2,
    1674                                         pTmp->rcReq);
    1675             vdIoCtxFree(pDisk, pTmp);
    1676         }
    1677     }
    1678 
    1679     LogFlowFunc(("returns rc=%Rrc\n", rc));
    1680     return rc;
    1681 }
    1682 
    1683 /**
    1684  * Processes the list of blocked I/O contexts.
    1685  *
    1686  * @returns nothing.
    1687  * @param   pDisk    The disk structure.
    1688  */
    1689 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)
    1690 {
    1691     LogFlowFunc(("pDisk=%#p\n", pDisk));
    1692 
    1693     VD_IS_LOCKED(pDisk);
    1694 
    1695     /* Get the waiting list and process it in FIFO order. */
    1696     PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);
    1697 
    1698     /* Reverse it. */
    1699     PVDIOCTX pCur = pIoCtxHead;
    1700     pIoCtxHead = NULL;
    1701     while (pCur)
    1702     {
    1703         PVDIOCTX pInsert = pCur;
    1704         pCur = pCur->pIoCtxNext;
    1705         pInsert->pIoCtxNext = pIoCtxHead;
    1706         pIoCtxHead = pInsert;
    1707     }
    1708 
    1709     /* Process now. */
    1710     pCur = pIoCtxHead;
    1711     while (pCur)
    1712     {
    1713         int rc;
    1714         PVDIOCTX pTmp = pCur;
    1715 
    1716         pCur = pCur->pIoCtxNext;
    1717         pTmp->pIoCtxNext = NULL;
    1718 
    1719         Assert(!pTmp->pIoCtxParent);
    1720         Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);
    1721         pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
    1722 
    1723         rc = vdIoCtxProcessLocked(pTmp);
    1724         if (   rc == VINF_VD_ASYNC_IO_FINISHED
    1725             && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
    1726         {
    1727             LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
    1728             vdThreadFinishWrite(pDisk);
    1729             pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
    1730                                         pTmp->Type.Root.pvUser2,
    1731                                         pTmp->rcReq);
    1732             vdIoCtxFree(pDisk, pTmp);
    1733         }
    1734     }
    1735 
    1736     LogFlowFunc(("returns\n"));
    1737 }
    1738 
    1739 /**
    1740  * Processes the I/O context trying to lock the criticial section.
    1741  * The context is deferred if the critical section is busy.
    1742  *
    1743  * @returns VBox status code.
    1744  * @param   pIoCtx    The I/O context to process.
    1745  */
    1746 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)
    1747 {
    1748     int rc = VINF_SUCCESS;
    1749     PVBOXHDD pDisk = pIoCtx->pDisk;
    1750 
    1751     LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
    1752 
    1753     /* Put it on the waiting list first. */
    1754     vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);
    1755 
    1756     if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
    1757     {
    1758         /* Leave it again, the context will be processed just before leaving the lock. */
    1759         LogFlowFunc(("Successfully acquired the lock\n"));
    1760         rc = vdDiskUnlock(pDisk, pIoCtx);
    1761     }
    1762     else
    1763     {
    1764         LogFlowFunc(("Lock is held\n"));
    1765         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1766     }
    1767 
    1768     return rc;
    1769 }
    1770 
    1771 /**
    1772  * Process the I/O context in a synchronous manner, waiting
    1773  * for it to complete.
    1774  *
    1775  * @returns VBox status code of the completed request.
    1776  * @param   pIoCtx    The sync I/O context.
    1777  */
    1778 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx)
    1779 {
    1780     int rc = VINF_SUCCESS;
    1781     PVBOXHDD pDisk = pIoCtx->pDisk;
    1782 
    1783     LogFlowFunc(("pIoCtx=%p\n", pIoCtx));
    1784 
    1785     AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC,
    1786               ("I/O context is not marked as synchronous\n"));
    1787 
    1788     rc = vdIoCtxProcessTryLockDefer(pIoCtx);
    1789     if (rc == VINF_VD_ASYNC_IO_FINISHED)
    1790         rc = VINF_SUCCESS;
    1791 
    1792     if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
    1793     {
    1794         rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT);
    1795         AssertRC(rc);
    1796 
    1797         rc = pDisk->rcSync;
    1798     }
    1799     else /* Success or error. */
    1800         vdIoCtxFree(pDisk, pIoCtx);
    1801 
    1802     return rc;
    1803 }
    1804 
    1805 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1806 {
    1807     return pDisk->pIoCtxLockOwner == pIoCtx;
    1808 }
    1809 
    1810 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1811 {
    1812     int rc = VINF_SUCCESS;
    1813 
    1814     VD_IS_LOCKED(pDisk);
    1815 
    1816     LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));
    1817 
    1818     if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))
    1819     {
    1820         Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */
    1821         vdIoCtxDefer(pDisk, pIoCtx);
    1822         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1823     }
    1824 
    1825     LogFlowFunc(("returns -> %Rrc\n", rc));
    1826     return rc;
    1827 }
    1828 
    1829 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
    1830 {
    1831     LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
    1832                  pDisk, pIoCtx, fProcessBlockedReqs));
    1833 
    1834     VD_IS_LOCKED(pDisk);
    1835 
    1836     LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
    1837     Assert(pDisk->pIoCtxLockOwner == pIoCtx);
    1838     ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);
    1839 
    1840     if (fProcessBlockedReqs)
    1841     {
    1842         /* Process any blocked writes if the current request didn't caused another growing. */
    1843         vdDiskProcessBlockedIoCtx(pDisk);
    1844     }
    1845 
    1846     LogFlowFunc(("returns\n"));
    1847 }
    1848 
    1849 /**
    1850  * internal: read the specified amount of data in whatever blocks the backend
    1851  * will give us - async version.
    1852  */
    1853 static int vdReadHelperAsync(PVDIOCTX pIoCtx)
    1854 {
    1855     int rc;
    1856     size_t cbToRead     = pIoCtx->Req.Io.cbTransfer;
    1857     uint64_t uOffset    = pIoCtx->Req.Io.uOffset;
    1858     PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;;
    1859     size_t cbThisRead;
    1860 
    1861     /* Loop until all reads started or we have a backend which needs to read metadata. */
    1862     do
    1863     {
    1864         /* Search for image with allocated block. Do not attempt to read more
    1865          * than the previous reads marked as valid. Otherwise this would return
    1866          * stale data when different block sizes are used for the images. */
    1867         cbThisRead = cbToRead;
    1868 
    1869         /*
    1870          * Try to read from the given image.
    1871          * If the block is not allocated read from override chain if present.
    1872          */
    1873         rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    1874                                           uOffset, cbThisRead,
    1875                                           pIoCtx, &cbThisRead);
    1876 
    1877         if (rc == VERR_VD_BLOCK_FREE)
    1878         {
    1879             while (   pCurrImage->pPrev != NULL
    1880                    && rc == VERR_VD_BLOCK_FREE)
    1881             {
    1882                 pCurrImage =  pCurrImage->pPrev;
    1883                 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    1884                                                   uOffset, cbThisRead,
    1885                                                   pIoCtx, &cbThisRead);
    1886             }
    1887         }
    1888 
    1889         /* The task state will be updated on success already, don't do it here!. */
    1890         if (rc == VERR_VD_BLOCK_FREE)
    1891         {
    1892             /* No image in the chain contains the data for the block. */
    1893             vdIoCtxSet(pIoCtx, '\0', cbThisRead);
    1894             ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);
    1895             rc = VINF_SUCCESS;
    1896         }
    1897         else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
    1898             rc = VINF_SUCCESS;
    1899         else if (rc == VERR_VD_IOCTX_HALT)
    1900         {
    1901             uOffset  += cbThisRead;
    1902             cbToRead -= cbThisRead;
    1903             pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
    1904         }
    1905 
    1906         if (RT_FAILURE(rc))
    1907             break;
    1908 
    1909         cbToRead -= cbThisRead;
    1910         uOffset  += cbThisRead;
    1911         pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
    1912     } while (cbToRead != 0 && RT_SUCCESS(rc));
    1913 
    1914     if (   rc == VERR_VD_NOT_ENOUGH_METADATA
    1915         || rc == VERR_VD_IOCTX_HALT)
    1916     {
    1917         /* Save the current state. */
    1918         pIoCtx->Req.Io.uOffset    = uOffset;
    1919         pIoCtx->Req.Io.cbTransfer = cbToRead;
    1920         pIoCtx->Req.Io.pImageCur  = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
    1921     }
    1922 
    1923     return rc;
    1924 }
    1925 
    1926 /**
    1927  * internal: parent image read wrapper for compacting.
    1928  */
    1929 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,
    1930                         size_t cbRead)
    1931 {
    1932     PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;
    1933     return vdReadHelper(pParentState->pDisk, pParentState->pImage, uOffset,
    1934                         pvBuf, cbRead, false /* fUpdateCache */);
    19351965}
    19361966
     
    19842014
    19852015/**
    1986  * internal: write a complete block (only used for diff images), taking the
    1987  * remaining data from parent images. This implementation does not optimize
    1988  * anything (except that it tries to read only that portions from parent
    1989  * images that are really needed).
    1990  */
    1991 static int vdWriteHelperStandard(PVBOXHDD pDisk, PVDIMAGE pImage,
    1992                                  PVDIMAGE pImageParentOverride,
    1993                                  uint64_t uOffset, size_t cbWrite,
    1994                                  size_t cbThisWrite, size_t cbPreRead,
    1995                                  size_t cbPostRead, const void *pvBuf,
    1996                                  void *pvTmp)
    1997 {
    1998     int rc = VINF_SUCCESS;
    1999 
    2000     LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",
    2001                  pDisk, pImage, pImageParentOverride, uOffset, cbWrite));
    2002 
    2003     /* Read the data that goes before the write to fill the block. */
    2004     if (cbPreRead)
    2005     {
    2006         /*
    2007          * Updating the cache doesn't make sense here because
    2008          * this will be done after the complete block was written.
    2009          */
    2010         rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,
    2011                             uOffset - cbPreRead, pvTmp, cbPreRead,
    2012                             true /* fZeroFreeBlocks*/,
    2013                             false /* fUpdateCache */, 0);
    2014         if (RT_FAILURE(rc))
    2015             return rc;
    2016     }
    2017 
    2018     /* Copy the data to the right place in the buffer. */
    2019     memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);
    2020 
    2021     /* Read the data that goes after the write to fill the block. */
    2022     if (cbPostRead)
    2023     {
    2024         /* If we have data to be written, use that instead of reading
    2025          * data from the image. */
    2026         size_t cbWriteCopy;
    2027         if (cbWrite > cbThisWrite)
    2028             cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
    2029         else
    2030             cbWriteCopy = 0;
    2031         /* Figure out how much we cannot read from the image, because
    2032          * the last block to write might exceed the nominal size of the
    2033          * image for technical reasons. */
    2034         size_t cbFill;
    2035         if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
    2036             cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
    2037         else
    2038             cbFill = 0;
    2039         /* The rest must be read from the image. */
    2040         size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill;
    2041 
    2042         /* Now assemble the remaining data. */
    2043         if (cbWriteCopy)
    2044             memcpy((char *)pvTmp + cbPreRead + cbThisWrite,
    2045                    (char *)pvBuf + cbThisWrite, cbWriteCopy);
    2046         if (cbReadImage)
    2047             rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,
    2048                                 uOffset + cbThisWrite + cbWriteCopy,
    2049                                 (char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy,
    2050                                 cbReadImage, true /* fZeroFreeBlocks */,
    2051                                 false /* fUpdateCache */, 0);
    2052         if (RT_FAILURE(rc))
    2053             return rc;
    2054         /* Zero out the remainder of this block. Will never be visible, as this
    2055          * is beyond the limit of the image. */
    2056         if (cbFill)
    2057             memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,
    2058                    '\0', cbFill);
    2059     }
    2060 
    2061     /* Write the full block to the virtual disk. */
    2062     RTSGSEG SegmentBuf;
    2063     RTSGBUF SgBuf;
    2064     VDIOCTX IoCtx;
    2065 
    2066     SegmentBuf.pvSeg = pvTmp;
    2067     SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead;
    2068     RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    2069     vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
    2070                 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    2071     rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead,
    2072                                         cbPreRead + cbThisWrite + cbPostRead,
    2073                                         &IoCtx, NULL, &cbPreRead, &cbPostRead, 0);
    2074     Assert(rc != VERR_VD_BLOCK_FREE);
    2075     Assert(cbPreRead == 0);
    2076     Assert(cbPostRead == 0);
    2077 
    2078     return rc;
    2079 }
    2080 
    2081 /**
    2082  * internal: write a complete block (only used for diff images), taking the
    2083  * remaining data from parent images. This implementation optimizes out writes
    2084  * that do not change the data relative to the state as of the parent images.
    2085  * All backends which support differential/growing images support this.
    2086  */
    2087 static int vdWriteHelperOptimized(PVBOXHDD pDisk, PVDIMAGE pImage,
    2088                                   PVDIMAGE pImageParentOverride,
    2089                                   uint64_t uOffset, size_t cbWrite,
    2090                                   size_t cbThisWrite, size_t cbPreRead,
    2091                                   size_t cbPostRead, const void *pvBuf,
    2092                                   void *pvTmp, unsigned cImagesRead)
    2093 {
    2094     size_t cbFill = 0;
    2095     size_t cbWriteCopy = 0;
    2096     size_t cbReadImage = 0;
    2097     int rc;
    2098 
    2099     LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",
    2100                  pDisk, pImage, pImageParentOverride, uOffset, cbWrite));
    2101 
    2102     if (cbPostRead)
    2103     {
    2104         /* Figure out how much we cannot read from the image, because
    2105          * the last block to write might exceed the nominal size of the
    2106          * image for technical reasons. */
    2107         if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
    2108             cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
    2109 
    2110         /* If we have data to be written, use that instead of reading
    2111          * data from the image. */
    2112         if (cbWrite > cbThisWrite)
    2113             cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
    2114 
    2115         /* The rest must be read from the image. */
    2116         cbReadImage = cbPostRead - cbWriteCopy - cbFill;
    2117     }
    2118 
    2119     /* Read the entire data of the block so that we can compare whether it will
    2120      * be modified by the write or not. */
    2121     rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, uOffset - cbPreRead, pvTmp,
    2122                         cbPreRead + cbThisWrite + cbPostRead - cbFill,
    2123                         true /* fZeroFreeBlocks */, false /* fUpdateCache */,
    2124                         cImagesRead);
    2125     if (RT_FAILURE(rc))
    2126         return rc;
    2127 
    2128     /* Check if the write would modify anything in this block. */
    2129     if (   !memcmp((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite)
    2130         && (!cbWriteCopy || !memcmp((char *)pvTmp + cbPreRead + cbThisWrite,
    2131                                     (char *)pvBuf + cbThisWrite, cbWriteCopy)))
    2132     {
    2133         /* Block is completely unchanged, so no need to write anything. */
    2134         return VINF_SUCCESS;
    2135     }
    2136 
    2137     /* Copy the data to the right place in the buffer. */
    2138     memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);
    2139 
    2140     /* Handle the data that goes after the write to fill the block. */
    2141     if (cbPostRead)
    2142     {
    2143         /* Now assemble the remaining data. */
    2144         if (cbWriteCopy)
    2145             memcpy((char *)pvTmp + cbPreRead + cbThisWrite,
    2146                    (char *)pvBuf + cbThisWrite, cbWriteCopy);
    2147         /* Zero out the remainder of this block. Will never be visible, as this
    2148          * is beyond the limit of the image. */
    2149         if (cbFill)
    2150             memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,
    2151                    '\0', cbFill);
    2152     }
    2153 
    2154     /* Write the full block to the virtual disk. */
    2155     RTSGSEG SegmentBuf;
    2156     RTSGBUF SgBuf;
    2157     VDIOCTX IoCtx;
    2158 
    2159     SegmentBuf.pvSeg = pvTmp;
    2160     SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead;
    2161     RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    2162     vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
    2163                 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    2164     rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead,
    2165                                         cbPreRead + cbThisWrite + cbPostRead,
    2166                                         &IoCtx, NULL, &cbPreRead, &cbPostRead, 0);
    2167     Assert(rc != VERR_VD_BLOCK_FREE);
    2168     Assert(cbPreRead == 0);
    2169     Assert(cbPostRead == 0);
    2170 
    2171     return rc;
    2172 }
    2173 
    2174 /**
    21752016 * internal: write buffer to the image, taking care of block boundaries and
    21762017 * write optimizations.
     
    21812022                           bool fUpdateCache, unsigned cImagesRead)
    21822023{
    2183     int rc;
    2184     unsigned fWrite;
    2185     size_t cbThisWrite;
    2186     size_t cbPreRead, cbPostRead;
    2187     uint64_t uOffsetCur = uOffset;
    2188     size_t cbWriteCur = cbWrite;
    2189     const void *pcvBufCur = pvBuf;
    2190     RTSGSEG SegmentBuf;
     2024    uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
     2025    RTSGSEG Segment;
    21912026    RTSGBUF SgBuf;
    21922027    VDIOCTX IoCtx;
    21932028
    2194     /* Loop until all written. */
    2195     do
    2196     {
    2197         /* Try to write the possibly partial block to the last opened image.
    2198          * This works when the block is already allocated in this image or
    2199          * if it is a full-block write (and allocation isn't suppressed below).
    2200          * For image formats which don't support zero blocks, it's beneficial
    2201          * to avoid unnecessarily allocating unchanged blocks. This prevents
    2202          * unwanted expanding of images. VMDK is an example. */
    2203         cbThisWrite = cbWriteCur;
    2204         fWrite =   (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
    2205                  ? 0 : VD_WRITE_NO_ALLOC;
    2206 
    2207         SegmentBuf.pvSeg = (void *)pcvBufCur;
    2208         SegmentBuf.cbSeg = cbWrite;
    2209         RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    2210         vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
    2211                     &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    2212         rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffsetCur, cbThisWrite,
    2213                                             &IoCtx, &cbThisWrite, &cbPreRead,
    2214                                             &cbPostRead, fWrite);
    2215         if (rc == VERR_VD_BLOCK_FREE)
    2216         {
    2217             void *pvTmp = RTMemTmpAlloc(cbPreRead + cbThisWrite + cbPostRead);
    2218             AssertBreakStmt(VALID_PTR(pvTmp), rc = VERR_NO_MEMORY);
    2219 
    2220             if (!(pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME))
    2221             {
    2222                 /* Optimized write, suppress writing to a so far unallocated
    2223                  * block if the data is in fact not changed. */
    2224                 rc = vdWriteHelperOptimized(pDisk, pImage, pImageParentOverride,
    2225                                             uOffsetCur, cbWriteCur,
    2226                                             cbThisWrite, cbPreRead, cbPostRead,
    2227                                             pcvBufCur, pvTmp, cImagesRead);
    2228             }
    2229             else
    2230             {
    2231                 /* Normal write, not optimized in any way. The block will
    2232                  * be written no matter what. This will usually (unless the
    2233                  * backend has some further optimization enabled) cause the
    2234                  * block to be allocated. */
    2235                 rc = vdWriteHelperStandard(pDisk, pImage, pImageParentOverride,
    2236                                            uOffsetCur, cbWriteCur,
    2237                                            cbThisWrite, cbPreRead, cbPostRead,
    2238                                            pcvBufCur, pvTmp);
    2239             }
    2240             RTMemTmpFree(pvTmp);
    2241             if (RT_FAILURE(rc))
    2242                 break;
    2243         }
    2244 
    2245         cbWriteCur -= cbThisWrite;
    2246         uOffsetCur += cbThisWrite;
    2247         pcvBufCur = (char *)pcvBufCur + cbThisWrite;
    2248     } while (cbWriteCur != 0 && RT_SUCCESS(rc));
    2249 
    2250 #if 0 /** @todo: Soon removed when sync and async version of the write helper are merged. */
    2251     /* Update the cache on success */
    2252     if (   RT_SUCCESS(rc)
    2253         && pDisk->pCache
    2254         && fUpdateCache)
    2255         rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, cbWrite, NULL);
    2256 
    2257     if (RT_SUCCESS(rc))
    2258         rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite);
    2259 #endif
    2260 
    2261     return rc;
     2029    if (fUpdateCache)
     2030        fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE;
     2031
     2032    Segment.pvSeg = (void *)pvBuf;
     2033    Segment.cbSeg = cbWrite;
     2034    RTSgBufInit(&SgBuf, &Segment, 1);
     2035    vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf,
     2036                NULL, vdWriteHelperAsync, fFlags);
     2037
     2038    IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
     2039    IoCtx.Req.Io.cImagesRead = cImagesRead;
     2040    IoCtx.pIoCtxParent          = NULL;
     2041    IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
     2042    IoCtx.Type.Root.pvUser1     = pDisk;
     2043    IoCtx.Type.Root.pvUser2     = NULL;
     2044    return vdIoCtxProcessSync(&IoCtx);
    22622045}
    22632046
     
    24912274}
    24922275
    2493 /**
    2494  * internal: write a complete block (only used for diff images), taking the
    2495  * remaining data from parent images. This implementation does not optimize
    2496  * anything (except that it tries to read only those portions from parent
    2497  * images that are really needed) - async version.
    2498  */
    2499 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
    2500 {
    2501     int rc = VINF_SUCCESS;
    2502 
    2503 #if 0
    2504 
    2505     /* Read the data that goes before the write to fill the block. */
    2506     if (cbPreRead)
    2507     {
    2508         rc = vdReadHelperAsync(pIoCtxDst);
    2509         if (RT_FAILURE(rc))
    2510             return rc;
    2511     }
    2512 
    2513     /* Copy the data to the right place in the buffer. */
    2514     vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbThisWrite);
    2515 
    2516     /* Read the data that goes after the write to fill the block. */
    2517     if (cbPostRead)
    2518     {
    2519         /* If we have data to be written, use that instead of reading
    2520          * data from the image. */
    2521         size_t cbWriteCopy;
    2522         if (cbWrite > cbThisWrite)
    2523             cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
    2524         else
    2525             cbWriteCopy = 0;
    2526         /* Figure out how much we cannot read from the image, because
    2527          * the last block to write might exceed the nominal size of the
    2528          * image for technical reasons. */
    2529         size_t cbFill;
    2530         if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
    2531             cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
    2532         else
    2533             cbFill = 0;
    2534         /* The rest must be read from the image. */
    2535         size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill;
    2536 
    2537         /* Now assemble the remaining data. */
    2538         if (cbWriteCopy)
    2539         {
    2540             vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbWriteCopy);
    2541             ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbWriteCopy);
    2542         }
    2543 
    2544         if (cbReadImage)
    2545             rc = vdReadHelperAsync(pDisk, pImage, pImageParentOverride, pIoCtxDst,
    2546                                    uOffset + cbThisWrite + cbWriteCopy,
    2547                                    cbReadImage);
    2548         if (RT_FAILURE(rc))
    2549             return rc;
    2550         /* Zero out the remainder of this block. Will never be visible, as this
    2551          * is beyond the limit of the image. */
    2552         if (cbFill)
    2553         {
    2554             vdIoCtxSet(pIoCtxDst, '\0', cbFill);
    2555             ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbFill);
    2556         }
    2557     }
    2558 
    2559     if (   !pIoCtxDst->cbTransferLeft
    2560         && !pIoCtxDst->cMetaTransfersPending
    2561         && ASMAtomicCmpXchgBool(&pIoCtxDst->fComplete, true, false))
    2562     {
    2563         /* Write the full block to the virtual disk. */
    2564         vdIoCtxChildReset(pIoCtxDst);
    2565         rc = pImage->Backend->pfnWrite(pImage->pBackendData,
    2566                                             uOffset - cbPreRead,
    2567                                             cbPreRead + cbThisWrite + cbPostRead,
    2568                                             pIoCtxDst,
    2569                                             NULL, &cbPreRead, &cbPostRead, 0);
    2570         Assert(rc != VERR_VD_BLOCK_FREE);
    2571         Assert(cbPreRead == 0);
    2572         Assert(cbPostRead == 0);
    2573     }
    2574     else
    2575     {
    2576         LogFlow(("cbTransferLeft=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
    2577                  pIoCtxDst->cbTransferLeft, pIoCtxDst->cMetaTransfersPending,
    2578                  pIoCtxDst->fComplete));
    2579         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    2580     }
    2581 
    2582     return rc;
    2583 #endif
    2584     return VERR_NOT_IMPLEMENTED;
    2585 }
    2586 
    2587 static int vdWriteHelperOptimizedCommitAsync(PVDIOCTX pIoCtx)
     2276static int vdWriteHelperCommitAsync(PVDIOCTX pIoCtx)
    25882277{
    25892278    int rc             = VINF_SUCCESS;
     
    25952284    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
    25962285    rc = pImage->Backend->pfnWrite(pImage->pBackendData,
    2597                                         pIoCtx->Req.Io.uOffset - cbPreRead,
    2598                                         cbPreRead + cbThisWrite + cbPostRead,
    2599                                         pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);
     2286                                   pIoCtx->Req.Io.uOffset - cbPreRead,
     2287                                   cbPreRead + cbThisWrite + cbPostRead,
     2288                                   pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);
    26002289    Assert(rc != VERR_VD_BLOCK_FREE);
    26012290    Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0);
     
    26862375    /* Write the full block to the virtual disk. */
    26872376    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    2688     pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedCommitAsync;
     2377    pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
    26892378
    26902379    return rc;
     
    26962385
    26972386    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2387
     2388    pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
    26982389
    26992390    if (pIoCtx->Req.Io.cbTransferLeft)
     
    27622453    /* Next step */
    27632454    pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync;
     2455    return VINF_SUCCESS;
     2456}
     2457
     2458static int vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx)
     2459{
     2460    int rc = VINF_SUCCESS;
     2461    size_t cbPostRead  = pIoCtx->Type.Child.cbPostRead;
     2462    size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
     2463    PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;
     2464
     2465    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2466
     2467    vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);
     2468    if (cbPostRead)
     2469    {
     2470        size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
     2471        size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
     2472        size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;
     2473
     2474        /* Now assemble the remaining data. */
     2475        if (cbWriteCopy)
     2476        {
     2477            /*
     2478             * The S/G buffer of the parent needs to be cloned because
     2479             * it is not allowed to modify the state.
     2480             */
     2481            RTSGBUF SgBufParentTmp;
     2482
     2483            RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
     2484            RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
     2485        }
     2486
     2487        /* Zero out the remainder of this block. Will never be visible, as this
     2488         * is beyond the limit of the image. */
     2489        if (cbFill)
     2490        {
     2491            RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage);
     2492            vdIoCtxSet(pIoCtx, '\0', cbFill);
     2493        }
     2494
     2495        if (cbReadImage)
     2496        {
     2497            /* Read remaining data. */
     2498        }
     2499        else
     2500        {
     2501            /* Write the full block to the virtual disk. */
     2502            RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
     2503            pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
     2504        }
     2505    }
     2506    else
     2507    {
     2508        /* Write the full block to the virtual disk. */
     2509        RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
     2510        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
     2511    }
     2512
     2513    return rc;
     2514}
     2515
     2516static int vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx)
     2517{
     2518    int rc = VINF_SUCCESS;
     2519
     2520    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2521
     2522    pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
     2523
     2524    if (pIoCtx->Req.Io.cbTransferLeft)
     2525        rc = vdReadHelperAsync(pIoCtx);
     2526
     2527    if (   RT_SUCCESS(rc)
     2528        && (   pIoCtx->Req.Io.cbTransferLeft
     2529            || pIoCtx->cMetaTransfersPending))
     2530        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     2531     else
     2532        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
     2533
     2534    return rc;
     2535}
     2536
     2537static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
     2538{
     2539    PVBOXHDD pDisk = pIoCtx->pDisk;
     2540    uint64_t uOffset   = pIoCtx->Type.Child.uOffsetSaved;
     2541    size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
     2542    size_t cbPreRead   = pIoCtx->Type.Child.cbPreRead;
     2543    size_t cbPostRead  = pIoCtx->Type.Child.cbPostRead;
     2544    size_t cbWrite     = pIoCtx->Type.Child.cbWriteParent;
     2545    size_t cbFill = 0;
     2546    size_t cbWriteCopy = 0;
     2547    size_t cbReadImage = 0;
     2548
     2549    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2550
     2551    AssertPtr(pIoCtx->pIoCtxParent);
     2552    Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);
     2553
     2554    /* Calculate the amount of data to read that goes after the write to fill the block. */
     2555    if (cbPostRead)
     2556    {
     2557        /* If we have data to be written, use that instead of reading
     2558         * data from the image. */
     2559        cbWriteCopy;
     2560        if (cbWrite > cbThisWrite)
     2561            cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
     2562
     2563        /* Figure out how much we cannot read from the image, because
     2564         * the last block to write might exceed the nominal size of the
     2565         * image for technical reasons. */
     2566        if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
     2567            cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
     2568
     2569        /* The rest must be read from the image. */
     2570        cbReadImage = cbPostRead - cbWriteCopy - cbFill;
     2571    }
     2572
     2573    pIoCtx->Type.Child.Write.Optimized.cbFill      = cbFill;
     2574    pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
     2575    pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;
     2576
     2577    /* Next step */
     2578    if (cbPreRead)
     2579    {
     2580        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync;
     2581
     2582        /* Read the data that goes before the write to fill the block. */
     2583        pIoCtx->Req.Io.cbTransferLeft = cbPreRead;
     2584        pIoCtx->Req.Io.cbTransfer     = pIoCtx->Req.Io.cbTransferLeft;
     2585        pIoCtx->Req.Io.uOffset       -= cbPreRead;
     2586    }
     2587    else
     2588        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
     2589
    27642590    return VINF_SUCCESS;
    27652591}
     
    96379463                                  pfnComplete, pvUser1, pvUser2,
    96389464                                  NULL, vdReadHelperAsync,
    9639                                   VDIOCTX_FLAGS_DEFAULT);
     9465                                  VDIOCTX_FLAGS_ZERO_FREE_BLOCKS);
    96409466        if (!pIoCtx)
    96419467        {
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette