VirtualBox

Changeset 44616 in vbox for trunk/src


Ignore:
Timestamp:
Feb 10, 2013 6:38:22 PM (12 years ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
83702
Message:

Storage/VD: Remove the leftovers of the sync I/O path, everything is covered by the async I/O path now

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Storage/VD.cpp

    r44431 r44616  
    355355            /** S/G buffer */
    356356            RTSGBUF              SgBuf;
     357            /** Flag whether all reads from the image chain returned VERR_VD_BLOCK_FREE
     358             * so far. */
     359            bool                 fAllFree;
     360            /** Number of bytes to clear in the buffer before the current read. */
     361            size_t               cbBufClear;
     362            /** Number of images to read. */
     363            unsigned             cImagesRead;
     364            /** Override for the parent image to start reading from. */
     365            PVDIMAGE             pImageParentOverride;
    357366        } Io;
    358367        /** Discard requests. */
     
    424433
    425434/** Default flags for an I/O context, i.e. unblocked and async. */
    426 #define VDIOCTX_FLAGS_DEFAULT (0)
     435#define VDIOCTX_FLAGS_DEFAULT                   (0)
    427436/** Flag whether the context is blocked. */
    428 #define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0)
     437#define VDIOCTX_FLAGS_BLOCKED          RT_BIT_32(0)
    429438/** Flag whether the I/O context is using synchronous I/O. */
    430 #define VDIOCTX_FLAGS_SYNC    RT_BIT_32(1)
     439#define VDIOCTX_FLAGS_SYNC             RT_BIT_32(1)
     440/** Flag whether the read should update the cache. */
     441#define VDIOCTX_FLAGS_READ_UDATE_CACHE RT_BIT_32(2)
     442/** Flag whether free blocks should be zeroed.
     443 * If false and no image has data for the specified
     444 * range VERR_VD_BLOCK_FREE is returned for the I/O context.
     445 * Note that unallocated blocks are still zeroed
     446 * if at least one image has valid data for a part
     447 * of the range.
     448 */
     449#define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS RT_BIT_32(3)
     450/** Don't free the I/O context when complete because
     451 * it was allocated elsewhere (stack, ...). */
     452#define VDIOCTX_FLAGS_DONT_FREE        RT_BIT_32(4)
    431453
    432454/** NIL I/O context pointer value. */
     
    577599/** Forward declaration of the async discard helper. */
    578600static int vdDiscardHelperAsync(PVDIOCTX pIoCtx);
     601static int vdWriteHelperAsync(PVDIOCTX pIoCtx);
    579602static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk);
    580603static int vdDiskUnlock(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc);
     604static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq);
    581605
    582606/**
     
    809833    pIoCtx->Req.Io.pImageStart    = pImageStart;
    810834    pIoCtx->Req.Io.pImageCur      = pImageStart;
     835    pIoCtx->Req.Io.fAllFree       = true;
     836    pIoCtx->Req.Io.cbBufClear     = 0;
     837    pIoCtx->Req.Io.pImageParentOverride = NULL;
    811838    pIoCtx->cDataTransfersPending = 0;
    812839    pIoCtx->cMetaTransfersPending = 0;
     
    817844    pIoCtx->pfnIoCtxTransferNext  = NULL;
    818845    pIoCtx->rcReq                 = VINF_SUCCESS;
     846    pIoCtx->pIoCtxParent          = NULL;
    819847
    820848    /* There is no S/G list for a flush request. */
     
    845873 */
    846874static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset,
    847                              PVDIOCTX pIoCtx, size_t cbRead, size_t *pcbRead)
     875                             size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead)
    848876{
    849877    int rc = VINF_SUCCESS;
     
    908936
    909937/**
     938 * Creates a new empty discard state.
     939 *
     940 * @returns Pointer to the new discard state or NULL if out of memory.
     941 */
     942static PVDDISCARDSTATE vdDiscardStateCreate(void)
     943{
     944    PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));
     945
     946    if (pDiscard)
     947    {
     948        RTListInit(&pDiscard->ListLru);
     949        pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
     950        if (!pDiscard->pTreeBlocks)
     951        {
     952            RTMemFree(pDiscard);
     953            pDiscard = NULL;
     954        }
     955    }
     956
     957    return pDiscard;
     958}
     959
     960/**
     961 * Removes the least recently used blocks from the waiting list until
     962 * the new value is reached.
     963 *
     964 * @returns VBox status code.
     965 * @param   pDisk              VD disk container.
     966 * @param   pDiscard           The discard state.
     967 * @param   cbDiscardingNew    How many bytes should be waiting on success.
     968 *                             The number of bytes waiting can be less.
     969 */
     970static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)
     971{
     972    int rc = VINF_SUCCESS;
     973
     974    LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
     975                 pDisk, pDiscard, cbDiscardingNew));
     976
     977    while (pDiscard->cbDiscarding > cbDiscardingNew)
     978    {
     979        PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
     980
     981        Assert(!RTListIsEmpty(&pDiscard->ListLru));
     982
     983        /* Go over the allocation bitmap and mark all discarded sectors as unused. */
     984        uint64_t offStart = pBlock->Core.Key;
     985        uint32_t idxStart = 0;
     986        size_t cbLeft = pBlock->cbDiscard;
     987        bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
     988        uint32_t cSectors = pBlock->cbDiscard / 512;
     989
     990        while (cbLeft > 0)
     991        {
     992            int32_t idxEnd;
     993            size_t cbThis = cbLeft;
     994
     995            if (fAllocated)
     996            {
     997                /* Check for the first unallocated bit. */
     998                idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
     999                if (idxEnd != -1)
     1000                {
     1001                    cbThis = (idxEnd - idxStart) * 512;
     1002                    fAllocated = false;
     1003                }
     1004            }
     1005            else
     1006            {
     1007                /* Mark as unused and check for the first set bit. */
     1008                idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
     1009                if (idxEnd != -1)
     1010                    cbThis = (idxEnd - idxStart) * 512;
     1011
     1012
     1013                VDIOCTX IoCtx;
     1014                vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,
     1015                            NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
     1016                rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,
     1017                                                            &IoCtx, offStart, cbThis, NULL,
     1018                                                            NULL, &cbThis, NULL,
     1019                                                            VD_DISCARD_MARK_UNUSED);
     1020                if (RT_FAILURE(rc))
     1021                    break;
     1022
     1023                fAllocated = true;
     1024            }
     1025
     1026            idxStart  = idxEnd;
     1027            offStart += cbThis;
     1028            cbLeft   -= cbThis;
     1029        }
     1030
     1031        if (RT_FAILURE(rc))
     1032            break;
     1033
     1034        PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
     1035        Assert(pBlockRemove == pBlock);
     1036        RTListNodeRemove(&pBlock->NodeLru);
     1037
     1038        pDiscard->cbDiscarding -= pBlock->cbDiscard;
     1039        RTMemFree(pBlock->pbmAllocated);
     1040        RTMemFree(pBlock);
     1041    }
     1042
     1043    Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
     1044
     1045    LogFlowFunc(("returns rc=%Rrc\n", rc));
     1046    return rc;
     1047}
     1048
     1049/**
     1050 * Destroys the current discard state, writing any waiting blocks to the image.
     1051 *
     1052 * @returns VBox status code.
     1053 * @param   pDisk    VD disk container.
     1054 */
     1055static int vdDiscardStateDestroy(PVBOXHDD pDisk)
     1056{
     1057    int rc = VINF_SUCCESS;
     1058
     1059    if (pDisk->pDiscard)
     1060    {
     1061        rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);
     1062        AssertRC(rc);
     1063        RTMemFree(pDisk->pDiscard->pTreeBlocks);
     1064        RTMemFree(pDisk->pDiscard);
     1065        pDisk->pDiscard = NULL;
     1066    }
     1067
     1068    return rc;
     1069}
     1070
     1071/**
     1072 * Marks the given range as allocated in the image.
     1073 * Required if there are discards in progress and a write to a block which can get discarded
     1074 * is written to.
     1075 *
     1076 * @returns VBox status code.
     1077 * @param   pDisk    VD container data.
     1078 * @param   uOffset  First byte to mark as allocated.
     1079 * @param   cbRange  Number of bytes to mark as allocated.
     1080 */
     1081static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange)
     1082{
     1083    PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
     1084    int rc = VINF_SUCCESS;
     1085
     1086    if (pDiscard)
     1087    {
     1088        do
     1089        {
     1090            size_t cbThisRange = cbRange;
     1091            PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);
     1092
     1093            if (pBlock)
     1094            {
     1095                int32_t idxStart, idxEnd;
     1096
     1097                Assert(!(cbThisRange % 512));
     1098                Assert(!((uOffset - pBlock->Core.Key) % 512));
     1099
     1100                cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);
     1101
     1102                idxStart = (uOffset - pBlock->Core.Key) / 512;
     1103                idxEnd = idxStart + (cbThisRange / 512);
     1104                ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);
     1105            }
     1106            else
     1107            {
     1108                pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);
     1109                if (pBlock)
     1110                    cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);
     1111            }
     1112
     1113            Assert(cbRange >= cbThisRange);
     1114
     1115            uOffset += cbThisRange;
     1116            cbRange -= cbThisRange;
     1117        } while (cbRange != 0);
     1118    }
     1119
     1120    return rc;
     1121}
     1122
     1123DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
     1124                                  uint64_t uOffset, size_t cbTransfer,
     1125                                  PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,
     1126                                  void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
     1127                                  uint32_t fFlags)
     1128{
     1129    PVDIOCTX pIoCtx = NULL;
     1130
     1131    pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
     1132    if (RT_LIKELY(pIoCtx))
     1133    {
     1134        vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
     1135                    pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
     1136    }
     1137
     1138    return pIoCtx;
     1139}
     1140
     1141DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
     1142                                      uint64_t uOffset, size_t cbTransfer,
     1143                                      PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
     1144                                      PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
     1145                                      void *pvUser1, void *pvUser2,
     1146                                      void *pvAllocation,
     1147                                      PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
     1148                                      uint32_t fFlags)
     1149{
     1150    PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
     1151                                   pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
     1152
     1153    if (RT_LIKELY(pIoCtx))
     1154    {
     1155        pIoCtx->pIoCtxParent          = NULL;
     1156        pIoCtx->Type.Root.pfnComplete = pfnComplete;
     1157        pIoCtx->Type.Root.pvUser1     = pvUser1;
     1158        pIoCtx->Type.Root.pvUser2     = pvUser2;
     1159    }
     1160
     1161    LogFlow(("Allocated root I/O context %#p\n", pIoCtx));
     1162    return pIoCtx;
     1163}
     1164
     1165DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges,
     1166                                         unsigned cRanges,
     1167                                         PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
     1168                                         void *pvUser1, void *pvUser2,
     1169                                         void *pvAllocation,
     1170                                         PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
     1171                                         uint32_t fFlags)
     1172{
     1173    PVDIOCTX pIoCtx = NULL;
     1174
     1175    pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
     1176    if (RT_LIKELY(pIoCtx))
     1177    {
     1178        pIoCtx->pIoCtxNext                = NULL;
     1179        pIoCtx->pDisk                     = pDisk;
     1180        pIoCtx->enmTxDir                  = VDIOCTXTXDIR_DISCARD;
     1181        pIoCtx->cDataTransfersPending     = 0;
     1182        pIoCtx->cMetaTransfersPending     = 0;
     1183        pIoCtx->fComplete                 = false;
     1184        pIoCtx->fFlags                    = fFlags;
     1185        pIoCtx->pvAllocation              = pvAllocation;
     1186        pIoCtx->pfnIoCtxTransfer          = pfnIoCtxTransfer;
     1187        pIoCtx->pfnIoCtxTransferNext      = NULL;
     1188        pIoCtx->rcReq                     = VINF_SUCCESS;
     1189        pIoCtx->Req.Discard.paRanges      = paRanges;
     1190        pIoCtx->Req.Discard.cRanges       = cRanges;
     1191        pIoCtx->Req.Discard.idxRange      = 0;
     1192        pIoCtx->Req.Discard.cbDiscardLeft = 0;
     1193        pIoCtx->Req.Discard.offCur        = 0;
     1194        pIoCtx->Req.Discard.cbThisDiscard = 0;
     1195
     1196        pIoCtx->pIoCtxParent          = NULL;
     1197        pIoCtx->Type.Root.pfnComplete = pfnComplete;
     1198        pIoCtx->Type.Root.pvUser1     = pvUser1;
     1199        pIoCtx->Type.Root.pvUser2     = pvUser2;
     1200    }
     1201
     1202    LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));
     1203    return pIoCtx;
     1204}
     1205
     1206DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
     1207                                       uint64_t uOffset, size_t cbTransfer,
     1208                                       PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
     1209                                       PVDIOCTX pIoCtxParent, size_t cbTransferParent,
     1210                                       size_t cbWriteParent, void *pvAllocation,
     1211                                       PFNVDIOCTXTRANSFER pfnIoCtxTransfer)
     1212{
     1213    PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
     1214                                   pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0);
     1215
     1216    AssertPtr(pIoCtxParent);
     1217    Assert(!pIoCtxParent->pIoCtxParent);
     1218
     1219    if (RT_LIKELY(pIoCtx))
     1220    {
     1221        pIoCtx->pIoCtxParent                   = pIoCtxParent;
     1222        pIoCtx->Type.Child.uOffsetSaved        = uOffset;
     1223        pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;
     1224        pIoCtx->Type.Child.cbTransferParent    = cbTransferParent;
     1225        pIoCtx->Type.Child.cbWriteParent       = cbWriteParent;
     1226    }
     1227
     1228    LogFlow(("Allocated child I/O context %#p\n", pIoCtx));
     1229    return pIoCtx;
     1230}
     1231
     1232DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)
     1233{
     1234    PVDIOTASK pIoTask = NULL;
     1235
     1236    pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
     1237    if (pIoTask)
     1238    {
     1239        pIoTask->pIoStorage           = pIoStorage;
     1240        pIoTask->pfnComplete          = pfnComplete;
     1241        pIoTask->pvUser               = pvUser;
     1242        pIoTask->fMeta                = false;
     1243        pIoTask->Type.User.cbTransfer = cbTransfer;
     1244        pIoTask->Type.User.pIoCtx     = pIoCtx;
     1245    }
     1246
     1247    return pIoTask;
     1248}
     1249
     1250DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)
     1251{
     1252    PVDIOTASK pIoTask = NULL;
     1253
     1254    pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
     1255    if (pIoTask)
     1256    {
     1257        pIoTask->pIoStorage          = pIoStorage;
     1258        pIoTask->pfnComplete         = pfnComplete;
     1259        pIoTask->pvUser              = pvUser;
     1260        pIoTask->fMeta               = true;
     1261        pIoTask->Type.Meta.pMetaXfer = pMetaXfer;
     1262    }
     1263
     1264    return pIoTask;
     1265}
     1266
     1267DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1268{
     1269    LogFlow(("Freeing I/O context %#p\n", pIoCtx));
     1270
     1271    if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE))
     1272    {
     1273        if (pIoCtx->pvAllocation)
     1274            RTMemFree(pIoCtx->pvAllocation);
     1275#ifdef DEBUG
     1276        memset(pIoCtx, 0xff, sizeof(VDIOCTX));
     1277#endif
     1278        RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);
     1279    }
     1280}
     1281
     1282DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask)
     1283{
     1284    RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);
     1285}
     1286
     1287DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)
     1288{
     1289    AssertPtr(pIoCtx->pIoCtxParent);
     1290
     1291    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
     1292    pIoCtx->Req.Io.uOffset        = pIoCtx->Type.Child.uOffsetSaved;
     1293    pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved;
     1294}
     1295
     1296DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)
     1297{
     1298    PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));
     1299
     1300    if (RT_LIKELY(pMetaXfer))
     1301    {
     1302        pMetaXfer->Core.Key     = uOffset;
     1303        pMetaXfer->Core.KeyLast = uOffset + cb - 1;
     1304        pMetaXfer->fFlags       = VDMETAXFER_TXDIR_NONE;
     1305        pMetaXfer->cbMeta       = cb;
     1306        pMetaXfer->pIoStorage   = pIoStorage;
     1307        pMetaXfer->cRefs        = 0;
     1308        RTListInit(&pMetaXfer->ListIoCtxWaiting);
     1309    }
     1310    return pMetaXfer;
     1311}
     1312
     1313DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)
     1314{
     1315    /* Put it on the waiting list. */
     1316    PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);
     1317    PVDIOCTX pHeadOld;
     1318    pIoCtx->pIoCtxNext = pNext;
     1319    while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))
     1320    {
     1321        pNext = pHeadOld;
     1322        Assert(pNext != pIoCtx);
     1323        pIoCtx->pIoCtxNext = pNext;
     1324        ASMNopPause();
     1325    }
     1326}
     1327
     1328DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1329{
     1330    LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx));
     1331
     1332    Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));
     1333    pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
     1334    vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);
     1335}
     1336
     1337static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)
     1338{
     1339    return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);
     1340}
     1341
     1342static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)
     1343{
     1344    return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);
     1345}
     1346
     1347static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)
     1348{
     1349    return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
     1350}
     1351
     1352static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)
     1353{
     1354    return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
     1355}
     1356
     1357static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)
     1358{
     1359    return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);
     1360}
     1361
     1362/**
     1363 * Process the I/O context, core method which assumes that the I/O context
     1364 * acquired the lock.
     1365 *
     1366 * @returns VBox status code.
     1367 * @param   pIoCtx    I/O context to process.
     1368 */
     1369static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)
     1370{
     1371    int rc = VINF_SUCCESS;
     1372
     1373    VD_IS_LOCKED(pIoCtx->pDisk);
     1374
     1375    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     1376
     1377    if (   !pIoCtx->cMetaTransfersPending
     1378        && !pIoCtx->cDataTransfersPending
     1379        && !pIoCtx->pfnIoCtxTransfer)
     1380    {
     1381        rc = VINF_VD_ASYNC_IO_FINISHED;
     1382        goto out;
     1383    }
     1384
     1385    /*
     1386     * We complete the I/O context in case of an error
     1387     * if there is no I/O task pending.
     1388     */
     1389    if (   RT_FAILURE(pIoCtx->rcReq)
     1390        && !pIoCtx->cMetaTransfersPending
     1391        && !pIoCtx->cDataTransfersPending)
     1392    {
     1393        rc = VINF_VD_ASYNC_IO_FINISHED;
     1394        goto out;
     1395    }
     1396
     1397    /* Don't change anything if there is a metadata transfer pending or we are blocked. */
     1398    if (   pIoCtx->cMetaTransfersPending
     1399        || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
     1400    {
     1401        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1402        goto out;
     1403    }
     1404
     1405    if (pIoCtx->pfnIoCtxTransfer)
     1406    {
     1407        /* Call the transfer function advancing to the next while there is no error. */
     1408        while (   pIoCtx->pfnIoCtxTransfer
     1409               && !pIoCtx->cMetaTransfersPending
     1410               && RT_SUCCESS(rc))
     1411        {
     1412            LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));
     1413            rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);
     1414
     1415            /* Advance to the next part of the transfer if the current one succeeded. */
     1416            if (RT_SUCCESS(rc))
     1417            {
     1418                pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
     1419                pIoCtx->pfnIoCtxTransferNext = NULL;
     1420            }
     1421        }
     1422    }
     1423
     1424    if (   RT_SUCCESS(rc)
     1425        && !pIoCtx->cMetaTransfersPending
     1426        && !pIoCtx->cDataTransfersPending)
     1427        rc = VINF_VD_ASYNC_IO_FINISHED;
     1428    else if (   RT_SUCCESS(rc)
     1429             || rc == VERR_VD_NOT_ENOUGH_METADATA
     1430             || rc == VERR_VD_IOCTX_HALT)
     1431        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1432    else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
     1433    {
     1434        ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);
     1435        /*
     1436         * The I/O context completed if we have an error and there is no data
     1437         * or meta data transfer pending.
     1438         */
     1439        if (   !pIoCtx->cMetaTransfersPending
     1440            && !pIoCtx->cDataTransfersPending)
     1441            rc = VINF_VD_ASYNC_IO_FINISHED;
     1442        else
     1443            rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1444    }
     1445
     1446out:
     1447    LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
     1448                 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,
     1449                 pIoCtx->fComplete));
     1450
     1451    return rc;
     1452}
     1453
     1454/**
     1455 * Processes the list of waiting I/O contexts.
     1456 *
     1457 * @returns VBox status code.
     1458 * @param   pDisk    The disk structure.
     1459 * @param   pIoCtxRc An I/O context handle which waits on the list. When processed
     1460 *                   The status code is returned. NULL if there is no I/O context
     1461 *                   to return the status code for.
     1462 */
     1463static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)
     1464{
     1465    int rc = VINF_SUCCESS;
     1466
     1467    LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));
     1468
     1469    VD_IS_LOCKED(pDisk);
     1470
     1471    /* Get the waiting list and process it in FIFO order. */
     1472    PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);
     1473
     1474    /* Reverse it. */
     1475    PVDIOCTX pCur = pIoCtxHead;
     1476    pIoCtxHead = NULL;
     1477    while (pCur)
     1478    {
     1479        PVDIOCTX pInsert = pCur;
     1480        pCur = pCur->pIoCtxNext;
     1481        pInsert->pIoCtxNext = pIoCtxHead;
     1482        pIoCtxHead = pInsert;
     1483    }
     1484
     1485    /* Process now. */
     1486    pCur = pIoCtxHead;
     1487    while (pCur)
     1488    {
     1489        int rcTmp;
     1490        PVDIOCTX pTmp = pCur;
     1491
     1492        pCur = pCur->pIoCtxNext;
     1493        pTmp->pIoCtxNext = NULL;
     1494
     1495        /*
     1496         * Need to clear the sync flag here if there is a new I/O context
     1497         * with it set and the context is not given in pIoCtxRc.
     1498         * This happens most likely on a different thread and that one shouldn't
     1499         * process the context synchronously.
     1500         *
     1501         * The thread who issued the context will wait on the event semaphore
     1502         * anyway which is signalled when the completion handler is called.
     1503         */
     1504        if (   pTmp->fFlags & VDIOCTX_FLAGS_SYNC
     1505            && pTmp != pIoCtxRc)
     1506            pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC;
     1507
     1508        rcTmp = vdIoCtxProcessLocked(pTmp);
     1509        if (pTmp == pIoCtxRc)
     1510        {
     1511            /* The given I/O context was processed, pass the return code to the caller. */
     1512            rc = rcTmp;
     1513        }
     1514        else if (   rcTmp == VINF_VD_ASYNC_IO_FINISHED
     1515                 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
     1516        {
     1517            LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
     1518            vdThreadFinishWrite(pDisk);
     1519            pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
     1520                                        pTmp->Type.Root.pvUser2,
     1521                                        pTmp->rcReq);
     1522            vdIoCtxFree(pDisk, pTmp);
     1523        }
     1524    }
     1525
     1526    LogFlowFunc(("returns rc=%Rrc\n", rc));
     1527    return rc;
     1528}
     1529
     1530/**
     1531 * Processes the list of blocked I/O contexts.
     1532 *
     1533 * @returns nothing.
     1534 * @param   pDisk    The disk structure.
     1535 */
     1536static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)
     1537{
     1538    LogFlowFunc(("pDisk=%#p\n", pDisk));
     1539
     1540    VD_IS_LOCKED(pDisk);
     1541
     1542    /* Get the waiting list and process it in FIFO order. */
     1543    PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);
     1544
     1545    /* Reverse it. */
     1546    PVDIOCTX pCur = pIoCtxHead;
     1547    pIoCtxHead = NULL;
     1548    while (pCur)
     1549    {
     1550        PVDIOCTX pInsert = pCur;
     1551        pCur = pCur->pIoCtxNext;
     1552        pInsert->pIoCtxNext = pIoCtxHead;
     1553        pIoCtxHead = pInsert;
     1554    }
     1555
     1556    /* Process now. */
     1557    pCur = pIoCtxHead;
     1558    while (pCur)
     1559    {
     1560        int rc;
     1561        PVDIOCTX pTmp = pCur;
     1562
     1563        pCur = pCur->pIoCtxNext;
     1564        pTmp->pIoCtxNext = NULL;
     1565
     1566        Assert(!pTmp->pIoCtxParent);
     1567        Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);
     1568        pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
     1569
     1570        rc = vdIoCtxProcessLocked(pTmp);
     1571        if (   rc == VINF_VD_ASYNC_IO_FINISHED
     1572            && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
     1573        {
     1574            LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
     1575            vdThreadFinishWrite(pDisk);
     1576            pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
     1577                                        pTmp->Type.Root.pvUser2,
     1578                                        pTmp->rcReq);
     1579            vdIoCtxFree(pDisk, pTmp);
     1580        }
     1581    }
     1582
     1583    LogFlowFunc(("returns\n"));
     1584}
     1585
     1586/**
     1587 * Processes the I/O context trying to lock the critical section.
     1588 * The context is deferred if the critical section is busy.
          *
          * The context is always appended to the waiting list of the disk first.
          * If the disk lock flag (pDisk->fLocked) can be taken, the lock is released
          * again immediately through vdDiskUnlock() and that call's status is returned.
     1589 *
     1590 * @returns VBox status code.
          * @retval  VERR_VD_ASYNC_IO_IN_PROGRESS if the disk lock is currently held
          *          and the context therefore stays on the waiting list.
     1591 * @param   pIoCtx    The I/O context to process.
     1592 */
     1593static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)
     1594{
     1595    int rc = VINF_SUCCESS;
     1596    PVBOXHDD pDisk = pIoCtx->pDisk;
     1597
     1598    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     1599
     1600    /* Put it on the waiting list first. */
     1601    vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);
     1602
             /* Only take the lock if nobody else holds it (false -> true). */
     1603    if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
     1604    {
     1605        /* Leave it again, the context will be processed just before leaving the lock. */
     1606        LogFlowFunc(("Successfully acquired the lock\n"));
     1607        rc = vdDiskUnlock(pDisk, pIoCtx);
     1608    }
     1609    else
     1610    {
                 /* The context remains queued; report it as still in progress. */
     1611        LogFlowFunc(("Lock is held\n"));
     1612        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1613    }
     1614
     1615    return rc;
     1616}
     1617
     1618/**
     1619 * Process the I/O context in a synchronous manner, waiting
     1620 * for it to complete.
          *
          * The context is handed to vdIoCtxProcessTryLockDefer(). If that reports the
          * request as still in progress, the caller blocks on pDisk->hEventSemSyncIo
          * and the result is taken from pDisk->rcSync. In every other case the
          * context is passed to vdIoCtxFree() before returning.
     1621 *
     1622 * @returns VBox status code of the completed request.
     1623 * @param   pIoCtx    The sync I/O context.
     1624 */
     1625static int vdIoCtxProcessSync(PVDIOCTX pIoCtx)
     1626{
     1627    int rc = VINF_SUCCESS;
     1628    PVBOXHDD pDisk = pIoCtx->pDisk;
     1629
     1630    LogFlowFunc(("pIoCtx=%p\n", pIoCtx));
     1631
     1632    AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC,
     1633              ("I/O context is not marked as synchronous\n"));
     1634
     1635    rc = vdIoCtxProcessTryLockDefer(pIoCtx);
             /* A request that finished right away is plain success for the sync caller. */
     1636    if (rc == VINF_VD_ASYNC_IO_FINISHED)
     1637        rc = VINF_SUCCESS;
     1638
     1639    if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
     1640    {
                 /* Block until the event semaphore is signalled, then pick up the
                  * request status stored in the disk structure. */
     1641        rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT);
     1642        AssertRC(rc);
     1643
     1644        rc = pDisk->rcSync;
     1645    }
     1646    else /* Success or error. */
     1647        vdIoCtxFree(pDisk, pIoCtx);
     1648
     1649    return rc;
     1650}
     1651
     /**
      * Checks whether the given I/O context is recorded as the disk lock owner.
      *
      * @returns true if pDisk->pIoCtxLockOwner equals pIoCtx, false otherwise.
      * @param   pDisk     The disk to check.
      * @param   pIoCtx    The I/O context to compare against the lock owner.
      */
     1652DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1653{
     1654    return pDisk->pIoCtxLockOwner == pIoCtx;
     1655}
     1656
     /**
      * Tries to make the given I/O context the lock owner of the disk.
      *
      * The owner pointer is only set if it is currently NIL_VDIOCTX. Otherwise
      * the context is handed to vdIoCtxDefer() and the request is reported as
      * still in progress.
      *
      * @returns VINF_SUCCESS if the context became the lock owner,
      *          VERR_VD_ASYNC_IO_IN_PROGRESS if it was deferred.
      * @param   pDisk     The disk to lock.
      * @param   pIoCtx    The I/O context which wants to own the lock.
      */
     1657static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
     1658{
     1659    int rc = VINF_SUCCESS;
     1660
     1661    VD_IS_LOCKED(pDisk);
     1662
     1663    LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));
     1664
             /* Atomically claim ownership, succeeds only if there is no owner yet. */
     1665    if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))
     1666    {
     1667        Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */
     1668        vdIoCtxDefer(pDisk, pIoCtx);
     1669        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     1670    }
     1671
     1672    LogFlowFunc(("returns -> %Rrc\n", rc));
     1673    return rc;
     1674}
     1675
     /**
      * Releases the disk lock ownership held by the given I/O context.
      *
      * The owner pointer is reset to NIL_VDIOCTX; the function asserts that
      * pIoCtx is the current owner.
      *
      * @param   pDisk                  The disk to unlock.
      * @param   pIoCtx                 The I/O context expected to own the lock.
      * @param   fProcessBlockedReqs    Whether vdDiskProcessBlockedIoCtx() is called
      *                                 after the ownership was released.
      */
     1676static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
     1677{
     1678    LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
     1679                 pDisk, pIoCtx, fProcessBlockedReqs));
     1680
     1681    VD_IS_LOCKED(pDisk);
     1682
     1683    LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
     1684    Assert(pDisk->pIoCtxLockOwner == pIoCtx);
     1685    ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);
     1686
     1687    if (fProcessBlockedReqs)
     1688    {
     1689        /* Process any blocked writes if the current request didn't cause another growing. */
     1690        vdDiskProcessBlockedIoCtx(pDisk);
     1691    }
     1692
     1693    LogFlowFunc(("returns\n"));
     1694}
     1695
     1696/**
    9101697 * Internal: Reads a given amount of data from the image chain of the disk.
    9111698 **/
    9121699static int vdDiskReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride,
    913                             uint64_t uOffset, void *pvBuf, size_t cbRead, size_t *pcbThisRead)
     1700                            uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead)
    9141701{
    9151702    int rc = VINF_SUCCESS;
    9161703    size_t cbThisRead = cbRead;
    917     RTSGSEG SegmentBuf;
    918     RTSGBUF SgBuf;
    919     VDIOCTX IoCtx;
    9201704
    9211705    AssertPtr(pcbThisRead);
    9221706
    9231707    *pcbThisRead = 0;
    924 
    925     SegmentBuf.pvSeg = pvBuf;
    926     SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE;
    927     RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    928     vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL,
    929                 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    9301708
    9311709    /*
     
    9341712     */
    9351713    rc = pImage->Backend->pfnRead(pImage->pBackendData,
    936                                        uOffset, cbThisRead, &IoCtx,
    937                                        &cbThisRead);
     1714                                  uOffset, cbThisRead, pIoCtx,
     1715                                  &cbThisRead);
    9381716
    9391717    if (rc == VERR_VD_BLOCK_FREE)
     
    9441722        {
    9451723            rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    946                                                    uOffset, cbThisRead, &IoCtx,
    947                                                    &cbThisRead);
     1724                                              uOffset, cbThisRead, pIoCtx,
     1725                                              &cbThisRead);
    9481726        }
    9491727    }
     
    9521730        *pcbThisRead = cbThisRead;
    9531731
     1732    return rc;
     1733}
     1734
     1735/**
     1736 * internal: read the specified amount of data in whatever blocks the backend
     1737 * will give us - async version.
          *
          * The transfer state (offset, remaining size, current image, parent
          * override and image count limit) is taken from pIoCtx->Req.Io. The state
          * is written back only when the loop stops with VERR_VD_NOT_ENOUGH_METADATA
          * or VERR_VD_IOCTX_HALT.
          *
          * @returns VBox status code of the last read. VERR_VD_BLOCK_FREE is
          *          returned instead when VDIOCTX_FLAGS_ZERO_FREE_BLOCKS is not set
          *          and pIoCtx->Req.Io.fAllFree is still true.
          * @param   pIoCtx    The I/O context describing the read.
     1738 */
     1739static int vdReadHelperAsync(PVDIOCTX pIoCtx)
     1740{
     1741    int rc;
     1742    PVBOXHDD pDisk                = pIoCtx->pDisk;
     1743    size_t cbToRead               = pIoCtx->Req.Io.cbTransfer;
     1744    uint64_t uOffset              = pIoCtx->Req.Io.uOffset;
     1745    PVDIMAGE pCurrImage           = pIoCtx->Req.Io.pImageCur;
     1746    PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;
     1747    unsigned cImagesRead          = pIoCtx->Req.Io.cImagesRead;
     1748    size_t cbThisRead;
     1749
     1750    /* Loop until all reads started or we have a backend which needs to read metadata. */
     1751    do
     1752    {
     1753        /* Search for image with allocated block. Do not attempt to read more
     1754         * than the previous reads marked as valid. Otherwise this would return
     1755         * stale data when different block sizes are used for the images. */
     1756        cbThisRead = cbToRead;
     1757
                 /* The cache is only consulted when no parent override is given. */
     1758        if (   pDisk->pCache
     1759            && !pImageParentOverride)
     1760        {
     1761            rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead,
     1762                                   pIoCtx, &cbThisRead);
                     /* The cache has no data for the range, fall back to the image chain. */
     1763            if (rc == VERR_VD_BLOCK_FREE)
     1764            {
     1765                rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead,
     1766                                      pIoCtx, &cbThisRead);
     1767
     1768                /* If the read was successful, write the data back into the cache. */
     1769                if (   RT_SUCCESS(rc)
     1770                    && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UDATE_CACHE)
     1771                {
     1772                    rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead,
     1773                                            pIoCtx, NULL);
     1774                }
     1775            }
     1776        }
     1777        else
     1778        {
     1779
     1780            /*
     1781             * Try to read from the given image.
     1782             * If the block is not allocated read from override chain if present.
     1783             */
     1784            rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
     1785                                              uOffset, cbThisRead, pIoCtx,
     1786                                              &cbThisRead);
     1787
                     /* A limit of exactly one image means the chain is not walked. */
     1788            if (   rc == VERR_VD_BLOCK_FREE
     1789                && cImagesRead != 1)
     1790            {
     1791                unsigned cImagesToProcess = cImagesRead;
     1792
                         /* Continue with the override if given, otherwise with the previous
                          * image in the chain. The override in the context is consumed here.
                          * NOTE(review): the local pImageParentOverride is not cleared, so a
                          * later loop iteration would still see it - confirm this is intended. */
     1793                pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev;
     1794                pIoCtx->Req.Io.pImageParentOverride = NULL;
     1795
     1796                while (pCurrImage && rc == VERR_VD_BLOCK_FREE)
     1797                {
     1798                    rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
     1799                                                      uOffset, cbThisRead,
     1800                                                      pIoCtx, &cbThisRead);
                             /* Stop once the image budget is used up; a budget of 0 never
                              * triggers the break, the walk then ends at the chain end. */
     1801                    if (cImagesToProcess == 1)
     1802                        break;
     1803                    else if (cImagesToProcess > 0)
     1804                        cImagesToProcess--;
     1805
     1806                    if (rc == VERR_VD_BLOCK_FREE)
     1807                        pCurrImage = pCurrImage->pPrev;
     1808                }
     1809            }
     1810        }
     1811
     1812        /* The task state will be updated on success already, don't do it here! */
     1813        if (rc == VERR_VD_BLOCK_FREE)
     1814        {
     1815            /* No image in the chain contains the data for the block. */
     1816            ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);
     1817
     1818            /* Fill the free space with 0 if we are told to do so
     1819             * or a previous read returned valid data. */
     1820            if (   (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)
     1821                || !pIoCtx->Req.Io.fAllFree)
     1822                vdIoCtxSet(pIoCtx, '\0', cbThisRead);
     1823            else
                         /* Remember how much needs clearing should valid data show up later. */
     1824                pIoCtx->Req.Io.cbBufClear += cbThisRead;
     1825
     1826            if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)
     1827                rc = VINF_VD_NEW_ZEROED_BLOCK;
     1828            else
     1829                rc = VINF_SUCCESS;
     1830        }
     1831        else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
     1832            rc = VINF_SUCCESS;
     1833        else if (rc == VERR_VD_IOCTX_HALT)
     1834        {
                     /* Advance past this part and mark the context as blocked; the
                      * failure status makes the loop stop right below. */
     1835            uOffset  += cbThisRead;
     1836            cbToRead -= cbThisRead;
     1837            pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
     1838        }
     1839        else if (   RT_SUCCESS(rc)
     1840                 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
     1841        {
     1842            /* First not free block, fill the space before with 0. */
                     /* Works on a reset clone so the position of the S/G buffer in the
                      * context is left alone. */
     1843            RTSGBUF SgBuf;
     1844            RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf);
     1845            RTSgBufReset(&SgBuf);
     1846            RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear);
     1847            pIoCtx->Req.Io.cbBufClear = 0;
     1848            pIoCtx->Req.Io.fAllFree = false;
     1849        }
     1850
     1851        if (RT_FAILURE(rc))
     1852            break;
     1853
     1854        cbToRead -= cbThisRead;
     1855        uOffset  += cbThisRead;
     1856        pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
     1857    } while (cbToRead != 0 && RT_SUCCESS(rc));
     1858
     1859    if (   rc == VERR_VD_NOT_ENOUGH_METADATA
     1860        || rc == VERR_VD_IOCTX_HALT)
     1861    {
     1862        /* Save the current state. */
     1863        pIoCtx->Req.Io.uOffset    = uOffset;
     1864        pIoCtx->Req.Io.cbTransfer = cbToRead;
     1865        pIoCtx->Req.Io.pImageCur  = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
     1866    }
     1867
             /* Without zeroing requested, an entirely unallocated range is reported
              * as VERR_VD_BLOCK_FREE regardless of the status computed above. */
     1868    return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS) && pIoCtx->Req.Io.fAllFree)
     1869           ? VERR_VD_BLOCK_FREE
     1870           : rc;
     1871}
     1872
     1873/**
     1874 * internal: parent image read wrapper for compacting.
          *
          * Takes the disk lock flag, builds a temporary I/O context on the stack
          * around the caller's buffer and runs vdReadHelperAsync() on it directly.
          *
          * @returns Status code of vdReadHelperAsync(), or VERR_VD_INVALID_STATE
          *          if the disk lock flag was already set on entry.
          * @param   pvUser     Pointer to the parent state descriptor (VDPARENTSTATEDESC).
          * @param   uOffset    Offset to start reading from.
          * @param   pvBuf      Buffer receiving the data.
          * @param   cbRead     Number of bytes to read.
     1875 */
     1876static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,
     1877                        size_t cbRead)
     1878{
     1879    PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;
     1880
     1881    /** @todo
     1882     * Only used for compaction so far which is not possible to mix with async I/O.
     1883     * Needs to be changed if we want to support online compaction of images.
     1884     */
     1885    bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true);
     1886    AssertMsgReturn(!fLocked,
     1887                    ("Calling synchronous parent read while another thread holds the disk lock\n"),
     1888                    VERR_VD_INVALID_STATE);
     1889
     1890    /* Fake an I/O context. */
     1891    RTSGSEG Segment;
     1892    RTSGBUF SgBuf;
     1893    VDIOCTX IoCtx;
     1894
             /* A single segment S/G buffer covering the whole caller buffer. */
     1895    Segment.pvSeg = pvBuf;
     1896    Segment.cbSeg = cbRead;
     1897    RTSgBufInit(&SgBuf, &Segment, 1);
     1898    vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage,
     1899                &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
     1900    int rc = vdReadHelperAsync(&IoCtx);
             /* Release the lock flag taken above. */
     1901    ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false);
    9541902    return rc;
    9551903}
     
    9841932                          bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead)
    9851933{
    986     int rc = VINF_SUCCESS;
    987     size_t cbThisRead;
    988     bool fAllFree = true;
    989     size_t cbBufClear = 0;
    990 
    991     /* Loop until all read. */
    992     do
    993     {
    994         /* Search for image with allocated block. Do not attempt to read more
    995          * than the previous reads marked as valid. Otherwise this would return
    996          * stale data when different block sizes are used for the images. */
    997         cbThisRead = cbRead;
    998 
    999         if (   pDisk->pCache
    1000             && !pImageParentOverride)
    1001         {
    1002 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */
    1003             rc = vdCacheReadHelper(pDisk->pCache, uOffset, pvBuf,
    1004                                    cbThisRead, &cbThisRead);
    1005 #endif
    1006             if (rc == VERR_VD_BLOCK_FREE)
    1007             {
    1008                 rc = vdDiskReadHelper(pDisk, pImage, NULL, uOffset, pvBuf, cbThisRead,
    1009                                       &cbThisRead);
    1010 
    1011                 /* If the read was successful, write the data back into the cache. */
    1012                 if (   RT_SUCCESS(rc)
    1013                     && fUpdateCache)
    1014                 {
    1015 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */
    1016                     rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf,
    1017                                             cbThisRead, NULL);
    1018 #endif
    1019                 }
    1020             }
    1021         }
    1022         else
    1023         {
    1024             RTSGSEG SegmentBuf;
    1025             RTSGBUF SgBuf;
    1026             VDIOCTX IoCtx;
    1027 
    1028             SegmentBuf.pvSeg = pvBuf;
    1029             SegmentBuf.cbSeg = cbThisRead;
    1030             RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    1031             vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL,
    1032                         &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    1033 
    1034             /*
    1035              * Try to read from the given image.
    1036              * If the block is not allocated read from override chain if present.
    1037              */
    1038             rc = pImage->Backend->pfnRead(pImage->pBackendData,
    1039                                                uOffset, cbThisRead, &IoCtx,
    1040                                                &cbThisRead);
    1041 
    1042             if (   rc == VERR_VD_BLOCK_FREE
    1043                 && cImagesRead != 1)
    1044             {
    1045                 unsigned cImagesToProcess = cImagesRead;
    1046 
    1047                 for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev;
    1048                      pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE;
    1049                      pCurrImage = pCurrImage->pPrev)
    1050                 {
    1051                     rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    1052                                                            uOffset, cbThisRead,
    1053                                                            &IoCtx, &cbThisRead);
    1054                     if (cImagesToProcess == 1)
    1055                         break;
    1056                     else if (cImagesToProcess > 0)
    1057                         cImagesToProcess--;
    1058                 }
    1059             }
    1060         }
    1061 
    1062         /* No image in the chain contains the data for the block. */
    1063         if (rc == VERR_VD_BLOCK_FREE)
    1064         {
    1065             /* Fill the free space with 0 if we are told to do so
    1066              * or a previous read returned valid data. */
    1067             if (fZeroFreeBlocks || !fAllFree)
    1068                 memset(pvBuf, '\0', cbThisRead);
    1069             else
    1070                 cbBufClear += cbThisRead;
    1071 
    1072             if (pImage->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)
    1073                 rc = VINF_VD_NEW_ZEROED_BLOCK;
    1074             else
    1075                 rc = VINF_SUCCESS;
    1076         }
    1077         else if (RT_SUCCESS(rc))
    1078         {
    1079             /* First not free block, fill the space before with 0. */
    1080             if (!fZeroFreeBlocks)
    1081             {
    1082                 memset((char *)pvBuf - cbBufClear, '\0', cbBufClear);
    1083                 cbBufClear = 0;
    1084                 fAllFree = false;
    1085             }
    1086         }
    1087 
    1088         cbRead -= cbThisRead;
    1089         uOffset += cbThisRead;
    1090         pvBuf = (char *)pvBuf + cbThisRead;
    1091     } while (cbRead != 0 && RT_SUCCESS(rc));
    1092 
    1093     return (!fZeroFreeBlocks && fAllFree) ? VERR_VD_BLOCK_FREE : rc;
     1934    uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
     1935    RTSGSEG Segment;
     1936    RTSGBUF SgBuf;
     1937    VDIOCTX IoCtx;
     1938
     1939    if (fZeroFreeBlocks)
     1940        fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
     1941    if (fUpdateCache)
     1942        fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE;
     1943
     1944    Segment.pvSeg = pvBuf;
     1945    Segment.cbSeg = cbRead;
     1946    RTSgBufInit(&SgBuf, &Segment, 1);
     1947    vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf,
     1948                NULL, vdReadHelperAsync, fFlags);
     1949
     1950    IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
     1951    IoCtx.Req.Io.cImagesRead = cImagesRead;
     1952    IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
     1953    IoCtx.Type.Root.pvUser1     = pDisk;
     1954    IoCtx.Type.Root.pvUser2     = NULL;
     1955    return vdIoCtxProcessSync(&IoCtx);
    10941956}
    10951957
     
    11031965    return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead,
    11041966                          true /* fZeroFreeBlocks */, fUpdateCache, 0);
    1105 }
    1106 
    1107 /**
    1108  * Creates a new empty discard state.
          *
          * Allocates the zeroed state structure and the zeroed tree root for the
          * discard blocks; the state is freed again if the tree allocation fails.
    1109  *
    1110  * @returns Pointer to the new discard state or NULL if out of memory.
    1111  */
    1112 static PVDDISCARDSTATE vdDiscardStateCreate(void)
    1113 {
    1114     PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));
    1115 
    1116     if (pDiscard)
    1117     {
    1118         RTListInit(&pDiscard->ListLru);
    1119         pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
    1120         if (!pDiscard->pTreeBlocks)
    1121         {
                     /* Do not hand out a half initialized state. */
    1122             RTMemFree(pDiscard);
    1123             pDiscard = NULL;
    1124         }
    1125     }
    1126 
    1127     return pDiscard;
    1128 }
    1129 
    1130 /**
    1131  * Removes the least recently used blocks from the waiting list until
    1132  * the new value is reached.
          *
          * For every removed block the allocation bitmap is walked in runs of
          * equal bits (one bit per 512 bytes). Each unallocated run is passed to
          * the discard callback of the last image with VD_DISCARD_MARK_UNUSED.
          * A failing callback stops the processing and leaves the current block
          * in the tree and the LRU list.
    1133  *
    1134  * @returns VBox status code.
    1135  * @param   pDisk              VD disk container.
    1136  * @param   pDiscard           The discard state.
    1137  * @param   cbDiscardingNew    How many bytes should be waiting on success.
    1138  *                             The number of bytes waiting can be less.
    1139  */
    1140 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)
    1141 {
    1142     int rc = VINF_SUCCESS;
    1143 
    1144     LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
    1145                  pDisk, pDiscard, cbDiscardingNew));
    1146 
    1147     while (pDiscard->cbDiscarding > cbDiscardingNew)
    1148     {
                 /* The block is taken from the tail of the LRU list. */
    1149         PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
    1150 
    1151         Assert(!RTListIsEmpty(&pDiscard->ListLru));
    1152 
    1153         /* Go over the allocation bitmap and mark all discarded sectors as unused. */
    1154         uint64_t offStart = pBlock->Core.Key;
    1155         uint32_t idxStart = 0;
    1156         size_t cbLeft = pBlock->cbDiscard;
    1157         bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
    1158         uint32_t cSectors = pBlock->cbDiscard / 512;
    1159 
    1160         while (cbLeft > 0)
    1161         {
    1162             int32_t idxEnd;
                     /* Defaults to the rest of the block if no bit of the other kind follows. */
    1163             size_t cbThis = cbLeft;
    1164 
    1165             if (fAllocated)
    1166             {
    1167                 /* Check for the first unallocated bit. */
    1168                 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
    1169                 if (idxEnd != -1)
    1170                 {
    1171                     cbThis = (idxEnd - idxStart) * 512;
    1172                     fAllocated = false;
    1173                 }
    1174             }
    1175             else
    1176             {
    1177                 /* Mark as unused and check for the first set bit. */
    1178                 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
    1179                 if (idxEnd != -1)
    1180                     cbThis = (idxEnd - idxStart) * 512;
    1181 
    1182 
                         /* Temporary sync discard context on the stack for the backend call. */
    1183                 VDIOCTX IoCtx;
    1184                 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,
    1185                             NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    1186                 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,
    1187                                                             &IoCtx, offStart, cbThis, NULL,
    1188                                                             NULL, &cbThis, NULL,
    1189                                                             VD_DISCARD_MARK_UNUSED);
    1190                 if (RT_FAILURE(rc))
    1191                     break;
    1192 
    1193                 fAllocated = true;
    1194             }
    1195 
                     /* Move on to the next run of bits. */
    1196             idxStart  = idxEnd;
    1197             offStart += cbThis;
    1198             cbLeft   -= cbThis;
    1199         }
    1200 
    1201         if (RT_FAILURE(rc))
    1202             break;
    1203 
                 /* The block is done, drop it from the tree and the LRU list and free it. */
    1204         PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
    1205         Assert(pBlockRemove == pBlock);
    1206         RTListNodeRemove(&pBlock->NodeLru);
    1207 
    1208         pDiscard->cbDiscarding -= pBlock->cbDiscard;
    1209         RTMemFree(pBlock->pbmAllocated);
    1210         RTMemFree(pBlock);
    1211     }
    1212 
    1213     Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
    1214 
    1215     LogFlowFunc(("returns rc=%Rrc\n", rc));
    1216     return rc;
    1217 }
    1218 
    1219 /**
    1220  * Destroys the current discard state, writing any waiting blocks to the image.
          *
          * Does nothing if the disk has no discard state. The state is freed and
          * pDisk->pDiscard reset even if removing the blocks failed.
    1221  *
    1222  * @returns VBox status code.
    1223  * @param   pDisk    VD disk container.
    1224  */
    1225 static int vdDiscardStateDestroy(PVBOXHDD pDisk)
    1226 {
    1227     int rc = VINF_SUCCESS;
    1228 
    1229     if (pDisk->pDiscard)
    1230     {
    1231         rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);
    1232         AssertRC(rc);
    1233         RTMemFree(pDisk->pDiscard->pTreeBlocks);
    1234         RTMemFree(pDisk->pDiscard);
    1235         pDisk->pDiscard = NULL;
    1236     }
    1237 
    1238     return rc;
    1239 }
    1240 
    1241 /**
    1242  * Marks the given range as allocated in the image.
    1243  * Required if there are discards in progress and a write to a block which can get discarded
    1244  * is written to.
          *
          * The range is processed piecewise: parts covered by a discard block get
          * their bits set in the allocation bitmap of that block (one bit per
          * 512 bytes), parts between blocks are skipped. Nothing is done if the
          * disk has no discard state.
    1245  *
    1246  * @returns VBox status code.
    1247  * @param   pDisk    VD container data.
    1248  * @param   uOffset  First byte to mark as allocated.
    1249  * @param   cbRange  Number of bytes to mark as allocated.
    1250  */
    1251 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange)
    1252 {
    1253     PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
    1254     int rc = VINF_SUCCESS;
    1255 
    1256     if (pDiscard)
    1257     {
    1258         do
    1259         {
    1260             size_t cbThisRange = cbRange;
    1261             PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);
    1262 
    1263             if (pBlock)
    1264             {
    1265                 int32_t idxStart, idxEnd;
    1266 
    1267                 Assert(!(cbThisRange % 512));
    1268                 Assert(!((uOffset - pBlock->Core.Key) % 512));
    1269 
                         /* Do not go beyond the end of this block. */
    1270                 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);
    1271 
    1272                 idxStart = (uOffset - pBlock->Core.Key) / 512;
    1273                 idxEnd = idxStart + (cbThisRange / 512);
    1274                 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);
    1275             }
    1276             else
    1277             {
                         /* No block covers the offset, skip up to the start of the best
                          * fitting block if there is one, otherwise the whole remaining range. */
    1278                 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);
    1279                 if (pBlock)
    1280                     cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);
    1281             }
    1282 
    1283             Assert(cbRange >= cbThisRange);
    1284 
    1285             uOffset += cbThisRange;
    1286             cbRange -= cbThisRange;
    1287         } while (cbRange != 0);
    1288     }
    1289 
    1290     return rc;
    1291 }
    1292 
    /**
     * Allocates an I/O context from the I/O context memory cache of the disk
     * and initializes it with vdIoCtxInit() using the given parameters.
     *
     * @returns Pointer to the initialized I/O context or NULL if the
     *          allocation failed.
     * @param   pDisk               The disk owning the memory cache.
     * @param   enmTxDir            Transfer direction, passed on to vdIoCtxInit().
     * @param   uOffset             Start offset, passed on to vdIoCtxInit().
     * @param   cbTransfer          Transfer size, passed on to vdIoCtxInit().
     * @param   pImageStart         Start image, passed on to vdIoCtxInit().
     * @param   pcSgBuf             S/G buffer, passed on to vdIoCtxInit().
     * @param   pvAllocation        Allocation pointer, passed on to vdIoCtxInit().
     * @param   pfnIoCtxTransfer    Transfer callback, passed on to vdIoCtxInit().
     * @param   fFlags              Context flags, passed on to vdIoCtxInit().
     */
    1293 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
    1294                                   uint64_t uOffset, size_t cbTransfer,
    1295                                   PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,
    1296                                   void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
    1297                                   uint32_t fFlags)
    1298 {
    1299     PVDIOCTX pIoCtx = NULL;
    1300 
    1301     pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
    1302     if (RT_LIKELY(pIoCtx))
    1303     {
    1304         vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
    1305                     pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
    1306     }
    1307 
    1308     return pIoCtx;
    1309 }
    1310 
    /**
     * Allocates a root I/O context, i.e. one without a parent context.
     *
     * The context is created through vdIoCtxAlloc(); on success the parent
     * pointer is cleared and the completion callback with its two user
     * arguments is stored in the root specific part.
     *
     * @returns Pointer to the new root I/O context or NULL if the allocation failed.
     * @param   pfnComplete    Completion callback stored in the context.
     * @param   pvUser1        First user argument stored for the callback.
     * @param   pvUser2        Second user argument stored for the callback.
     *
     * The remaining parameters are passed unchanged to vdIoCtxAlloc().
     */
    1311 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
    1312                                       uint64_t uOffset, size_t cbTransfer,
    1313                                       PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
    1314                                       PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
    1315                                       void *pvUser1, void *pvUser2,
    1316                                       void *pvAllocation,
    1317                                       PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
    1318                                       uint32_t fFlags)
    1319 {
    1320     PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
    1321                                    pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
    1322 
    1323     if (RT_LIKELY(pIoCtx))
    1324     {
    1325         pIoCtx->pIoCtxParent          = NULL;
    1326         pIoCtx->Type.Root.pfnComplete = pfnComplete;
    1327         pIoCtx->Type.Root.pvUser1     = pvUser1;
    1328         pIoCtx->Type.Root.pvUser2     = pvUser2;
    1329     }
    1330 
    1331     LogFlow(("Allocated root I/O context %#p\n", pIoCtx));
    1332     return pIoCtx;
    1333 }
    1334 
    /**
     * Allocates a root I/O context for a discard request.
     *
     * The context comes from the I/O context memory cache of the disk and is
     * initialized field by field here instead of through vdIoCtxInit(),
     * because it uses the discard part of the request data.
     *
     * @returns Pointer to the new discard I/O context or NULL if the allocation failed.
     * @param   pDisk               The disk the request is for.
     * @param   paRanges            The ranges to discard, stored as is (not copied).
     * @param   cRanges             Number of entries in paRanges.
     * @param   pfnComplete         Completion callback stored in the context.
     * @param   pvUser1             First user argument stored for the callback.
     * @param   pvUser2             Second user argument stored for the callback.
     * @param   pvAllocation        Allocation pointer stored in the context.
     * @param   pfnIoCtxTransfer    Transfer callback stored in the context.
     * @param   fFlags              Context flags stored in the context.
     */
    1335 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges,
    1336                                          unsigned cRanges,
    1337                                          PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
    1338                                          void *pvUser1, void *pvUser2,
    1339                                          void *pvAllocation,
    1340                                          PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
    1341                                          uint32_t fFlags)
    1342 {
    1343     PVDIOCTX pIoCtx = NULL;
    1344 
    1345     pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
    1346     if (RT_LIKELY(pIoCtx))
    1347     {
                 /* Generic context state. */
    1348         pIoCtx->pIoCtxNext                = NULL;
    1349         pIoCtx->pDisk                     = pDisk;
    1350         pIoCtx->enmTxDir                  = VDIOCTXTXDIR_DISCARD;
    1351         pIoCtx->cDataTransfersPending     = 0;
    1352         pIoCtx->cMetaTransfersPending     = 0;
    1353         pIoCtx->fComplete                 = false;
    1354         pIoCtx->fFlags                    = fFlags;
    1355         pIoCtx->pvAllocation              = pvAllocation;
    1356         pIoCtx->pfnIoCtxTransfer          = pfnIoCtxTransfer;
    1357         pIoCtx->pfnIoCtxTransferNext      = NULL;
    1358         pIoCtx->rcReq                     = VINF_SUCCESS;
                 /* Discard specific request state, processing starts at the first range. */
    1359         pIoCtx->Req.Discard.paRanges      = paRanges;
    1360         pIoCtx->Req.Discard.cRanges       = cRanges;
    1361         pIoCtx->Req.Discard.idxRange      = 0;
    1362         pIoCtx->Req.Discard.cbDiscardLeft = 0;
    1363         pIoCtx->Req.Discard.offCur        = 0;
    1364         pIoCtx->Req.Discard.cbThisDiscard = 0;
    1365 
                 /* Root context data. */
    1366         pIoCtx->pIoCtxParent          = NULL;
    1367         pIoCtx->Type.Root.pfnComplete = pfnComplete;
    1368         pIoCtx->Type.Root.pvUser1     = pvUser1;
    1369         pIoCtx->Type.Root.pvUser2     = pvUser2;
    1370     }
    1371 
    1372     LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));
    1373     return pIoCtx;
    1374 }
    1375 
    /**
     * Allocates a child I/O context belonging to the given parent context.
     *
     * The context is created through vdIoCtxAlloc() with no flags set. On
     * success the parent pointer is stored and the offset, transfer size and
     * the two parent sizes are saved in the child specific part. The parent
     * must itself be a root context (asserted).
     *
     * @returns Pointer to the new child I/O context or NULL if the allocation failed.
     * @param   pIoCtxParent        The parent I/O context.
     * @param   cbTransferParent    Stored in Type.Child.cbTransferParent.
     * @param   cbWriteParent       Stored in Type.Child.cbWriteParent.
     *
     * The remaining parameters are passed unchanged to vdIoCtxAlloc().
     */
    1376 DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,
    1377                                        uint64_t uOffset, size_t cbTransfer,
    1378                                        PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
    1379                                        PVDIOCTX pIoCtxParent, size_t cbTransferParent,
    1380                                        size_t cbWriteParent, void *pvAllocation,
    1381                                        PFNVDIOCTXTRANSFER pfnIoCtxTransfer)
    1382 {
    1383     PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
    1384                                    pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0);
    1385 
    1386     AssertPtr(pIoCtxParent);
    1387     Assert(!pIoCtxParent->pIoCtxParent);
    1388 
    1389     if (RT_LIKELY(pIoCtx))
    1390     {
    1391         pIoCtx->pIoCtxParent                   = pIoCtxParent;
    1392         pIoCtx->Type.Child.uOffsetSaved        = uOffset;
    1393         pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;
    1394         pIoCtx->Type.Child.cbTransferParent    = cbTransferParent;
    1395         pIoCtx->Type.Child.cbWriteParent       = cbWriteParent;
    1396     }
    1397 
    1398     LogFlow(("Allocated child I/O context %#p\n", pIoCtx));
    1399     return pIoCtx;
    1400 }
    1401 
    1402 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)
    1403 {
    1404     PVDIOTASK pIoTask = NULL;
    1405 
    1406     pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
    1407     if (pIoTask)
    1408     {
    1409         pIoTask->pIoStorage           = pIoStorage;
    1410         pIoTask->pfnComplete          = pfnComplete;
    1411         pIoTask->pvUser               = pvUser;
    1412         pIoTask->fMeta                = false;
    1413         pIoTask->Type.User.cbTransfer = cbTransfer;
    1414         pIoTask->Type.User.pIoCtx     = pIoCtx;
    1415     }
    1416 
    1417     return pIoTask;
    1418 }
    1419 
    1420 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)
    1421 {
    1422     PVDIOTASK pIoTask = NULL;
    1423 
    1424     pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
    1425     if (pIoTask)
    1426     {
    1427         pIoTask->pIoStorage          = pIoStorage;
    1428         pIoTask->pfnComplete         = pfnComplete;
    1429         pIoTask->pvUser              = pvUser;
    1430         pIoTask->fMeta               = true;
    1431         pIoTask->Type.Meta.pMetaXfer = pMetaXfer;
    1432     }
    1433 
    1434     return pIoTask;
    1435 }
    1436 
    1437 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1438 {
    1439     LogFlow(("Freeing I/O context %#p\n", pIoCtx));
    1440     if (pIoCtx->pvAllocation)
    1441         RTMemFree(pIoCtx->pvAllocation);
    1442 #ifdef DEBUG
    1443     memset(pIoCtx, 0xff, sizeof(VDIOCTX));
    1444 #endif
    1445     RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);
    1446 }
    1447 
    1448 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask)
    1449 {
    1450     RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);
    1451 }
    1452 
    1453 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)
    1454 {
    1455     AssertPtr(pIoCtx->pIoCtxParent);
    1456 
    1457     RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    1458     pIoCtx->Req.Io.uOffset        = pIoCtx->Type.Child.uOffsetSaved;
    1459     pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved;
    1460 }
    1461 
    1462 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)
    1463 {
    1464     PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));
    1465 
    1466     if (RT_LIKELY(pMetaXfer))
    1467     {
    1468         pMetaXfer->Core.Key     = uOffset;
    1469         pMetaXfer->Core.KeyLast = uOffset + cb - 1;
    1470         pMetaXfer->fFlags       = VDMETAXFER_TXDIR_NONE;
    1471         pMetaXfer->cbMeta       = cb;
    1472         pMetaXfer->pIoStorage   = pIoStorage;
    1473         pMetaXfer->cRefs        = 0;
    1474         RTListInit(&pMetaXfer->ListIoCtxWaiting);
    1475     }
    1476     return pMetaXfer;
    1477 }
    1478 
    1479 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)
    1480 {
    1481     /* Put it on the waiting list. */
    1482     PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);
    1483     PVDIOCTX pHeadOld;
    1484     pIoCtx->pIoCtxNext = pNext;
    1485     while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))
    1486     {
    1487         pNext = pHeadOld;
    1488         Assert(pNext != pIoCtx);
    1489         pIoCtx->pIoCtxNext = pNext;
    1490         ASMNopPause();
    1491     }
    1492 }
    1493 
    1494 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1495 {
    1496     LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx));
    1497 
    1498     Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));
    1499     pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
    1500     vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);
    1501 }
    1502 
    1503 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)
    1504 {
    1505     return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);
    1506 }
    1507 
    1508 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)
    1509 {
    1510     return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);
    1511 }
    1512 
    1513 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)
    1514 {
    1515     return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
    1516 }
    1517 
    1518 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)
    1519 {
    1520     return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
    1521 }
    1522 
    1523 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)
    1524 {
    1525     return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);
    1526 }
    1527 
    1528 /**
    1529  * Process the I/O context, core method which assumes that the I/O context
    1530  * acquired the lock.
    1531  *
    1532  * @returns VBox status code.
    1533  * @param   pIoCtx    I/O context to process.
    1534  */
    1535 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)
    1536 {
    1537     int rc = VINF_SUCCESS;
    1538 
    1539     VD_IS_LOCKED(pIoCtx->pDisk);
    1540 
    1541     LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
    1542 
    1543     if (   !pIoCtx->cMetaTransfersPending
    1544         && !pIoCtx->cDataTransfersPending
    1545         && !pIoCtx->pfnIoCtxTransfer)
    1546     {
    1547         rc = VINF_VD_ASYNC_IO_FINISHED;
    1548         goto out;
    1549     }
    1550 
    1551     /*
    1552      * We complete the I/O context in case of an error
    1553      * if there is no I/O task pending.
    1554      */
    1555     if (   RT_FAILURE(pIoCtx->rcReq)
    1556         && !pIoCtx->cMetaTransfersPending
    1557         && !pIoCtx->cDataTransfersPending)
    1558     {
    1559         rc = VINF_VD_ASYNC_IO_FINISHED;
    1560         goto out;
    1561     }
    1562 
    1563     /* Don't change anything if there is a metadata transfer pending or we are blocked. */
    1564     if (   pIoCtx->cMetaTransfersPending
    1565         || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
    1566     {
    1567         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1568         goto out;
    1569     }
    1570 
    1571     if (pIoCtx->pfnIoCtxTransfer)
    1572     {
    1573         /* Call the transfer function advancing to the next while there is no error. */
    1574         while (   pIoCtx->pfnIoCtxTransfer
    1575                && !pIoCtx->cMetaTransfersPending
    1576                && RT_SUCCESS(rc))
    1577         {
    1578             LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));
    1579             rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);
    1580 
    1581             /* Advance to the next part of the transfer if the current one succeeded. */
    1582             if (RT_SUCCESS(rc))
    1583             {
    1584                 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
    1585                 pIoCtx->pfnIoCtxTransferNext = NULL;
    1586             }
    1587         }
    1588     }
    1589 
    1590     if (   RT_SUCCESS(rc)
    1591         && !pIoCtx->cMetaTransfersPending
    1592         && !pIoCtx->cDataTransfersPending)
    1593         rc = VINF_VD_ASYNC_IO_FINISHED;
    1594     else if (   RT_SUCCESS(rc)
    1595              || rc == VERR_VD_NOT_ENOUGH_METADATA
    1596              || rc == VERR_VD_IOCTX_HALT)
    1597         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1598     else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
    1599     {
    1600         ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);
    1601         /*
    1602          * The I/O context completed if we have an error and there is no data
    1603          * or meta data transfer pending.
    1604          */
    1605         if (   !pIoCtx->cMetaTransfersPending
    1606             && !pIoCtx->cDataTransfersPending)
    1607             rc = VINF_VD_ASYNC_IO_FINISHED;
    1608         else
    1609             rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1610     }
    1611 
    1612 out:
    1613     LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
    1614                  pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,
    1615                  pIoCtx->fComplete));
    1616 
    1617     return rc;
    1618 }
    1619 
    1620 /**
    1621  * Processes the list of waiting I/O contexts.
    1622  *
    1623  * @returns VBox status code.
    1624  * @param   pDisk    The disk structure.
    1625  * @param   pIoCtxRc An I/O context handle which waits on the list. When processed
    1626  *                   The status code is returned. NULL if there is no I/O context
    1627  *                   to return the status code for.
    1628  */
    1629 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)
    1630 {
    1631     int rc = VINF_SUCCESS;
    1632 
    1633     LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));
    1634 
    1635     VD_IS_LOCKED(pDisk);
    1636 
    1637     /* Get the waiting list and process it in FIFO order. */
    1638     PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);
    1639 
    1640     /* Reverse it. */
    1641     PVDIOCTX pCur = pIoCtxHead;
    1642     pIoCtxHead = NULL;
    1643     while (pCur)
    1644     {
    1645         PVDIOCTX pInsert = pCur;
    1646         pCur = pCur->pIoCtxNext;
    1647         pInsert->pIoCtxNext = pIoCtxHead;
    1648         pIoCtxHead = pInsert;
    1649     }
    1650 
    1651     /* Process now. */
    1652     pCur = pIoCtxHead;
    1653     while (pCur)
    1654     {
    1655         int rcTmp;
    1656         PVDIOCTX pTmp = pCur;
    1657 
    1658         pCur = pCur->pIoCtxNext;
    1659         pTmp->pIoCtxNext = NULL;
    1660 
    1661         rcTmp = vdIoCtxProcessLocked(pTmp);
    1662         if (pTmp == pIoCtxRc)
    1663         {
    1664             /* The given I/O context was processed, pass the return code to the caller. */
    1665             rc = rcTmp;
    1666         }
    1667         else if (   rcTmp == VINF_VD_ASYNC_IO_FINISHED
    1668                  && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
    1669         {
    1670             LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
    1671             vdThreadFinishWrite(pDisk);
    1672             pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
    1673                                         pTmp->Type.Root.pvUser2,
    1674                                         pTmp->rcReq);
    1675             vdIoCtxFree(pDisk, pTmp);
    1676         }
    1677     }
    1678 
    1679     LogFlowFunc(("returns rc=%Rrc\n", rc));
    1680     return rc;
    1681 }
    1682 
    1683 /**
    1684  * Processes the list of blocked I/O contexts.
    1685  *
    1686  * @returns nothing.
    1687  * @param   pDisk    The disk structure.
    1688  */
    1689 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)
    1690 {
    1691     LogFlowFunc(("pDisk=%#p\n", pDisk));
    1692 
    1693     VD_IS_LOCKED(pDisk);
    1694 
    1695     /* Get the waiting list and process it in FIFO order. */
    1696     PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);
    1697 
    1698     /* Reverse it. */
    1699     PVDIOCTX pCur = pIoCtxHead;
    1700     pIoCtxHead = NULL;
    1701     while (pCur)
    1702     {
    1703         PVDIOCTX pInsert = pCur;
    1704         pCur = pCur->pIoCtxNext;
    1705         pInsert->pIoCtxNext = pIoCtxHead;
    1706         pIoCtxHead = pInsert;
    1707     }
    1708 
    1709     /* Process now. */
    1710     pCur = pIoCtxHead;
    1711     while (pCur)
    1712     {
    1713         int rc;
    1714         PVDIOCTX pTmp = pCur;
    1715 
    1716         pCur = pCur->pIoCtxNext;
    1717         pTmp->pIoCtxNext = NULL;
    1718 
    1719         Assert(!pTmp->pIoCtxParent);
    1720         Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);
    1721         pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
    1722 
    1723         rc = vdIoCtxProcessLocked(pTmp);
    1724         if (   rc == VINF_VD_ASYNC_IO_FINISHED
    1725             && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
    1726         {
    1727             LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
    1728             vdThreadFinishWrite(pDisk);
    1729             pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,
    1730                                         pTmp->Type.Root.pvUser2,
    1731                                         pTmp->rcReq);
    1732             vdIoCtxFree(pDisk, pTmp);
    1733         }
    1734     }
    1735 
    1736     LogFlowFunc(("returns\n"));
    1737 }
    1738 
    1739 /**
    1740  * Processes the I/O context trying to lock the criticial section.
    1741  * The context is deferred if the critical section is busy.
    1742  *
    1743  * @returns VBox status code.
    1744  * @param   pIoCtx    The I/O context to process.
    1745  */
    1746 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)
    1747 {
    1748     int rc = VINF_SUCCESS;
    1749     PVBOXHDD pDisk = pIoCtx->pDisk;
    1750 
    1751     LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
    1752 
    1753     /* Put it on the waiting list first. */
    1754     vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);
    1755 
    1756     if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
    1757     {
    1758         /* Leave it again, the context will be processed just before leaving the lock. */
    1759         LogFlowFunc(("Successfully acquired the lock\n"));
    1760         rc = vdDiskUnlock(pDisk, pIoCtx);
    1761     }
    1762     else
    1763     {
    1764         LogFlowFunc(("Lock is held\n"));
    1765         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1766     }
    1767 
    1768     return rc;
    1769 }
    1770 
    1771 /**
    1772  * Process the I/O context in a synchronous manner, waiting
    1773  * for it to complete.
    1774  *
    1775  * @returns VBox status code of the completed request.
    1776  * @param   pIoCtx    The sync I/O context.
    1777  */
    1778 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx)
    1779 {
    1780     int rc = VINF_SUCCESS;
    1781     PVBOXHDD pDisk = pIoCtx->pDisk;
    1782 
    1783     LogFlowFunc(("pIoCtx=%p\n", pIoCtx));
    1784 
    1785     AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC,
    1786               ("I/O context is not marked as synchronous\n"));
    1787 
    1788     rc = vdIoCtxProcessTryLockDefer(pIoCtx);
    1789     if (rc == VINF_VD_ASYNC_IO_FINISHED)
    1790         rc = VINF_SUCCESS;
    1791 
    1792     if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
    1793     {
    1794         rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT);
    1795         AssertRC(rc);
    1796 
    1797         rc = pDisk->rcSync;
    1798     }
    1799     else /* Success or error. */
    1800         vdIoCtxFree(pDisk, pIoCtx);
    1801 
    1802     return rc;
    1803 }
    1804 
    1805 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1806 {
    1807     return pDisk->pIoCtxLockOwner == pIoCtx;
    1808 }
    1809 
    1810 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx)
    1811 {
    1812     int rc = VINF_SUCCESS;
    1813 
    1814     VD_IS_LOCKED(pDisk);
    1815 
    1816     LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));
    1817 
    1818     if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))
    1819     {
    1820         Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */
    1821         vdIoCtxDefer(pDisk, pIoCtx);
    1822         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    1823     }
    1824 
    1825     LogFlowFunc(("returns -> %Rrc\n", rc));
    1826     return rc;
    1827 }
    1828 
    1829 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
    1830 {
    1831     LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
    1832                  pDisk, pIoCtx, fProcessBlockedReqs));
    1833 
    1834     VD_IS_LOCKED(pDisk);
    1835 
    1836     LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
    1837     Assert(pDisk->pIoCtxLockOwner == pIoCtx);
    1838     ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);
    1839 
    1840     if (fProcessBlockedReqs)
    1841     {
    1842         /* Process any blocked writes if the current request didn't caused another growing. */
    1843         vdDiskProcessBlockedIoCtx(pDisk);
    1844     }
    1845 
    1846     LogFlowFunc(("returns\n"));
    1847 }
    1848 
    1849 /**
    1850  * internal: read the specified amount of data in whatever blocks the backend
    1851  * will give us - async version.
    1852  */
    1853 static int vdReadHelperAsync(PVDIOCTX pIoCtx)
    1854 {
    1855     int rc;
    1856     size_t cbToRead     = pIoCtx->Req.Io.cbTransfer;
    1857     uint64_t uOffset    = pIoCtx->Req.Io.uOffset;
    1858     PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;;
    1859     size_t cbThisRead;
    1860 
    1861     /* Loop until all reads started or we have a backend which needs to read metadata. */
    1862     do
    1863     {
    1864         /* Search for image with allocated block. Do not attempt to read more
    1865          * than the previous reads marked as valid. Otherwise this would return
    1866          * stale data when different block sizes are used for the images. */
    1867         cbThisRead = cbToRead;
    1868 
    1869         /*
    1870          * Try to read from the given image.
    1871          * If the block is not allocated read from override chain if present.
    1872          */
    1873         rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    1874                                           uOffset, cbThisRead,
    1875                                           pIoCtx, &cbThisRead);
    1876 
    1877         if (rc == VERR_VD_BLOCK_FREE)
    1878         {
    1879             while (   pCurrImage->pPrev != NULL
    1880                    && rc == VERR_VD_BLOCK_FREE)
    1881             {
    1882                 pCurrImage =  pCurrImage->pPrev;
    1883                 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
    1884                                                   uOffset, cbThisRead,
    1885                                                   pIoCtx, &cbThisRead);
    1886             }
    1887         }
    1888 
    1889         /* The task state will be updated on success already, don't do it here!. */
    1890         if (rc == VERR_VD_BLOCK_FREE)
    1891         {
    1892             /* No image in the chain contains the data for the block. */
    1893             vdIoCtxSet(pIoCtx, '\0', cbThisRead);
    1894             ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);
    1895             rc = VINF_SUCCESS;
    1896         }
    1897         else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
    1898             rc = VINF_SUCCESS;
    1899         else if (rc == VERR_VD_IOCTX_HALT)
    1900         {
    1901             uOffset  += cbThisRead;
    1902             cbToRead -= cbThisRead;
    1903             pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
    1904         }
    1905 
    1906         if (RT_FAILURE(rc))
    1907             break;
    1908 
    1909         cbToRead -= cbThisRead;
    1910         uOffset  += cbThisRead;
    1911         pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
    1912     } while (cbToRead != 0 && RT_SUCCESS(rc));
    1913 
    1914     if (   rc == VERR_VD_NOT_ENOUGH_METADATA
    1915         || rc == VERR_VD_IOCTX_HALT)
    1916     {
    1917         /* Save the current state. */
    1918         pIoCtx->Req.Io.uOffset    = uOffset;
    1919         pIoCtx->Req.Io.cbTransfer = cbToRead;
    1920         pIoCtx->Req.Io.pImageCur  = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
    1921     }
    1922 
    1923     return rc;
    1924 }
    1925 
    1926 /**
    1927  * internal: parent image read wrapper for compacting.
    1928  */
    1929 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,
    1930                         size_t cbRead)
    1931 {
    1932     PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;
    1933     return vdReadHelper(pParentState->pDisk, pParentState->pImage, uOffset,
    1934                         pvBuf, cbRead, false /* fUpdateCache */);
    19351967}
    19361968
     
    19842016
    19852017/**
    1986  * internal: write a complete block (only used for diff images), taking the
    1987  * remaining data from parent images. This implementation does not optimize
    1988  * anything (except that it tries to read only that portions from parent
    1989  * images that are really needed).
    1990  */
    1991 static int vdWriteHelperStandard(PVBOXHDD pDisk, PVDIMAGE pImage,
    1992                                  PVDIMAGE pImageParentOverride,
    1993                                  uint64_t uOffset, size_t cbWrite,
    1994                                  size_t cbThisWrite, size_t cbPreRead,
    1995                                  size_t cbPostRead, const void *pvBuf,
    1996                                  void *pvTmp)
    1997 {
    1998     int rc = VINF_SUCCESS;
    1999 
    2000     LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",
    2001                  pDisk, pImage, pImageParentOverride, uOffset, cbWrite));
    2002 
    2003     /* Read the data that goes before the write to fill the block. */
    2004     if (cbPreRead)
    2005     {
    2006         /*
    2007          * Updating the cache doesn't make sense here because
    2008          * this will be done after the complete block was written.
    2009          */
    2010         rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,
    2011                             uOffset - cbPreRead, pvTmp, cbPreRead,
    2012                             true /* fZeroFreeBlocks*/,
    2013                             false /* fUpdateCache */, 0);
    2014         if (RT_FAILURE(rc))
    2015             return rc;
    2016     }
    2017 
    2018     /* Copy the data to the right place in the buffer. */
    2019     memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);
    2020 
    2021     /* Read the data that goes after the write to fill the block. */
    2022     if (cbPostRead)
    2023     {
    2024         /* If we have data to be written, use that instead of reading
    2025          * data from the image. */
    2026         size_t cbWriteCopy;
    2027         if (cbWrite > cbThisWrite)
    2028             cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
    2029         else
    2030             cbWriteCopy = 0;
    2031         /* Figure out how much we cannot read from the image, because
    2032          * the last block to write might exceed the nominal size of the
    2033          * image for technical reasons. */
    2034         size_t cbFill;
    2035         if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
    2036             cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
    2037         else
    2038             cbFill = 0;
    2039         /* The rest must be read from the image. */
    2040         size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill;
    2041 
    2042         /* Now assemble the remaining data. */
    2043         if (cbWriteCopy)
    2044             memcpy((char *)pvTmp + cbPreRead + cbThisWrite,
    2045                    (char *)pvBuf + cbThisWrite, cbWriteCopy);
    2046         if (cbReadImage)
    2047             rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,
    2048                                 uOffset + cbThisWrite + cbWriteCopy,
    2049                                 (char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy,
    2050                                 cbReadImage, true /* fZeroFreeBlocks */,
    2051                                 false /* fUpdateCache */, 0);
    2052         if (RT_FAILURE(rc))
    2053             return rc;
    2054         /* Zero out the remainder of this block. Will never be visible, as this
    2055          * is beyond the limit of the image. */
    2056         if (cbFill)
    2057             memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,
    2058                    '\0', cbFill);
    2059     }
    2060 
    2061     /* Write the full block to the virtual disk. */
    2062     RTSGSEG SegmentBuf;
    2063     RTSGBUF SgBuf;
    2064     VDIOCTX IoCtx;
    2065 
    2066     SegmentBuf.pvSeg = pvTmp;
    2067     SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead;
    2068     RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    2069     vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
    2070                 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    2071     rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead,
    2072                                         cbPreRead + cbThisWrite + cbPostRead,
    2073                                         &IoCtx, NULL, &cbPreRead, &cbPostRead, 0);
    2074     Assert(rc != VERR_VD_BLOCK_FREE);
    2075     Assert(cbPreRead == 0);
    2076     Assert(cbPostRead == 0);
    2077 
    2078     return rc;
    2079 }
    2080 
    2081 /**
    2082  * internal: write a complete block (only used for diff images), taking the
    2083  * remaining data from parent images. This implementation optimizes out writes
    2084  * that do not change the data relative to the state as of the parent images.
    2085  * All backends which support differential/growing images support this.
    2086  */
    2087 static int vdWriteHelperOptimized(PVBOXHDD pDisk, PVDIMAGE pImage,
    2088                                   PVDIMAGE pImageParentOverride,
    2089                                   uint64_t uOffset, size_t cbWrite,
    2090                                   size_t cbThisWrite, size_t cbPreRead,
    2091                                   size_t cbPostRead, const void *pvBuf,
    2092                                   void *pvTmp, unsigned cImagesRead)
    2093 {
    2094     size_t cbFill = 0;
    2095     size_t cbWriteCopy = 0;
    2096     size_t cbReadImage = 0;
    2097     int rc;
    2098 
    2099     LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",
    2100                  pDisk, pImage, pImageParentOverride, uOffset, cbWrite));
    2101 
    2102     if (cbPostRead)
    2103     {
    2104         /* Figure out how much we cannot read from the image, because
    2105          * the last block to write might exceed the nominal size of the
    2106          * image for technical reasons. */
    2107         if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
    2108             cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
    2109 
    2110         /* If we have data to be written, use that instead of reading
    2111          * data from the image. */
    2112         if (cbWrite > cbThisWrite)
    2113             cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
    2114 
    2115         /* The rest must be read from the image. */
    2116         cbReadImage = cbPostRead - cbWriteCopy - cbFill;
    2117     }
    2118 
    2119     /* Read the entire data of the block so that we can compare whether it will
    2120      * be modified by the write or not. */
    2121     rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, uOffset - cbPreRead, pvTmp,
    2122                         cbPreRead + cbThisWrite + cbPostRead - cbFill,
    2123                         true /* fZeroFreeBlocks */, false /* fUpdateCache */,
    2124                         cImagesRead);
    2125     if (RT_FAILURE(rc))
    2126         return rc;
    2127 
    2128     /* Check if the write would modify anything in this block. */
    2129     if (   !memcmp((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite)
    2130         && (!cbWriteCopy || !memcmp((char *)pvTmp + cbPreRead + cbThisWrite,
    2131                                     (char *)pvBuf + cbThisWrite, cbWriteCopy)))
    2132     {
    2133         /* Block is completely unchanged, so no need to write anything. */
    2134         return VINF_SUCCESS;
    2135     }
    2136 
    2137     /* Copy the data to the right place in the buffer. */
    2138     memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);
    2139 
    2140     /* Handle the data that goes after the write to fill the block. */
    2141     if (cbPostRead)
    2142     {
    2143         /* Now assemble the remaining data. */
    2144         if (cbWriteCopy)
    2145             memcpy((char *)pvTmp + cbPreRead + cbThisWrite,
    2146                    (char *)pvBuf + cbThisWrite, cbWriteCopy);
    2147         /* Zero out the remainder of this block. Will never be visible, as this
    2148          * is beyond the limit of the image. */
    2149         if (cbFill)
    2150             memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,
    2151                    '\0', cbFill);
    2152     }
    2153 
    2154     /* Write the full block to the virtual disk. */
    2155     RTSGSEG SegmentBuf;
    2156     RTSGBUF SgBuf;
    2157     VDIOCTX IoCtx;
    2158 
    2159     SegmentBuf.pvSeg = pvTmp;
    2160     SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead;
    2161     RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    2162     vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
    2163                 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    2164     rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead,
    2165                                         cbPreRead + cbThisWrite + cbPostRead,
    2166                                         &IoCtx, NULL, &cbPreRead, &cbPostRead, 0);
    2167     Assert(rc != VERR_VD_BLOCK_FREE);
    2168     Assert(cbPreRead == 0);
    2169     Assert(cbPostRead == 0);
    2170 
    2171     return rc;
    2172 }
    2173 
    2174 /**
    21752018 * internal: write buffer to the image, taking care of block boundaries and
    21762019 * write optimizations.
     
    21812024                           bool fUpdateCache, unsigned cImagesRead)
    21822025{
    2183     int rc;
    2184     unsigned fWrite;
    2185     size_t cbThisWrite;
    2186     size_t cbPreRead, cbPostRead;
    2187     uint64_t uOffsetCur = uOffset;
    2188     size_t cbWriteCur = cbWrite;
    2189     const void *pcvBufCur = pvBuf;
    2190     RTSGSEG SegmentBuf;
     2026    uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
     2027    RTSGSEG Segment;
    21912028    RTSGBUF SgBuf;
    21922029    VDIOCTX IoCtx;
    21932030
    2194     /* Loop until all written. */
    2195     do
    2196     {
    2197         /* Try to write the possibly partial block to the last opened image.
    2198          * This works when the block is already allocated in this image or
    2199          * if it is a full-block write (and allocation isn't suppressed below).
    2200          * For image formats which don't support zero blocks, it's beneficial
    2201          * to avoid unnecessarily allocating unchanged blocks. This prevents
    2202          * unwanted expanding of images. VMDK is an example. */
    2203         cbThisWrite = cbWriteCur;
    2204         fWrite =   (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
    2205                  ? 0 : VD_WRITE_NO_ALLOC;
    2206 
    2207         SegmentBuf.pvSeg = (void *)pcvBufCur;
    2208         SegmentBuf.cbSeg = cbWrite;
    2209         RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    2210         vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
    2211                     &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    2212         rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffsetCur, cbThisWrite,
    2213                                             &IoCtx, &cbThisWrite, &cbPreRead,
    2214                                             &cbPostRead, fWrite);
    2215         if (rc == VERR_VD_BLOCK_FREE)
    2216         {
    2217             void *pvTmp = RTMemTmpAlloc(cbPreRead + cbThisWrite + cbPostRead);
    2218             AssertBreakStmt(VALID_PTR(pvTmp), rc = VERR_NO_MEMORY);
    2219 
    2220             if (!(pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME))
    2221             {
    2222                 /* Optimized write, suppress writing to a so far unallocated
    2223                  * block if the data is in fact not changed. */
    2224                 rc = vdWriteHelperOptimized(pDisk, pImage, pImageParentOverride,
    2225                                             uOffsetCur, cbWriteCur,
    2226                                             cbThisWrite, cbPreRead, cbPostRead,
    2227                                             pcvBufCur, pvTmp, cImagesRead);
    2228             }
    2229             else
    2230             {
    2231                 /* Normal write, not optimized in any way. The block will
    2232                  * be written no matter what. This will usually (unless the
    2233                  * backend has some further optimization enabled) cause the
    2234                  * block to be allocated. */
    2235                 rc = vdWriteHelperStandard(pDisk, pImage, pImageParentOverride,
    2236                                            uOffsetCur, cbWriteCur,
    2237                                            cbThisWrite, cbPreRead, cbPostRead,
    2238                                            pcvBufCur, pvTmp);
    2239             }
    2240             RTMemTmpFree(pvTmp);
    2241             if (RT_FAILURE(rc))
    2242                 break;
    2243         }
    2244 
    2245         cbWriteCur -= cbThisWrite;
    2246         uOffsetCur += cbThisWrite;
    2247         pcvBufCur = (char *)pcvBufCur + cbThisWrite;
    2248     } while (cbWriteCur != 0 && RT_SUCCESS(rc));
    2249 
    2250 #if 0 /** @todo: Soon removed when sync and async version of the write helper are merged. */
    2251     /* Update the cache on success */
    2252     if (   RT_SUCCESS(rc)
    2253         && pDisk->pCache
    2254         && fUpdateCache)
    2255         rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, cbWrite, NULL);
    2256 
    2257     if (RT_SUCCESS(rc))
    2258         rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite);
    2259 #endif
    2260 
    2261     return rc;
     2031    if (fUpdateCache)
     2032        fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE;
     2033
     2034    Segment.pvSeg = (void *)pvBuf;
     2035    Segment.cbSeg = cbWrite;
     2036    RTSgBufInit(&SgBuf, &Segment, 1);
     2037    vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf,
     2038                NULL, vdWriteHelperAsync, fFlags);
     2039
     2040    IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
     2041    IoCtx.Req.Io.cImagesRead = cImagesRead;
     2042    IoCtx.pIoCtxParent          = NULL;
     2043    IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
     2044    IoCtx.Type.Root.pvUser1     = pDisk;
     2045    IoCtx.Type.Root.pvUser2     = NULL;
     2046    return vdIoCtxProcessSync(&IoCtx);
    22622047}
    22632048
     
    24912276}
    24922277
    2493 /**
    2494  * internal: write a complete block (only used for diff images), taking the
    2495  * remaining data from parent images. This implementation does not optimize
    2496  * anything (except that it tries to read only that portions from parent
    2497  * images that are really needed) - async version.
    2498  */
    2499 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
    2500 {
    2501     int rc = VINF_SUCCESS;
    2502 
    2503 #if 0
    2504 
    2505     /* Read the data that goes before the write to fill the block. */
    2506     if (cbPreRead)
    2507     {
    2508         rc = vdReadHelperAsync(pIoCtxDst);
    2509         if (RT_FAILURE(rc))
    2510             return rc;
    2511     }
    2512 
    2513     /* Copy the data to the right place in the buffer. */
    2514     vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbThisWrite);
    2515 
    2516     /* Read the data that goes after the write to fill the block. */
    2517     if (cbPostRead)
    2518     {
    2519         /* If we have data to be written, use that instead of reading
    2520          * data from the image. */
    2521         size_t cbWriteCopy;
    2522         if (cbWrite > cbThisWrite)
    2523             cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
    2524         else
    2525             cbWriteCopy = 0;
    2526         /* Figure out how much we cannot read from the image, because
    2527          * the last block to write might exceed the nominal size of the
    2528          * image for technical reasons. */
    2529         size_t cbFill;
    2530         if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
    2531             cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
    2532         else
    2533             cbFill = 0;
    2534         /* The rest must be read from the image. */
    2535         size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill;
    2536 
    2537         /* Now assemble the remaining data. */
    2538         if (cbWriteCopy)
    2539         {
    2540             vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbWriteCopy);
    2541             ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbWriteCopy);
    2542         }
    2543 
    2544         if (cbReadImage)
    2545             rc = vdReadHelperAsync(pDisk, pImage, pImageParentOverride, pIoCtxDst,
    2546                                    uOffset + cbThisWrite + cbWriteCopy,
    2547                                    cbReadImage);
    2548         if (RT_FAILURE(rc))
    2549             return rc;
    2550         /* Zero out the remainder of this block. Will never be visible, as this
    2551          * is beyond the limit of the image. */
    2552         if (cbFill)
    2553         {
    2554             vdIoCtxSet(pIoCtxDst, '\0', cbFill);
    2555             ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbFill);
    2556         }
    2557     }
    2558 
    2559     if (   !pIoCtxDst->cbTransferLeft
    2560         && !pIoCtxDst->cMetaTransfersPending
    2561         && ASMAtomicCmpXchgBool(&pIoCtxDst->fComplete, true, false))
    2562     {
    2563         /* Write the full block to the virtual disk. */
    2564         vdIoCtxChildReset(pIoCtxDst);
    2565         rc = pImage->Backend->pfnWrite(pImage->pBackendData,
    2566                                             uOffset - cbPreRead,
    2567                                             cbPreRead + cbThisWrite + cbPostRead,
    2568                                             pIoCtxDst,
    2569                                             NULL, &cbPreRead, &cbPostRead, 0);
    2570         Assert(rc != VERR_VD_BLOCK_FREE);
    2571         Assert(cbPreRead == 0);
    2572         Assert(cbPostRead == 0);
    2573     }
    2574     else
    2575     {
    2576         LogFlow(("cbTransferLeft=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
    2577                  pIoCtxDst->cbTransferLeft, pIoCtxDst->cMetaTransfersPending,
    2578                  pIoCtxDst->fComplete));
    2579         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    2580     }
    2581 
    2582     return rc;
    2583 #endif
    2584     return VERR_NOT_IMPLEMENTED;
    2585 }
    2586 
    2587 static int vdWriteHelperOptimizedCommitAsync(PVDIOCTX pIoCtx)
     2278static int vdWriteHelperCommitAsync(PVDIOCTX pIoCtx)
    25882279{
    25892280    int rc             = VINF_SUCCESS;
     
    25952286    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
    25962287    rc = pImage->Backend->pfnWrite(pImage->pBackendData,
    2597                                         pIoCtx->Req.Io.uOffset - cbPreRead,
    2598                                         cbPreRead + cbThisWrite + cbPostRead,
    2599                                         pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);
     2288                                   pIoCtx->Req.Io.uOffset - cbPreRead,
     2289                                   cbPreRead + cbThisWrite + cbPostRead,
     2290                                   pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);
    26002291    Assert(rc != VERR_VD_BLOCK_FREE);
    26012292    Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0);
     
    26862377    /* Write the full block to the virtual disk. */
    26872378    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    2688     pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedCommitAsync;
     2379    pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
    26892380
    26902381    return rc;
     
    26962387
    26972388    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2389
     2390    pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
    26982391
    26992392    if (pIoCtx->Req.Io.cbTransferLeft)
     
    27622455    /* Next step */
    27632456    pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync;
     2457    return VINF_SUCCESS;
     2458}
     2459
     2460static int vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx)
     2461{
     2462    int rc = VINF_SUCCESS;
     2463    size_t cbPostRead  = pIoCtx->Type.Child.cbPostRead;
     2464    size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
     2465    PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;
     2466
     2467    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2468
     2469    vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);
     2470    if (cbPostRead)
     2471    {
     2472        size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
     2473        size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
     2474        size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;
     2475
     2476        /* Now assemble the remaining data. */
     2477        if (cbWriteCopy)
     2478        {
     2479            /*
     2480             * The S/G buffer of the parent needs to be cloned because
     2481             * it is not allowed to modify the state.
     2482             */
     2483            RTSGBUF SgBufParentTmp;
     2484
     2485            RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
     2486            RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
     2487        }
     2488
     2489        /* Zero out the remainder of this block. Will never be visible, as this
     2490         * is beyond the limit of the image. */
     2491        if (cbFill)
     2492        {
     2493            RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage);
     2494            vdIoCtxSet(pIoCtx, '\0', cbFill);
     2495        }
     2496
     2497        if (cbReadImage)
     2498        {
     2499            /* Read remaining data. */
     2500        }
     2501        else
     2502        {
     2503            /* Write the full block to the virtual disk. */
     2504            RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
     2505            pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
     2506        }
     2507    }
     2508    else
     2509    {
     2510        /* Write the full block to the virtual disk. */
     2511        RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
     2512        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
     2513    }
     2514
     2515    return rc;
     2516}
     2517
     2518static int vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx)
     2519{
     2520    int rc = VINF_SUCCESS;
     2521
     2522    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2523
     2524    pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
     2525
     2526    if (pIoCtx->Req.Io.cbTransferLeft)
     2527        rc = vdReadHelperAsync(pIoCtx);
     2528
     2529    if (   RT_SUCCESS(rc)
     2530        && (   pIoCtx->Req.Io.cbTransferLeft
     2531            || pIoCtx->cMetaTransfersPending))
     2532        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
     2533     else
     2534        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
     2535
     2536    return rc;
     2537}
     2538
     2539static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
     2540{
     2541    PVBOXHDD pDisk = pIoCtx->pDisk;
     2542    uint64_t uOffset   = pIoCtx->Type.Child.uOffsetSaved;
     2543    size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
     2544    size_t cbPreRead   = pIoCtx->Type.Child.cbPreRead;
     2545    size_t cbPostRead  = pIoCtx->Type.Child.cbPostRead;
     2546    size_t cbWrite     = pIoCtx->Type.Child.cbWriteParent;
     2547    size_t cbFill = 0;
     2548    size_t cbWriteCopy = 0;
     2549    size_t cbReadImage = 0;
     2550
     2551    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
     2552
     2553    AssertPtr(pIoCtx->pIoCtxParent);
     2554    Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);
     2555
     2556    /* Calculate the amount of data to read that goes after the write to fill the block. */
     2557    if (cbPostRead)
     2558    {
     2559        /* If we have data to be written, use that instead of reading
     2560         * data from the image. */
     2561        cbWriteCopy;
     2562        if (cbWrite > cbThisWrite)
     2563            cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
     2564
     2565        /* Figure out how much we cannot read from the image, because
     2566         * the last block to write might exceed the nominal size of the
     2567         * image for technical reasons. */
     2568        if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
     2569            cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
     2570
     2571        /* The rest must be read from the image. */
     2572        cbReadImage = cbPostRead - cbWriteCopy - cbFill;
     2573    }
     2574
     2575    pIoCtx->Type.Child.Write.Optimized.cbFill      = cbFill;
     2576    pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
     2577    pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;
     2578
     2579    /* Next step */
     2580    if (cbPreRead)
     2581    {
     2582        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync;
     2583
     2584        /* Read the data that goes before the write to fill the block. */
     2585        pIoCtx->Req.Io.cbTransferLeft = cbPreRead;
     2586        pIoCtx->Req.Io.cbTransfer     = pIoCtx->Req.Io.cbTransferLeft;
     2587        pIoCtx->Req.Io.uOffset       -= cbPreRead;
     2588    }
     2589    else
     2590        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
     2591
    27642592    return VINF_SUCCESS;
    27652593}
     
    96379465                                  pfnComplete, pvUser1, pvUser2,
    96389466                                  NULL, vdReadHelperAsync,
    9639                                   VDIOCTX_FLAGS_DEFAULT);
     9467                                  VDIOCTX_FLAGS_ZERO_FREE_BLOCKS);
    96409468        if (!pIoCtx)
    96419469        {
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette