- Timestamp:
- Feb 21, 2013 10:46:14 PM (12 years ago)
- svn:sync-xref-src-repo-rev:
- 83914
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Storage/VD.cpp
r44794 r44799 355 355 /** S/G buffer */ 356 356 RTSGBUF SgBuf; 357 /** Number of bytes to clear in the buffer before the current read. */358 size_t cbBufClear;359 /** Number of images to read. */360 unsigned cImagesRead;361 /** Override for the parent image to start reading from. */362 PVDIMAGE pImageParentOverride;363 357 } Io; 364 358 /** Discard requests. */ … … 430 424 431 425 /** Default flags for an I/O context, i.e. unblocked and async. */ 432 #define VDIOCTX_FLAGS_DEFAULT 426 #define VDIOCTX_FLAGS_DEFAULT (0) 433 427 /** Flag whether the context is blocked. */ 434 #define VDIOCTX_FLAGS_BLOCKED 428 #define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0) 435 429 /** Flag whether the I/O context is using synchronous I/O. */ 436 #define VDIOCTX_FLAGS_SYNC RT_BIT_32(1) 437 /** Flag whether the read should update the cache. */ 438 #define VDIOCTX_FLAGS_READ_UDATE_CACHE RT_BIT_32(2) 439 /** Flag whether free blocks should be zeroed. 440 * If false and no image has data for sepcified 441 * range VERR_VD_BLOCK_FREE is returned for the I/O context. 442 * Note that unallocated blocks are still zeroed 443 * if at least one image has valid data for a part 444 * of the range. 445 */ 446 #define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS RT_BIT_32(3) 447 /** Don't free the I/O context when complete because 448 * it was alloacted elsewhere (stack, ...). */ 449 #define VDIOCTX_FLAGS_DONT_FREE RT_BIT_32(4) 430 #define VDIOCTX_FLAGS_SYNC RT_BIT_32(1) 450 431 451 432 /** NIL I/O context pointer value. */ … … 596 577 /** Forward declaration of the async discard helper. */ 597 578 static int vdDiscardHelperAsync(PVDIOCTX pIoCtx); 598 static int vdWriteHelperAsync(PVDIOCTX pIoCtx);599 579 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk); 600 580 static int vdDiskUnlock(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc); 601 static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq);602 581 603 582 /** … … 830 809 pIoCtx->Req.Io.pImageStart = pImageStart; 831 810 pIoCtx->Req.Io.pImageCur = pImageStart; 832 pIoCtx->Req.Io.cbBufClear = 0;833 pIoCtx->Req.Io.pImageParentOverride = NULL;834 811 pIoCtx->cDataTransfersPending = 0; 835 812 pIoCtx->cMetaTransfersPending = 0; … … 840 817 pIoCtx->pfnIoCtxTransferNext = NULL; 841 818 pIoCtx->rcReq = VINF_SUCCESS; 842 pIoCtx->pIoCtxParent = NULL;843 819 844 820 /* There is no S/G list for a flush request. */ … … 869 845 */ 870 846 static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset, 871 size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead)847 PVDIOCTX pIoCtx, size_t cbRead, size_t *pcbRead) 872 848 { 873 849 int rc = VINF_SUCCESS; … … 932 908 933 909 /** 934 * Creates a new empty discard state.935 *936 * @returns Pointer to the new discard state or NULL if out of memory.937 */938 static PVDDISCARDSTATE vdDiscardStateCreate(void)939 {940 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));941 942 if (pDiscard)943 {944 RTListInit(&pDiscard->ListLru);945 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));946 if (!pDiscard->pTreeBlocks)947 {948 RTMemFree(pDiscard);949 pDiscard = NULL;950 }951 }952 953 return pDiscard;954 }955 956 /**957 * Removes the least recently used blocks from the waiting list until958 * the new value is reached.959 *960 * @returns VBox status code.961 * @param pDisk VD disk container.962 * @param pDiscard The discard state.963 * @param cbDiscardingNew How many bytes should be waiting on success.964 * The number of bytes waiting can be less.965 */966 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)967 {968 int rc = VINF_SUCCESS;969 970 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",971 pDisk, pDiscard, cbDiscardingNew));972 973 while (pDiscard->cbDiscarding > cbDiscardingNew)974 {975 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);976 977 Assert(!RTListIsEmpty(&pDiscard->ListLru));978 979 /* Go over the allocation bitmap and mark all discarded sectors as unused. */980 uint64_t offStart = pBlock->Core.Key;981 uint32_t idxStart = 0;982 size_t cbLeft = pBlock->cbDiscard;983 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);984 uint32_t cSectors = pBlock->cbDiscard / 512;985 986 while (cbLeft > 0)987 {988 int32_t idxEnd;989 size_t cbThis = cbLeft;990 991 if (fAllocated)992 {993 /* Check for the first unallocated bit. */994 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);995 if (idxEnd != -1)996 {997 cbThis = (idxEnd - idxStart) * 512;998 fAllocated = false;999 }1000 }1001 else1002 {1003 /* Mark as unused and check for the first set bit. */1004 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);1005 if (idxEnd != -1)1006 cbThis = (idxEnd - idxStart) * 512;1007 1008 1009 VDIOCTX IoCtx;1010 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,1011 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);1012 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,1013 &IoCtx, offStart, cbThis, NULL,1014 NULL, &cbThis, NULL,1015 VD_DISCARD_MARK_UNUSED);1016 if (RT_FAILURE(rc))1017 break;1018 1019 fAllocated = true;1020 }1021 1022 idxStart = idxEnd;1023 offStart += cbThis;1024 cbLeft -= cbThis;1025 }1026 1027 if (RT_FAILURE(rc))1028 break;1029 1030 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);1031 Assert(pBlockRemove == pBlock);1032 RTListNodeRemove(&pBlock->NodeLru);1033 1034 pDiscard->cbDiscarding -= pBlock->cbDiscard;1035 RTMemFree(pBlock->pbmAllocated);1036 RTMemFree(pBlock);1037 }1038 1039 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);1040 1041 LogFlowFunc(("returns rc=%Rrc\n", rc));1042 return rc;1043 }1044 1045 /**1046 * Destroys the current discard state, writing any waiting blocks to the image.1047 *1048 * @returns VBox status code.1049 * @param pDisk VD disk container.1050 */1051 static int vdDiscardStateDestroy(PVBOXHDD pDisk)1052 {1053 int rc = VINF_SUCCESS;1054 1055 if (pDisk->pDiscard)1056 {1057 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);1058 AssertRC(rc);1059 RTMemFree(pDisk->pDiscard->pTreeBlocks);1060 RTMemFree(pDisk->pDiscard);1061 pDisk->pDiscard = NULL;1062 }1063 1064 return rc;1065 }1066 1067 /**1068 * Marks the given range as allocated in the image.1069 * Required if there are discards in progress and a write to a block which can get discarded1070 * is written to.1071 *1072 * @returns VBox status code.1073 * @param pDisk VD container data.1074 * @param uOffset First byte to mark as allocated.1075 * @param cbRange Number of bytes to mark as allocated.1076 */1077 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange)1078 {1079 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;1080 int rc = VINF_SUCCESS;1081 1082 if (pDiscard)1083 {1084 do1085 {1086 size_t cbThisRange = cbRange;1087 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);1088 1089 if (pBlock)1090 {1091 int32_t idxStart, idxEnd;1092 1093 Assert(!(cbThisRange % 512));1094 Assert(!((uOffset - pBlock->Core.Key) % 512));1095 1096 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);1097 1098 idxStart = (uOffset - pBlock->Core.Key) / 512;1099 idxEnd = idxStart + (cbThisRange / 512);1100 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);1101 }1102 else1103 {1104 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);1105 if (pBlock)1106 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);1107 }1108 1109 Assert(cbRange >= cbThisRange);1110 1111 uOffset += cbThisRange;1112 cbRange -= cbThisRange;1113 } while (cbRange != 0);1114 }1115 1116 return rc;1117 }1118 1119 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1120 uint64_t uOffset, size_t cbTransfer,1121 PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,1122 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1123 uint32_t fFlags)1124 {1125 PVDIOCTX pIoCtx = NULL;1126 1127 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);1128 if (RT_LIKELY(pIoCtx))1129 {1130 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1131 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);1132 }1133 1134 return pIoCtx;1135 }1136 1137 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1138 uint64_t uOffset, size_t cbTransfer,1139 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,1140 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,1141 void *pvUser1, void *pvUser2,1142 void *pvAllocation,1143 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1144 uint32_t fFlags)1145 {1146 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1147 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);1148 1149 if (RT_LIKELY(pIoCtx))1150 {1151 pIoCtx->pIoCtxParent = NULL;1152 pIoCtx->Type.Root.pfnComplete = pfnComplete;1153 pIoCtx->Type.Root.pvUser1 = pvUser1;1154 pIoCtx->Type.Root.pvUser2 = pvUser2;1155 }1156 1157 LogFlow(("Allocated root I/O context %#p\n", pIoCtx));1158 return pIoCtx;1159 }1160 1161 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges,1162 unsigned cRanges,1163 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,1164 void *pvUser1, void *pvUser2,1165 void *pvAllocation,1166 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1167 uint32_t fFlags)1168 {1169 PVDIOCTX pIoCtx = NULL;1170 1171 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);1172 if (RT_LIKELY(pIoCtx))1173 {1174 pIoCtx->pIoCtxNext = NULL;1175 pIoCtx->pDisk = pDisk;1176 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD;1177 pIoCtx->cDataTransfersPending = 0;1178 pIoCtx->cMetaTransfersPending = 0;1179 pIoCtx->fComplete = false;1180 pIoCtx->fFlags = fFlags;1181 pIoCtx->pvAllocation = pvAllocation;1182 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;1183 pIoCtx->pfnIoCtxTransferNext = NULL;1184 pIoCtx->rcReq = VINF_SUCCESS;1185 pIoCtx->Req.Discard.paRanges = paRanges;1186 pIoCtx->Req.Discard.cRanges = cRanges;1187 pIoCtx->Req.Discard.idxRange = 0;1188 pIoCtx->Req.Discard.cbDiscardLeft = 0;1189 pIoCtx->Req.Discard.offCur = 0;1190 pIoCtx->Req.Discard.cbThisDiscard = 0;1191 1192 pIoCtx->pIoCtxParent = NULL;1193 pIoCtx->Type.Root.pfnComplete = pfnComplete;1194 pIoCtx->Type.Root.pvUser1 = pvUser1;1195 pIoCtx->Type.Root.pvUser2 = pvUser2;1196 }1197 1198 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));1199 return pIoCtx;1200 }1201 1202 DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1203 uint64_t uOffset, size_t cbTransfer,1204 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,1205 PVDIOCTX pIoCtxParent, size_t cbTransferParent,1206 size_t cbWriteParent, void *pvAllocation,1207 PFNVDIOCTXTRANSFER pfnIoCtxTransfer)1208 {1209 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1210 pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0);1211 1212 AssertPtr(pIoCtxParent);1213 Assert(!pIoCtxParent->pIoCtxParent);1214 1215 if (RT_LIKELY(pIoCtx))1216 {1217 pIoCtx->pIoCtxParent = pIoCtxParent;1218 pIoCtx->Type.Child.uOffsetSaved = uOffset;1219 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;1220 pIoCtx->Type.Child.cbTransferParent = cbTransferParent;1221 pIoCtx->Type.Child.cbWriteParent = cbWriteParent;1222 }1223 1224 LogFlow(("Allocated child I/O context %#p\n", pIoCtx));1225 return pIoCtx;1226 }1227 1228 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)1229 {1230 PVDIOTASK pIoTask = NULL;1231 1232 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);1233 if (pIoTask)1234 {1235 pIoTask->pIoStorage = pIoStorage;1236 pIoTask->pfnComplete = pfnComplete;1237 pIoTask->pvUser = pvUser;1238 pIoTask->fMeta = false;1239 pIoTask->Type.User.cbTransfer = cbTransfer;1240 pIoTask->Type.User.pIoCtx = pIoCtx;1241 }1242 1243 return pIoTask;1244 }1245 1246 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)1247 {1248 PVDIOTASK pIoTask = NULL;1249 1250 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);1251 if (pIoTask)1252 {1253 pIoTask->pIoStorage = pIoStorage;1254 pIoTask->pfnComplete = pfnComplete;1255 pIoTask->pvUser = pvUser;1256 pIoTask->fMeta = true;1257 pIoTask->Type.Meta.pMetaXfer = pMetaXfer;1258 }1259 1260 return pIoTask;1261 }1262 1263 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1264 {1265 LogFlow(("Freeing I/O context %#p\n", pIoCtx));1266 1267 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE))1268 {1269 if (pIoCtx->pvAllocation)1270 RTMemFree(pIoCtx->pvAllocation);1271 #ifdef DEBUG1272 memset(pIoCtx, 0xff, sizeof(VDIOCTX));1273 #endif1274 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);1275 }1276 }1277 1278 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask)1279 {1280 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);1281 }1282 1283 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)1284 {1285 AssertPtr(pIoCtx->pIoCtxParent);1286 1287 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);1288 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved;1289 pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved;1290 }1291 1292 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)1293 {1294 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));1295 1296 if (RT_LIKELY(pMetaXfer))1297 {1298 pMetaXfer->Core.Key = uOffset;1299 pMetaXfer->Core.KeyLast = uOffset + cb - 1;1300 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE;1301 pMetaXfer->cbMeta = cb;1302 pMetaXfer->pIoStorage = pIoStorage;1303 pMetaXfer->cRefs = 0;1304 RTListInit(&pMetaXfer->ListIoCtxWaiting);1305 }1306 return pMetaXfer;1307 }1308 1309 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)1310 {1311 /* Put it on the waiting list. */1312 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);1313 PVDIOCTX pHeadOld;1314 pIoCtx->pIoCtxNext = pNext;1315 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))1316 {1317 pNext = pHeadOld;1318 Assert(pNext != pIoCtx);1319 pIoCtx->pIoCtxNext = pNext;1320 ASMNopPause();1321 }1322 }1323 1324 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1325 {1326 LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx));1327 1328 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));1329 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;1330 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);1331 }1332 1333 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)1334 {1335 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);1336 }1337 1338 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)1339 {1340 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);1341 }1342 1343 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)1344 {1345 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);1346 }1347 1348 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)1349 {1350 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);1351 }1352 1353 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)1354 {1355 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);1356 }1357 1358 /**1359 * Process the I/O context, core method which assumes that the I/O context1360 * acquired the lock.1361 *1362 * @returns VBox status code.1363 * @param pIoCtx I/O context to process.1364 */1365 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)1366 {1367 int rc = VINF_SUCCESS;1368 1369 VD_IS_LOCKED(pIoCtx->pDisk);1370 1371 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));1372 1373 if ( !pIoCtx->cMetaTransfersPending1374 && !pIoCtx->cDataTransfersPending1375 && !pIoCtx->pfnIoCtxTransfer)1376 {1377 rc = VINF_VD_ASYNC_IO_FINISHED;1378 goto out;1379 }1380 1381 /*1382 * We complete the I/O context in case of an error1383 * if there is no I/O task pending.1384 */1385 if ( RT_FAILURE(pIoCtx->rcReq)1386 && !pIoCtx->cMetaTransfersPending1387 && !pIoCtx->cDataTransfersPending)1388 {1389 rc = VINF_VD_ASYNC_IO_FINISHED;1390 goto out;1391 }1392 1393 /* Don't change anything if there is a metadata transfer pending or we are blocked. */1394 if ( pIoCtx->cMetaTransfersPending1395 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))1396 {1397 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1398 goto out;1399 }1400 1401 if (pIoCtx->pfnIoCtxTransfer)1402 {1403 /* Call the transfer function advancing to the next while there is no error. */1404 while ( pIoCtx->pfnIoCtxTransfer1405 && !pIoCtx->cMetaTransfersPending1406 && RT_SUCCESS(rc))1407 {1408 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));1409 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);1410 1411 /* Advance to the next part of the transfer if the current one succeeded. */1412 if (RT_SUCCESS(rc))1413 {1414 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;1415 pIoCtx->pfnIoCtxTransferNext = NULL;1416 }1417 }1418 }1419 1420 if ( RT_SUCCESS(rc)1421 && !pIoCtx->cMetaTransfersPending1422 && !pIoCtx->cDataTransfersPending)1423 rc = VINF_VD_ASYNC_IO_FINISHED;1424 else if ( RT_SUCCESS(rc)1425 || rc == VERR_VD_NOT_ENOUGH_METADATA1426 || rc == VERR_VD_IOCTX_HALT)1427 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1428 else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))1429 {1430 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);1431 /*1432 * The I/O context completed if we have an error and there is no data1433 * or meta data transfer pending.1434 */1435 if ( !pIoCtx->cMetaTransfersPending1436 && !pIoCtx->cDataTransfersPending)1437 rc = VINF_VD_ASYNC_IO_FINISHED;1438 else1439 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1440 }1441 1442 out:1443 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",1444 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,1445 pIoCtx->fComplete));1446 1447 return rc;1448 }1449 1450 /**1451 * Processes the list of waiting I/O contexts.1452 *1453 * @returns VBox status code.1454 * @param pDisk The disk structure.1455 * @param pIoCtxRc An I/O context handle which waits on the list. When processed1456 * The status code is returned. NULL if there is no I/O context1457 * to return the status code for.1458 */1459 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)1460 {1461 int rc = VINF_SUCCESS;1462 1463 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));1464 1465 VD_IS_LOCKED(pDisk);1466 1467 /* Get the waiting list and process it in FIFO order. */1468 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);1469 1470 /* Reverse it. */1471 PVDIOCTX pCur = pIoCtxHead;1472 pIoCtxHead = NULL;1473 while (pCur)1474 {1475 PVDIOCTX pInsert = pCur;1476 pCur = pCur->pIoCtxNext;1477 pInsert->pIoCtxNext = pIoCtxHead;1478 pIoCtxHead = pInsert;1479 }1480 1481 /* Process now. */1482 pCur = pIoCtxHead;1483 while (pCur)1484 {1485 int rcTmp;1486 PVDIOCTX pTmp = pCur;1487 1488 pCur = pCur->pIoCtxNext;1489 pTmp->pIoCtxNext = NULL;1490 1491 /*1492 * Need to clear the sync flag here if there is a new I/O context1493 * with it set and the context is not given in pIoCtxRc.1494 * This happens most likely on a different thread and that one shouldn't1495 * process the context synchronously.1496 *1497 * The thread who issued the context will wait on the event semaphore1498 * anyway which is signalled when the completion handler is called.1499 */1500 if ( pTmp->fFlags & VDIOCTX_FLAGS_SYNC1501 && pTmp != pIoCtxRc)1502 pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC;1503 1504 rcTmp = vdIoCtxProcessLocked(pTmp);1505 if (pTmp == pIoCtxRc)1506 {1507 /* The given I/O context was processed, pass the return code to the caller. */1508 rc = rcTmp;1509 }1510 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED1511 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))1512 {1513 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));1514 vdThreadFinishWrite(pDisk);1515 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,1516 pTmp->Type.Root.pvUser2,1517 pTmp->rcReq);1518 vdIoCtxFree(pDisk, pTmp);1519 }1520 }1521 1522 LogFlowFunc(("returns rc=%Rrc\n", rc));1523 return rc;1524 }1525 1526 /**1527 * Processes the list of blocked I/O contexts.1528 *1529 * @returns nothing.1530 * @param pDisk The disk structure.1531 */1532 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)1533 {1534 LogFlowFunc(("pDisk=%#p\n", pDisk));1535 1536 VD_IS_LOCKED(pDisk);1537 1538 /* Get the waiting list and process it in FIFO order. */1539 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);1540 1541 /* Reverse it. */1542 PVDIOCTX pCur = pIoCtxHead;1543 pIoCtxHead = NULL;1544 while (pCur)1545 {1546 PVDIOCTX pInsert = pCur;1547 pCur = pCur->pIoCtxNext;1548 pInsert->pIoCtxNext = pIoCtxHead;1549 pIoCtxHead = pInsert;1550 }1551 1552 /* Process now. */1553 pCur = pIoCtxHead;1554 while (pCur)1555 {1556 int rc;1557 PVDIOCTX pTmp = pCur;1558 1559 pCur = pCur->pIoCtxNext;1560 pTmp->pIoCtxNext = NULL;1561 1562 Assert(!pTmp->pIoCtxParent);1563 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);1564 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;1565 1566 rc = vdIoCtxProcessLocked(pTmp);1567 if ( rc == VINF_VD_ASYNC_IO_FINISHED1568 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))1569 {1570 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));1571 vdThreadFinishWrite(pDisk);1572 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,1573 pTmp->Type.Root.pvUser2,1574 pTmp->rcReq);1575 vdIoCtxFree(pDisk, pTmp);1576 }1577 }1578 1579 LogFlowFunc(("returns\n"));1580 }1581 1582 /**1583 * Processes the I/O context trying to lock the criticial section.1584 * The context is deferred if the critical section is busy.1585 *1586 * @returns VBox status code.1587 * @param pIoCtx The I/O context to process.1588 */1589 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)1590 {1591 int rc = VINF_SUCCESS;1592 PVBOXHDD pDisk = pIoCtx->pDisk;1593 1594 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));1595 1596 /* Put it on the waiting list first. */1597 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);1598 1599 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))1600 {1601 /* Leave it again, the context will be processed just before leaving the lock. */1602 LogFlowFunc(("Successfully acquired the lock\n"));1603 rc = vdDiskUnlock(pDisk, pIoCtx);1604 }1605 else1606 {1607 LogFlowFunc(("Lock is held\n"));1608 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1609 }1610 1611 return rc;1612 }1613 1614 /**1615 * Process the I/O context in a synchronous manner, waiting1616 * for it to complete.1617 *1618 * @returns VBox status code of the completed request.1619 * @param pIoCtx The sync I/O context.1620 */1621 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx)1622 {1623 int rc = VINF_SUCCESS;1624 PVBOXHDD pDisk = pIoCtx->pDisk;1625 1626 LogFlowFunc(("pIoCtx=%p\n", pIoCtx));1627 1628 AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC,1629 ("I/O context is not marked as synchronous\n"));1630 1631 rc = vdIoCtxProcessTryLockDefer(pIoCtx);1632 if (rc == VINF_VD_ASYNC_IO_FINISHED)1633 rc = VINF_SUCCESS;1634 1635 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)1636 {1637 rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT);1638 AssertRC(rc);1639 1640 rc = pDisk->rcSync;1641 }1642 else /* Success or error. */1643 vdIoCtxFree(pDisk, pIoCtx);1644 1645 return rc;1646 }1647 1648 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1649 {1650 return pDisk->pIoCtxLockOwner == pIoCtx;1651 }1652 1653 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1654 {1655 int rc = VINF_SUCCESS;1656 1657 VD_IS_LOCKED(pDisk);1658 1659 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));1660 1661 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))1662 {1663 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */1664 vdIoCtxDefer(pDisk, pIoCtx);1665 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1666 }1667 1668 LogFlowFunc(("returns -> %Rrc\n", rc));1669 return rc;1670 }1671 1672 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)1673 {1674 LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",1675 pDisk, pIoCtx, fProcessBlockedReqs));1676 1677 VD_IS_LOCKED(pDisk);1678 1679 LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));1680 Assert(pDisk->pIoCtxLockOwner == pIoCtx);1681 ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);1682 1683 if (fProcessBlockedReqs)1684 {1685 /* Process any blocked writes if the current request didn't caused another growing. */1686 vdDiskProcessBlockedIoCtx(pDisk);1687 }1688 1689 LogFlowFunc(("returns\n"));1690 }1691 1692 /**1693 910 * Internal: Reads a given amount of data from the image chain of the disk. 1694 911 **/ 1695 912 static int vdDiskReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride, 1696 uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead)913 uint64_t uOffset, void *pvBuf, size_t cbRead, size_t *pcbThisRead) 1697 914 { 1698 915 int rc = VINF_SUCCESS; 1699 916 size_t cbThisRead = cbRead; 917 RTSGSEG SegmentBuf; 918 RTSGBUF SgBuf; 919 VDIOCTX IoCtx; 1700 920 1701 921 AssertPtr(pcbThisRead); 1702 922 1703 923 *pcbThisRead = 0; 924 925 SegmentBuf.pvSeg = pvBuf; 926 SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE; 927 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 928 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL, 929 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1704 930 1705 931 /* … … 1708 934 */ 1709 935 rc = pImage->Backend->pfnRead(pImage->pBackendData, 1710 uOffset, cbThisRead, pIoCtx,1711 &cbThisRead);936 uOffset, cbThisRead, &IoCtx, 937 &cbThisRead); 1712 938 1713 939 if (rc == VERR_VD_BLOCK_FREE) … … 1718 944 { 1719 945 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1720 uOffset, cbThisRead, pIoCtx,1721 &cbThisRead);946 uOffset, cbThisRead, &IoCtx, 947 &cbThisRead); 1722 948 } 1723 949 } … … 1726 952 *pcbThisRead = cbThisRead; 1727 953 1728 return rc;1729 }1730 1731 /**1732 * internal: read the specified amount of data in whatever blocks the backend1733 * will give us - async version.1734 */1735 static int vdReadHelperAsync(PVDIOCTX pIoCtx)1736 {1737 int rc;1738 PVBOXHDD pDisk = pIoCtx->pDisk;1739 size_t cbToRead = pIoCtx->Req.Io.cbTransfer;1740 uint64_t uOffset = pIoCtx->Req.Io.uOffset;1741 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;1742 PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;1743 unsigned cImagesRead = pIoCtx->Req.Io.cImagesRead;1744 size_t cbThisRead;1745 1746 /* Loop until all reads started or we have a backend which needs to read metadata. */1747 do1748 {1749 /* Search for image with allocated block. Do not attempt to read more1750 * than the previous reads marked as valid. Otherwise this would return1751 * stale data when different block sizes are used for the images. */1752 cbThisRead = cbToRead;1753 1754 if ( pDisk->pCache1755 && !pImageParentOverride)1756 {1757 rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead,1758 pIoCtx, &cbThisRead);1759 if (rc == VERR_VD_BLOCK_FREE)1760 {1761 rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead,1762 pIoCtx, &cbThisRead);1763 1764 /* If the read was successful, write the data back into the cache. */1765 if ( RT_SUCCESS(rc)1766 && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UDATE_CACHE)1767 {1768 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead,1769 pIoCtx, NULL);1770 }1771 }1772 }1773 else1774 {1775 1776 /*1777 * Try to read from the given image.1778 * If the block is not allocated read from override chain if present.1779 */1780 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,1781 uOffset, cbThisRead, pIoCtx,1782 &cbThisRead);1783 1784 if ( rc == VERR_VD_BLOCK_FREE1785 && cImagesRead != 1)1786 {1787 unsigned cImagesToProcess = cImagesRead;1788 1789 pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev;1790 pIoCtx->Req.Io.pImageParentOverride = NULL;1791 1792 while (pCurrImage && rc == VERR_VD_BLOCK_FREE)1793 {1794 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,1795 uOffset, cbThisRead,1796 pIoCtx, &cbThisRead);1797 if (cImagesToProcess == 1)1798 break;1799 else if (cImagesToProcess > 0)1800 cImagesToProcess--;1801 1802 if (rc == VERR_VD_BLOCK_FREE)1803 pCurrImage = pCurrImage->pPrev;1804 }1805 }1806 }1807 1808 /* The task state will be updated on success already, don't do it here!. */1809 if (rc == VERR_VD_BLOCK_FREE)1810 {1811 /* No image in the chain contains the data for the block. */1812 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);1813 1814 /* Fill the free space with 0 if we are told to do so1815 * or a previous read returned valid data. */1816 if (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)1817 vdIoCtxSet(pIoCtx, '\0', cbThisRead);1818 else1819 pIoCtx->Req.Io.cbBufClear += cbThisRead;1820 1821 if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)1822 rc = VINF_VD_NEW_ZEROED_BLOCK;1823 else1824 rc = VINF_SUCCESS;1825 }1826 else if (rc == VERR_VD_IOCTX_HALT)1827 {1828 uOffset += cbThisRead;1829 cbToRead -= cbThisRead;1830 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;1831 }1832 else if ( RT_SUCCESS(rc)1833 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)1834 {1835 /* First not free block, fill the space before with 0. */1836 if ( pIoCtx->Req.Io.cbBufClear1837 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))1838 {1839 RTSGBUF SgBuf;1840 RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf);1841 RTSgBufReset(&SgBuf);1842 RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear);1843 pIoCtx->Req.Io.cbBufClear = 0;1844 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;1845 }1846 rc = VINF_SUCCESS;1847 }1848 1849 if (RT_FAILURE(rc))1850 break;1851 1852 cbToRead -= cbThisRead;1853 uOffset += cbThisRead;1854 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */1855 } while (cbToRead != 0 && RT_SUCCESS(rc));1856 1857 if ( rc == VERR_VD_NOT_ENOUGH_METADATA1858 || rc == VERR_VD_IOCTX_HALT)1859 {1860 /* Save the current state. */1861 pIoCtx->Req.Io.uOffset = uOffset;1862 pIoCtx->Req.Io.cbTransfer = cbToRead;1863 pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;1864 }1865 1866 return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))1867 ? VERR_VD_BLOCK_FREE1868 : rc;1869 }1870 1871 /**1872 * internal: parent image read wrapper for compacting.1873 */1874 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,1875 size_t cbRead)1876 {1877 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;1878 1879 /** @todo1880 * Only used for compaction so far which is not possible to mix with async I/O.1881 * Needs to be changed if we want to support online compaction of images.1882 */1883 bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true);1884 AssertMsgReturn(!fLocked,1885 ("Calling synchronous parent read while another thread holds the disk lock\n"),1886 VERR_VD_INVALID_STATE);1887 1888 /* Fake an I/O context. */1889 RTSGSEG Segment;1890 RTSGBUF SgBuf;1891 VDIOCTX IoCtx;1892 1893 Segment.pvSeg = pvBuf;1894 Segment.cbSeg = cbRead;1895 RTSgBufInit(&SgBuf, &Segment, 1);1896 vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage,1897 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);1898 int rc = vdReadHelperAsync(&IoCtx);1899 ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false);1900 954 return rc; 1901 955 } … … 1930 984 bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead) 1931 985 { 1932 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE; 1933 RTSGSEG Segment; 1934 RTSGBUF SgBuf; 1935 VDIOCTX IoCtx; 1936 1937 if (fZeroFreeBlocks) 1938 fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; 1939 if (fUpdateCache) 1940 fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE; 1941 1942 Segment.pvSeg = pvBuf; 1943 Segment.cbSeg = cbRead; 1944 RTSgBufInit(&SgBuf, &Segment, 1); 1945 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf, 1946 NULL, vdReadHelperAsync, fFlags); 1947 1948 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride; 1949 IoCtx.Req.Io.cImagesRead = cImagesRead; 1950 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete; 1951 IoCtx.Type.Root.pvUser1 = pDisk; 1952 IoCtx.Type.Root.pvUser2 = NULL; 1953 return vdIoCtxProcessSync(&IoCtx); 986 int rc = VINF_SUCCESS; 987 size_t cbThisRead; 988 bool fAllFree = true; 989 size_t cbBufClear = 0; 990 991 /* Loop until all read. */ 992 do 993 { 994 /* Search for image with allocated block. Do not attempt to read more 995 * than the previous reads marked as valid. Otherwise this would return 996 * stale data when different block sizes are used for the images. */ 997 cbThisRead = cbRead; 998 999 if ( pDisk->pCache 1000 && !pImageParentOverride) 1001 { 1002 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */ 1003 rc = vdCacheReadHelper(pDisk->pCache, uOffset, pvBuf, 1004 cbThisRead, &cbThisRead); 1005 #endif 1006 if (rc == VERR_VD_BLOCK_FREE) 1007 { 1008 rc = vdDiskReadHelper(pDisk, pImage, NULL, uOffset, pvBuf, cbThisRead, 1009 &cbThisRead); 1010 1011 /* If the read was successful, write the data back into the cache. */ 1012 if ( RT_SUCCESS(rc) 1013 && fUpdateCache) 1014 { 1015 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */ 1016 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, 1017 cbThisRead, NULL); 1018 #endif 1019 } 1020 } 1021 } 1022 else 1023 { 1024 RTSGSEG SegmentBuf; 1025 RTSGBUF SgBuf; 1026 VDIOCTX IoCtx; 1027 1028 SegmentBuf.pvSeg = pvBuf; 1029 SegmentBuf.cbSeg = cbThisRead; 1030 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 1031 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL, 1032 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1033 1034 /* 1035 * Try to read from the given image. 1036 * If the block is not allocated read from override chain if present. 1037 */ 1038 rc = pImage->Backend->pfnRead(pImage->pBackendData, 1039 uOffset, cbThisRead, &IoCtx, 1040 &cbThisRead); 1041 1042 if ( rc == VERR_VD_BLOCK_FREE 1043 && cImagesRead != 1) 1044 { 1045 unsigned cImagesToProcess = cImagesRead; 1046 1047 for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev; 1048 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE; 1049 pCurrImage = pCurrImage->pPrev) 1050 { 1051 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1052 uOffset, cbThisRead, 1053 &IoCtx, &cbThisRead); 1054 if (cImagesToProcess == 1) 1055 break; 1056 else if (cImagesToProcess > 0) 1057 cImagesToProcess--; 1058 } 1059 } 1060 } 1061 1062 /* No image in the chain contains the data for the block. */ 1063 if (rc == VERR_VD_BLOCK_FREE) 1064 { 1065 /* Fill the free space with 0 if we are told to do so 1066 * or a previous read returned valid data. */ 1067 if (fZeroFreeBlocks || !fAllFree) 1068 memset(pvBuf, '\0', cbThisRead); 1069 else 1070 cbBufClear += cbThisRead; 1071 1072 if (pImage->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS) 1073 rc = VINF_VD_NEW_ZEROED_BLOCK; 1074 else 1075 rc = VINF_SUCCESS; 1076 } 1077 else if (RT_SUCCESS(rc)) 1078 { 1079 /* First not free block, fill the space before with 0. */ 1080 if (!fZeroFreeBlocks) 1081 { 1082 memset((char *)pvBuf - cbBufClear, '\0', cbBufClear); 1083 cbBufClear = 0; 1084 fAllFree = false; 1085 } 1086 } 1087 1088 cbRead -= cbThisRead; 1089 uOffset += cbThisRead; 1090 pvBuf = (char *)pvBuf + cbThisRead; 1091 } while (cbRead != 0 && RT_SUCCESS(rc)); 1092 1093 return (!fZeroFreeBlocks && fAllFree) ? VERR_VD_BLOCK_FREE : rc; 1954 1094 } 1955 1095 … … 1963 1103 return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead, 1964 1104 true /* fZeroFreeBlocks */, fUpdateCache, 0); 1105 } 1106 1107 /** 1108 * Creates a new empty discard state. 1109 * 1110 * @returns Pointer to the new discard state or NULL if out of memory. 1111 */ 1112 static PVDDISCARDSTATE vdDiscardStateCreate(void) 1113 { 1114 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE)); 1115 1116 if (pDiscard) 1117 { 1118 RTListInit(&pDiscard->ListLru); 1119 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE)); 1120 if (!pDiscard->pTreeBlocks) 1121 { 1122 RTMemFree(pDiscard); 1123 pDiscard = NULL; 1124 } 1125 } 1126 1127 return pDiscard; 1128 } 1129 1130 /** 1131 * Removes the least recently used blocks from the waiting list until 1132 * the new value is reached. 1133 * 1134 * @returns VBox status code. 1135 * @param pDisk VD disk container. 1136 * @param pDiscard The discard state. 1137 * @param cbDiscardingNew How many bytes should be waiting on success. 1138 * The number of bytes waiting can be less. 1139 */ 1140 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew) 1141 { 1142 int rc = VINF_SUCCESS; 1143 1144 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n", 1145 pDisk, pDiscard, cbDiscardingNew)); 1146 1147 while (pDiscard->cbDiscarding > cbDiscardingNew) 1148 { 1149 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru); 1150 1151 Assert(!RTListIsEmpty(&pDiscard->ListLru)); 1152 1153 /* Go over the allocation bitmap and mark all discarded sectors as unused. */ 1154 uint64_t offStart = pBlock->Core.Key; 1155 uint32_t idxStart = 0; 1156 size_t cbLeft = pBlock->cbDiscard; 1157 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart); 1158 uint32_t cSectors = pBlock->cbDiscard / 512; 1159 1160 while (cbLeft > 0) 1161 { 1162 int32_t idxEnd; 1163 size_t cbThis = cbLeft; 1164 1165 if (fAllocated) 1166 { 1167 /* Check for the first unallocated bit. */ 1168 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart); 1169 if (idxEnd != -1) 1170 { 1171 cbThis = (idxEnd - idxStart) * 512; 1172 fAllocated = false; 1173 } 1174 } 1175 else 1176 { 1177 /* Mark as unused and check for the first set bit. */ 1178 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart); 1179 if (idxEnd != -1) 1180 cbThis = (idxEnd - idxStart) * 512; 1181 1182 1183 VDIOCTX IoCtx; 1184 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL, 1185 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1186 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, 1187 &IoCtx, offStart, cbThis, NULL, 1188 NULL, &cbThis, NULL, 1189 VD_DISCARD_MARK_UNUSED); 1190 if (RT_FAILURE(rc)) 1191 break; 1192 1193 fAllocated = true; 1194 } 1195 1196 idxStart = idxEnd; 1197 offStart += cbThis; 1198 cbLeft -= cbThis; 1199 } 1200 1201 if (RT_FAILURE(rc)) 1202 break; 1203 1204 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key); 1205 Assert(pBlockRemove == pBlock); 1206 RTListNodeRemove(&pBlock->NodeLru); 1207 1208 pDiscard->cbDiscarding -= pBlock->cbDiscard; 1209 RTMemFree(pBlock->pbmAllocated); 1210 RTMemFree(pBlock); 1211 } 1212 1213 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew); 1214 1215 LogFlowFunc(("returns rc=%Rrc\n", rc)); 1216 return rc; 1217 } 1218 1219 /** 1220 * Destroys the current discard state, writing any waiting blocks to the image. 1221 * 1222 * @returns VBox status code. 1223 * @param pDisk VD disk container. 1224 */ 1225 static int vdDiscardStateDestroy(PVBOXHDD pDisk) 1226 { 1227 int rc = VINF_SUCCESS; 1228 1229 if (pDisk->pDiscard) 1230 { 1231 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */); 1232 AssertRC(rc); 1233 RTMemFree(pDisk->pDiscard->pTreeBlocks); 1234 RTMemFree(pDisk->pDiscard); 1235 pDisk->pDiscard = NULL; 1236 } 1237 1238 return rc; 1239 } 1240 1241 /** 1242 * Marks the given range as allocated in the image. 1243 * Required if there are discards in progress and a write to a block which can get discarded 1244 * is written to. 1245 * 1246 * @returns VBox status code. 1247 * @param pDisk VD container data. 1248 * @param uOffset First byte to mark as allocated. 1249 * @param cbRange Number of bytes to mark as allocated. 1250 */ 1251 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange) 1252 { 1253 PVDDISCARDSTATE pDiscard = pDisk->pDiscard; 1254 int rc = VINF_SUCCESS; 1255 1256 if (pDiscard) 1257 { 1258 do 1259 { 1260 size_t cbThisRange = cbRange; 1261 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset); 1262 1263 if (pBlock) 1264 { 1265 int32_t idxStart, idxEnd; 1266 1267 Assert(!(cbThisRange % 512)); 1268 Assert(!((uOffset - pBlock->Core.Key) % 512)); 1269 1270 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1); 1271 1272 idxStart = (uOffset - pBlock->Core.Key) / 512; 1273 idxEnd = idxStart + (cbThisRange / 512); 1274 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd); 1275 } 1276 else 1277 { 1278 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true); 1279 if (pBlock) 1280 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset); 1281 } 1282 1283 Assert(cbRange >= cbThisRange); 1284 1285 uOffset += cbThisRange; 1286 cbRange -= cbThisRange; 1287 } while (cbRange != 0); 1288 } 1289 1290 return rc; 1291 } 1292 1293 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1294 uint64_t uOffset, size_t cbTransfer, 1295 PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf, 1296 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1297 uint32_t fFlags) 1298 { 1299 PVDIOCTX pIoCtx = NULL; 1300 1301 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx); 1302 if (RT_LIKELY(pIoCtx)) 1303 { 1304 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1305 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); 1306 } 1307 1308 return pIoCtx; 1309 } 1310 1311 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1312 uint64_t uOffset, size_t cbTransfer, 1313 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf, 1314 PFNVDASYNCTRANSFERCOMPLETE pfnComplete, 1315 void *pvUser1, void *pvUser2, 1316 void *pvAllocation, 1317 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1318 uint32_t fFlags) 1319 { 1320 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1321 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); 1322 1323 if (RT_LIKELY(pIoCtx)) 1324 { 1325 pIoCtx->pIoCtxParent = NULL; 1326 pIoCtx->Type.Root.pfnComplete = pfnComplete; 1327 pIoCtx->Type.Root.pvUser1 = pvUser1; 1328 pIoCtx->Type.Root.pvUser2 = pvUser2; 1329 } 1330 1331 LogFlow(("Allocated root I/O context %#p\n", pIoCtx)); 1332 return pIoCtx; 1333 } 1334 1335 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges, 1336 unsigned cRanges, 1337 PFNVDASYNCTRANSFERCOMPLETE pfnComplete, 1338 void *pvUser1, void *pvUser2, 1339 void *pvAllocation, 1340 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1341 uint32_t fFlags) 1342 { 1343 PVDIOCTX pIoCtx = NULL; 1344 1345 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx); 1346 if (RT_LIKELY(pIoCtx)) 1347 { 1348 pIoCtx->pIoCtxNext = NULL; 1349 pIoCtx->pDisk = pDisk; 1350 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD; 1351 pIoCtx->cDataTransfersPending = 0; 1352 pIoCtx->cMetaTransfersPending = 0; 1353 pIoCtx->fComplete = false; 1354 pIoCtx->fFlags = fFlags; 1355 pIoCtx->pvAllocation = pvAllocation; 1356 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer; 1357 pIoCtx->pfnIoCtxTransferNext = NULL; 1358 pIoCtx->rcReq = VINF_SUCCESS; 1359 pIoCtx->Req.Discard.paRanges = paRanges; 1360 pIoCtx->Req.Discard.cRanges = cRanges; 1361 pIoCtx->Req.Discard.idxRange = 0; 1362 pIoCtx->Req.Discard.cbDiscardLeft = 0; 1363 pIoCtx->Req.Discard.offCur = 0; 1364 pIoCtx->Req.Discard.cbThisDiscard = 0; 1365 1366 pIoCtx->pIoCtxParent = NULL; 1367 pIoCtx->Type.Root.pfnComplete = pfnComplete; 1368 pIoCtx->Type.Root.pvUser1 = pvUser1; 1369 pIoCtx->Type.Root.pvUser2 = pvUser2; 1370 } 1371 1372 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx)); 1373 return pIoCtx; 1374 } 1375 1376 DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1377 uint64_t uOffset, size_t cbTransfer, 1378 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf, 1379 PVDIOCTX pIoCtxParent, size_t cbTransferParent, 1380 size_t cbWriteParent, void *pvAllocation, 1381 PFNVDIOCTXTRANSFER pfnIoCtxTransfer) 1382 { 1383 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1384 pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0); 1385 1386 AssertPtr(pIoCtxParent); 1387 Assert(!pIoCtxParent->pIoCtxParent); 1388 1389 if (RT_LIKELY(pIoCtx)) 1390 { 1391 pIoCtx->pIoCtxParent = pIoCtxParent; 1392 pIoCtx->Type.Child.uOffsetSaved = uOffset; 1393 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer; 1394 pIoCtx->Type.Child.cbTransferParent = cbTransferParent; 1395 pIoCtx->Type.Child.cbWriteParent = cbWriteParent; 1396 } 1397 1398 LogFlow(("Allocated child I/O context %#p\n", pIoCtx)); 1399 return pIoCtx; 1400 } 1401 1402 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer) 1403 { 1404 PVDIOTASK pIoTask = NULL; 1405 1406 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask); 1407 if (pIoTask) 1408 { 1409 pIoTask->pIoStorage = pIoStorage; 1410 pIoTask->pfnComplete = pfnComplete; 1411 pIoTask->pvUser = pvUser; 1412 pIoTask->fMeta = false; 1413 pIoTask->Type.User.cbTransfer = cbTransfer; 1414 pIoTask->Type.User.pIoCtx = pIoCtx; 1415 } 1416 1417 return pIoTask; 1418 } 1419 1420 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer) 1421 { 1422 PVDIOTASK pIoTask = NULL; 1423 1424 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask); 1425 if (pIoTask) 1426 { 1427 pIoTask->pIoStorage = pIoStorage; 1428 pIoTask->pfnComplete = pfnComplete; 1429 pIoTask->pvUser = pvUser; 1430 pIoTask->fMeta = true; 1431 pIoTask->Type.Meta.pMetaXfer = pMetaXfer; 1432 } 1433 1434 return pIoTask; 1435 } 1436 1437 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1438 { 1439 LogFlow(("Freeing I/O context %#p\n", pIoCtx)); 1440 if (pIoCtx->pvAllocation) 1441 RTMemFree(pIoCtx->pvAllocation); 1442 #ifdef DEBUG 1443 memset(pIoCtx, 0xff, sizeof(VDIOCTX)); 1444 #endif 1445 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx); 1446 } 1447 1448 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask) 1449 { 1450 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask); 1451 } 1452 1453 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx) 1454 { 1455 AssertPtr(pIoCtx->pIoCtxParent); 1456 1457 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 1458 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved; 1459 pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved; 1460 } 1461 1462 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb) 1463 { 1464 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb])); 1465 1466 if (RT_LIKELY(pMetaXfer)) 1467 { 1468 pMetaXfer->Core.Key = uOffset; 1469 pMetaXfer->Core.KeyLast = uOffset + cb - 1; 1470 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE; 1471 pMetaXfer->cbMeta = cb; 1472 pMetaXfer->pIoStorage = pIoStorage; 1473 pMetaXfer->cRefs = 0; 1474 RTListInit(&pMetaXfer->ListIoCtxWaiting); 1475 } 1476 return pMetaXfer; 1477 } 1478 1479 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx) 1480 { 1481 /* Put it on the waiting list. */ 1482 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX); 1483 PVDIOCTX pHeadOld; 1484 pIoCtx->pIoCtxNext = pNext; 1485 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld)) 1486 { 1487 pNext = pHeadOld; 1488 Assert(pNext != pIoCtx); 1489 pIoCtx->pIoCtxNext = pNext; 1490 ASMNopPause(); 1491 } 1492 } 1493 1494 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1495 { 1496 LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx)); 1497 1498 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)); 1499 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; 1500 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx); 1501 } 1502 1503 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData) 1504 { 1505 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData); 1506 } 1507 1508 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData) 1509 { 1510 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData); 1511 } 1512 1513 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData) 1514 { 1515 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); 1516 } 1517 1518 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData) 1519 { 1520 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); 1521 } 1522 1523 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData) 1524 { 1525 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData); 1526 } 1527 1528 /** 1529 * Process the I/O context, core method which assumes that the I/O context 1530 * acquired the lock. 1531 * 1532 * @returns VBox status code. 1533 * @param pIoCtx I/O context to process. 1534 */ 1535 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx) 1536 { 1537 int rc = VINF_SUCCESS; 1538 1539 VD_IS_LOCKED(pIoCtx->pDisk); 1540 1541 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 1542 1543 if ( !pIoCtx->cMetaTransfersPending 1544 && !pIoCtx->cDataTransfersPending 1545 && !pIoCtx->pfnIoCtxTransfer) 1546 { 1547 rc = VINF_VD_ASYNC_IO_FINISHED; 1548 goto out; 1549 } 1550 1551 /* 1552 * We complete the I/O context in case of an error 1553 * if there is no I/O task pending. 1554 */ 1555 if ( RT_FAILURE(pIoCtx->rcReq) 1556 && !pIoCtx->cMetaTransfersPending 1557 && !pIoCtx->cDataTransfersPending) 1558 { 1559 rc = VINF_VD_ASYNC_IO_FINISHED; 1560 goto out; 1561 } 1562 1563 /* Don't change anything if there is a metadata transfer pending or we are blocked. */ 1564 if ( pIoCtx->cMetaTransfersPending 1565 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)) 1566 { 1567 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1568 goto out; 1569 } 1570 1571 if (pIoCtx->pfnIoCtxTransfer) 1572 { 1573 /* Call the transfer function advancing to the next while there is no error. */ 1574 while ( pIoCtx->pfnIoCtxTransfer 1575 && !pIoCtx->cMetaTransfersPending 1576 && RT_SUCCESS(rc)) 1577 { 1578 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer)); 1579 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx); 1580 1581 /* Advance to the next part of the transfer if the current one succeeded. */ 1582 if (RT_SUCCESS(rc)) 1583 { 1584 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext; 1585 pIoCtx->pfnIoCtxTransferNext = NULL; 1586 } 1587 } 1588 } 1589 1590 if ( RT_SUCCESS(rc) 1591 && !pIoCtx->cMetaTransfersPending 1592 && !pIoCtx->cDataTransfersPending) 1593 rc = VINF_VD_ASYNC_IO_FINISHED; 1594 else if ( RT_SUCCESS(rc) 1595 || rc == VERR_VD_NOT_ENOUGH_METADATA 1596 || rc == VERR_VD_IOCTX_HALT) 1597 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1598 else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)) 1599 { 1600 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS); 1601 /* 1602 * The I/O context completed if we have an error and there is no data 1603 * or meta data transfer pending. 1604 */ 1605 if ( !pIoCtx->cMetaTransfersPending 1606 && !pIoCtx->cDataTransfersPending) 1607 rc = VINF_VD_ASYNC_IO_FINISHED; 1608 else 1609 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1610 } 1611 1612 out: 1613 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n", 1614 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending, 1615 pIoCtx->fComplete)); 1616 1617 return rc; 1618 } 1619 1620 /** 1621 * Processes the list of waiting I/O contexts. 1622 * 1623 * @returns VBox status code. 1624 * @param pDisk The disk structure. 1625 * @param pIoCtxRc An I/O context handle which waits on the list. When processed 1626 * The status code is returned. NULL if there is no I/O context 1627 * to return the status code for. 1628 */ 1629 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc) 1630 { 1631 int rc = VINF_SUCCESS; 1632 1633 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc)); 1634 1635 VD_IS_LOCKED(pDisk); 1636 1637 /* Get the waiting list and process it in FIFO order. */ 1638 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX); 1639 1640 /* Reverse it. */ 1641 PVDIOCTX pCur = pIoCtxHead; 1642 pIoCtxHead = NULL; 1643 while (pCur) 1644 { 1645 PVDIOCTX pInsert = pCur; 1646 pCur = pCur->pIoCtxNext; 1647 pInsert->pIoCtxNext = pIoCtxHead; 1648 pIoCtxHead = pInsert; 1649 } 1650 1651 /* Process now. */ 1652 pCur = pIoCtxHead; 1653 while (pCur) 1654 { 1655 int rcTmp; 1656 PVDIOCTX pTmp = pCur; 1657 1658 pCur = pCur->pIoCtxNext; 1659 pTmp->pIoCtxNext = NULL; 1660 1661 rcTmp = vdIoCtxProcessLocked(pTmp); 1662 if (pTmp == pIoCtxRc) 1663 { 1664 /* The given I/O context was processed, pass the return code to the caller. */ 1665 rc = rcTmp; 1666 } 1667 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED 1668 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false)) 1669 { 1670 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp)); 1671 vdThreadFinishWrite(pDisk); 1672 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1, 1673 pTmp->Type.Root.pvUser2, 1674 pTmp->rcReq); 1675 vdIoCtxFree(pDisk, pTmp); 1676 } 1677 } 1678 1679 LogFlowFunc(("returns rc=%Rrc\n", rc)); 1680 return rc; 1681 } 1682 1683 /** 1684 * Processes the list of blocked I/O contexts. 1685 * 1686 * @returns nothing. 1687 * @param pDisk The disk structure. 1688 */ 1689 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk) 1690 { 1691 LogFlowFunc(("pDisk=%#p\n", pDisk)); 1692 1693 VD_IS_LOCKED(pDisk); 1694 1695 /* Get the waiting list and process it in FIFO order. */ 1696 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX); 1697 1698 /* Reverse it. */ 1699 PVDIOCTX pCur = pIoCtxHead; 1700 pIoCtxHead = NULL; 1701 while (pCur) 1702 { 1703 PVDIOCTX pInsert = pCur; 1704 pCur = pCur->pIoCtxNext; 1705 pInsert->pIoCtxNext = pIoCtxHead; 1706 pIoCtxHead = pInsert; 1707 } 1708 1709 /* Process now. */ 1710 pCur = pIoCtxHead; 1711 while (pCur) 1712 { 1713 int rc; 1714 PVDIOCTX pTmp = pCur; 1715 1716 pCur = pCur->pIoCtxNext; 1717 pTmp->pIoCtxNext = NULL; 1718 1719 Assert(!pTmp->pIoCtxParent); 1720 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED); 1721 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED; 1722 1723 rc = vdIoCtxProcessLocked(pTmp); 1724 if ( rc == VINF_VD_ASYNC_IO_FINISHED 1725 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false)) 1726 { 1727 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp)); 1728 vdThreadFinishWrite(pDisk); 1729 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1, 1730 pTmp->Type.Root.pvUser2, 1731 pTmp->rcReq); 1732 vdIoCtxFree(pDisk, pTmp); 1733 } 1734 } 1735 1736 LogFlowFunc(("returns\n")); 1737 } 1738 1739 /** 1740 * Processes the I/O context trying to lock the criticial section. 1741 * The context is deferred if the critical section is busy. 1742 * 1743 * @returns VBox status code. 1744 * @param pIoCtx The I/O context to process. 1745 */ 1746 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx) 1747 { 1748 int rc = VINF_SUCCESS; 1749 PVBOXHDD pDisk = pIoCtx->pDisk; 1750 1751 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 1752 1753 /* Put it on the waiting list first. */ 1754 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx); 1755 1756 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false)) 1757 { 1758 /* Leave it again, the context will be processed just before leaving the lock. */ 1759 LogFlowFunc(("Successfully acquired the lock\n")); 1760 rc = vdDiskUnlock(pDisk, pIoCtx); 1761 } 1762 else 1763 { 1764 LogFlowFunc(("Lock is held\n")); 1765 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1766 } 1767 1768 return rc; 1769 } 1770 1771 /** 1772 * Process the I/O context in a synchronous manner, waiting 1773 * for it to complete. 1774 * 1775 * @returns VBox status code of the completed request. 1776 * @param pIoCtx The sync I/O context. 1777 */ 1778 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx) 1779 { 1780 int rc = VINF_SUCCESS; 1781 PVBOXHDD pDisk = pIoCtx->pDisk; 1782 1783 LogFlowFunc(("pIoCtx=%p\n", pIoCtx)); 1784 1785 AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC, 1786 ("I/O context is not marked as synchronous\n")); 1787 1788 rc = vdIoCtxProcessTryLockDefer(pIoCtx); 1789 if (rc == VINF_VD_ASYNC_IO_FINISHED) 1790 rc = VINF_SUCCESS; 1791 1792 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) 1793 { 1794 rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT); 1795 AssertRC(rc); 1796 1797 rc = pDisk->rcSync; 1798 } 1799 else /* Success or error. */ 1800 vdIoCtxFree(pDisk, pIoCtx); 1801 1802 return rc; 1803 } 1804 1805 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1806 { 1807 return pDisk->pIoCtxLockOwner == pIoCtx; 1808 } 1809 1810 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1811 { 1812 int rc = VINF_SUCCESS; 1813 1814 VD_IS_LOCKED(pDisk); 1815 1816 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx)); 1817 1818 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX)) 1819 { 1820 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */ 1821 vdIoCtxDefer(pDisk, pIoCtx); 1822 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1823 } 1824 1825 LogFlowFunc(("returns -> %Rrc\n", rc)); 1826 return rc; 1827 } 1828 1829 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs) 1830 { 1831 LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n", 1832 pDisk, pIoCtx, fProcessBlockedReqs)); 1833 1834 VD_IS_LOCKED(pDisk); 1835 1836 LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner)); 1837 Assert(pDisk->pIoCtxLockOwner == pIoCtx); 1838 ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX); 1839 1840 if (fProcessBlockedReqs) 1841 { 1842 /* Process any blocked writes if the current request didn't caused another growing. */ 1843 vdDiskProcessBlockedIoCtx(pDisk); 1844 } 1845 1846 LogFlowFunc(("returns\n")); 1847 } 1848 1849 /** 1850 * internal: read the specified amount of data in whatever blocks the backend 1851 * will give us - async version. 1852 */ 1853 static int vdReadHelperAsync(PVDIOCTX pIoCtx) 1854 { 1855 int rc; 1856 size_t cbToRead = pIoCtx->Req.Io.cbTransfer; 1857 uint64_t uOffset = pIoCtx->Req.Io.uOffset; 1858 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;; 1859 size_t cbThisRead; 1860 1861 /* Loop until all reads started or we have a backend which needs to read metadata. */ 1862 do 1863 { 1864 /* Search for image with allocated block. Do not attempt to read more 1865 * than the previous reads marked as valid. Otherwise this would return 1866 * stale data when different block sizes are used for the images. */ 1867 cbThisRead = cbToRead; 1868 1869 /* 1870 * Try to read from the given image. 1871 * If the block is not allocated read from override chain if present. 1872 */ 1873 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1874 uOffset, cbThisRead, 1875 pIoCtx, &cbThisRead); 1876 1877 if (rc == VERR_VD_BLOCK_FREE) 1878 { 1879 while ( pCurrImage->pPrev != NULL 1880 && rc == VERR_VD_BLOCK_FREE) 1881 { 1882 pCurrImage = pCurrImage->pPrev; 1883 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1884 uOffset, cbThisRead, 1885 pIoCtx, &cbThisRead); 1886 } 1887 } 1888 1889 /* The task state will be updated on success already, don't do it here!. */ 1890 if (rc == VERR_VD_BLOCK_FREE) 1891 { 1892 /* No image in the chain contains the data for the block. */ 1893 vdIoCtxSet(pIoCtx, '\0', cbThisRead); 1894 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead); 1895 rc = VINF_SUCCESS; 1896 } 1897 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) 1898 rc = VINF_SUCCESS; 1899 else if (rc == VERR_VD_IOCTX_HALT) 1900 { 1901 uOffset += cbThisRead; 1902 cbToRead -= cbThisRead; 1903 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; 1904 } 1905 1906 if (RT_FAILURE(rc)) 1907 break; 1908 1909 cbToRead -= cbThisRead; 1910 uOffset += cbThisRead; 1911 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */ 1912 } while (cbToRead != 0 && RT_SUCCESS(rc)); 1913 1914 if ( rc == VERR_VD_NOT_ENOUGH_METADATA 1915 || rc == VERR_VD_IOCTX_HALT) 1916 { 1917 /* Save the current state. */ 1918 pIoCtx->Req.Io.uOffset = uOffset; 1919 pIoCtx->Req.Io.cbTransfer = cbToRead; 1920 pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart; 1921 } 1922 1923 return rc; 1924 } 1925 1926 /** 1927 * internal: parent image read wrapper for compacting. 1928 */ 1929 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf, 1930 size_t cbRead) 1931 { 1932 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser; 1933 return vdReadHelper(pParentState->pDisk, pParentState->pImage, uOffset, 1934 pvBuf, cbRead, false /* fUpdateCache */); 1965 1935 } 1966 1936 … … 2014 1984 2015 1985 /** 1986 * internal: write a complete block (only used for diff images), taking the 1987 * remaining data from parent images. This implementation does not optimize 1988 * anything (except that it tries to read only that portions from parent 1989 * images that are really needed). 1990 */ 1991 static int vdWriteHelperStandard(PVBOXHDD pDisk, PVDIMAGE pImage, 1992 PVDIMAGE pImageParentOverride, 1993 uint64_t uOffset, size_t cbWrite, 1994 size_t cbThisWrite, size_t cbPreRead, 1995 size_t cbPostRead, const void *pvBuf, 1996 void *pvTmp) 1997 { 1998 int rc = VINF_SUCCESS; 1999 2000 LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n", 2001 pDisk, pImage, pImageParentOverride, uOffset, cbWrite)); 2002 2003 /* Read the data that goes before the write to fill the block. */ 2004 if (cbPreRead) 2005 { 2006 /* 2007 * Updating the cache doesn't make sense here because 2008 * this will be done after the complete block was written. 2009 */ 2010 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, 2011 uOffset - cbPreRead, pvTmp, cbPreRead, 2012 true /* fZeroFreeBlocks*/, 2013 false /* fUpdateCache */, 0); 2014 if (RT_FAILURE(rc)) 2015 return rc; 2016 } 2017 2018 /* Copy the data to the right place in the buffer. */ 2019 memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite); 2020 2021 /* Read the data that goes after the write to fill the block. */ 2022 if (cbPostRead) 2023 { 2024 /* If we have data to be written, use that instead of reading 2025 * data from the image. */ 2026 size_t cbWriteCopy; 2027 if (cbWrite > cbThisWrite) 2028 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); 2029 else 2030 cbWriteCopy = 0; 2031 /* Figure out how much we cannot read from the image, because 2032 * the last block to write might exceed the nominal size of the 2033 * image for technical reasons. */ 2034 size_t cbFill; 2035 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) 2036 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; 2037 else 2038 cbFill = 0; 2039 /* The rest must be read from the image. */ 2040 size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill; 2041 2042 /* Now assemble the remaining data. */ 2043 if (cbWriteCopy) 2044 memcpy((char *)pvTmp + cbPreRead + cbThisWrite, 2045 (char *)pvBuf + cbThisWrite, cbWriteCopy); 2046 if (cbReadImage) 2047 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, 2048 uOffset + cbThisWrite + cbWriteCopy, 2049 (char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy, 2050 cbReadImage, true /* fZeroFreeBlocks */, 2051 false /* fUpdateCache */, 0); 2052 if (RT_FAILURE(rc)) 2053 return rc; 2054 /* Zero out the remainder of this block. Will never be visible, as this 2055 * is beyond the limit of the image. */ 2056 if (cbFill) 2057 memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage, 2058 '\0', cbFill); 2059 } 2060 2061 /* Write the full block to the virtual disk. */ 2062 RTSGSEG SegmentBuf; 2063 RTSGBUF SgBuf; 2064 VDIOCTX IoCtx; 2065 2066 SegmentBuf.pvSeg = pvTmp; 2067 SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead; 2068 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 2069 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL, 2070 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 2071 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead, 2072 cbPreRead + cbThisWrite + cbPostRead, 2073 &IoCtx, NULL, &cbPreRead, &cbPostRead, 0); 2074 Assert(rc != VERR_VD_BLOCK_FREE); 2075 Assert(cbPreRead == 0); 2076 Assert(cbPostRead == 0); 2077 2078 return rc; 2079 } 2080 2081 /** 2082 * internal: write a complete block (only used for diff images), taking the 2083 * remaining data from parent images. This implementation optimizes out writes 2084 * that do not change the data relative to the state as of the parent images. 2085 * All backends which support differential/growing images support this. 2086 */ 2087 static int vdWriteHelperOptimized(PVBOXHDD pDisk, PVDIMAGE pImage, 2088 PVDIMAGE pImageParentOverride, 2089 uint64_t uOffset, size_t cbWrite, 2090 size_t cbThisWrite, size_t cbPreRead, 2091 size_t cbPostRead, const void *pvBuf, 2092 void *pvTmp, unsigned cImagesRead) 2093 { 2094 size_t cbFill = 0; 2095 size_t cbWriteCopy = 0; 2096 size_t cbReadImage = 0; 2097 int rc; 2098 2099 LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n", 2100 pDisk, pImage, pImageParentOverride, uOffset, cbWrite)); 2101 2102 if (cbPostRead) 2103 { 2104 /* Figure out how much we cannot read from the image, because 2105 * the last block to write might exceed the nominal size of the 2106 * image for technical reasons. */ 2107 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) 2108 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; 2109 2110 /* If we have data to be written, use that instead of reading 2111 * data from the image. */ 2112 if (cbWrite > cbThisWrite) 2113 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); 2114 2115 /* The rest must be read from the image. */ 2116 cbReadImage = cbPostRead - cbWriteCopy - cbFill; 2117 } 2118 2119 /* Read the entire data of the block so that we can compare whether it will 2120 * be modified by the write or not. */ 2121 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, uOffset - cbPreRead, pvTmp, 2122 cbPreRead + cbThisWrite + cbPostRead - cbFill, 2123 true /* fZeroFreeBlocks */, false /* fUpdateCache */, 2124 cImagesRead); 2125 if (RT_FAILURE(rc)) 2126 return rc; 2127 2128 /* Check if the write would modify anything in this block. */ 2129 if ( !memcmp((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite) 2130 && (!cbWriteCopy || !memcmp((char *)pvTmp + cbPreRead + cbThisWrite, 2131 (char *)pvBuf + cbThisWrite, cbWriteCopy))) 2132 { 2133 /* Block is completely unchanged, so no need to write anything. */ 2134 return VINF_SUCCESS; 2135 } 2136 2137 /* Copy the data to the right place in the buffer. */ 2138 memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite); 2139 2140 /* Handle the data that goes after the write to fill the block. */ 2141 if (cbPostRead) 2142 { 2143 /* Now assemble the remaining data. */ 2144 if (cbWriteCopy) 2145 memcpy((char *)pvTmp + cbPreRead + cbThisWrite, 2146 (char *)pvBuf + cbThisWrite, cbWriteCopy); 2147 /* Zero out the remainder of this block. Will never be visible, as this 2148 * is beyond the limit of the image. */ 2149 if (cbFill) 2150 memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage, 2151 '\0', cbFill); 2152 } 2153 2154 /* Write the full block to the virtual disk. */ 2155 RTSGSEG SegmentBuf; 2156 RTSGBUF SgBuf; 2157 VDIOCTX IoCtx; 2158 2159 SegmentBuf.pvSeg = pvTmp; 2160 SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead; 2161 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 2162 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL, 2163 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 2164 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead, 2165 cbPreRead + cbThisWrite + cbPostRead, 2166 &IoCtx, NULL, &cbPreRead, &cbPostRead, 0); 2167 Assert(rc != VERR_VD_BLOCK_FREE); 2168 Assert(cbPreRead == 0); 2169 Assert(cbPostRead == 0); 2170 2171 return rc; 2172 } 2173 2174 /** 2016 2175 * internal: write buffer to the image, taking care of block boundaries and 2017 2176 * write optimizations. … … 2022 2181 bool fUpdateCache, unsigned cImagesRead) 2023 2182 { 2024 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE; 2025 RTSGSEG Segment; 2183 int rc; 2184 unsigned fWrite; 2185 size_t cbThisWrite; 2186 size_t cbPreRead, cbPostRead; 2187 uint64_t uOffsetCur = uOffset; 2188 size_t cbWriteCur = cbWrite; 2189 const void *pcvBufCur = pvBuf; 2190 RTSGSEG SegmentBuf; 2026 2191 RTSGBUF SgBuf; 2027 2192 VDIOCTX IoCtx; 2028 2193 2029 if (fUpdateCache) 2030 fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE; 2031 2032 Segment.pvSeg = (void *)pvBuf; 2033 Segment.cbSeg = cbWrite; 2034 RTSgBufInit(&SgBuf, &Segment, 1); 2035 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf, 2036 NULL, vdWriteHelperAsync, fFlags); 2037 2038 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride; 2039 IoCtx.Req.Io.cImagesRead = cImagesRead; 2040 IoCtx.pIoCtxParent = NULL; 2041 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete; 2042 IoCtx.Type.Root.pvUser1 = pDisk; 2043 IoCtx.Type.Root.pvUser2 = NULL; 2044 return vdIoCtxProcessSync(&IoCtx); 2194 /* Loop until all written. */ 2195 do 2196 { 2197 /* Try to write the possibly partial block to the last opened image. 2198 * This works when the block is already allocated in this image or 2199 * if it is a full-block write (and allocation isn't suppressed below). 2200 * For image formats which don't support zero blocks, it's beneficial 2201 * to avoid unnecessarily allocating unchanged blocks. This prevents 2202 * unwanted expanding of images. VMDK is an example. */ 2203 cbThisWrite = cbWriteCur; 2204 fWrite = (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME) 2205 ? 0 : VD_WRITE_NO_ALLOC; 2206 2207 SegmentBuf.pvSeg = (void *)pcvBufCur; 2208 SegmentBuf.cbSeg = cbWrite; 2209 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 2210 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL, 2211 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 2212 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffsetCur, cbThisWrite, 2213 &IoCtx, &cbThisWrite, &cbPreRead, 2214 &cbPostRead, fWrite); 2215 if (rc == VERR_VD_BLOCK_FREE) 2216 { 2217 void *pvTmp = RTMemTmpAlloc(cbPreRead + cbThisWrite + cbPostRead); 2218 AssertBreakStmt(VALID_PTR(pvTmp), rc = VERR_NO_MEMORY); 2219 2220 if (!(pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)) 2221 { 2222 /* Optimized write, suppress writing to a so far unallocated 2223 * block if the data is in fact not changed. */ 2224 rc = vdWriteHelperOptimized(pDisk, pImage, pImageParentOverride, 2225 uOffsetCur, cbWriteCur, 2226 cbThisWrite, cbPreRead, cbPostRead, 2227 pcvBufCur, pvTmp, cImagesRead); 2228 } 2229 else 2230 { 2231 /* Normal write, not optimized in any way. The block will 2232 * be written no matter what. This will usually (unless the 2233 * backend has some further optimization enabled) cause the 2234 * block to be allocated. */ 2235 rc = vdWriteHelperStandard(pDisk, pImage, pImageParentOverride, 2236 uOffsetCur, cbWriteCur, 2237 cbThisWrite, cbPreRead, cbPostRead, 2238 pcvBufCur, pvTmp); 2239 } 2240 RTMemTmpFree(pvTmp); 2241 if (RT_FAILURE(rc)) 2242 break; 2243 } 2244 2245 cbWriteCur -= cbThisWrite; 2246 uOffsetCur += cbThisWrite; 2247 pcvBufCur = (char *)pcvBufCur + cbThisWrite; 2248 } while (cbWriteCur != 0 && RT_SUCCESS(rc)); 2249 2250 #if 0 /** @todo: Soon removed when sync and async version of the write helper are merged. */ 2251 /* Update the cache on success */ 2252 if ( RT_SUCCESS(rc) 2253 && pDisk->pCache 2254 && fUpdateCache) 2255 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, cbWrite, NULL); 2256 2257 if (RT_SUCCESS(rc)) 2258 rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite); 2259 #endif 2260 2261 return rc; 2045 2262 } 2046 2263 … … 2274 2491 } 2275 2492 2276 static int vdWriteHelperCommitAsync(PVDIOCTX pIoCtx) 2493 /** 2494 * internal: write a complete block (only used for diff images), taking the 2495 * remaining data from parent images. This implementation does not optimize 2496 * anything (except that it tries to read only that portions from parent 2497 * images that are really needed) - async version. 2498 */ 2499 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx) 2500 { 2501 int rc = VINF_SUCCESS; 2502 2503 #if 0 2504 2505 /* Read the data that goes before the write to fill the block. */ 2506 if (cbPreRead) 2507 { 2508 rc = vdReadHelperAsync(pIoCtxDst); 2509 if (RT_FAILURE(rc)) 2510 return rc; 2511 } 2512 2513 /* Copy the data to the right place in the buffer. */ 2514 vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbThisWrite); 2515 2516 /* Read the data that goes after the write to fill the block. */ 2517 if (cbPostRead) 2518 { 2519 /* If we have data to be written, use that instead of reading 2520 * data from the image. */ 2521 size_t cbWriteCopy; 2522 if (cbWrite > cbThisWrite) 2523 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); 2524 else 2525 cbWriteCopy = 0; 2526 /* Figure out how much we cannot read from the image, because 2527 * the last block to write might exceed the nominal size of the 2528 * image for technical reasons. */ 2529 size_t cbFill; 2530 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) 2531 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; 2532 else 2533 cbFill = 0; 2534 /* The rest must be read from the image. */ 2535 size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill; 2536 2537 /* Now assemble the remaining data. */ 2538 if (cbWriteCopy) 2539 { 2540 vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbWriteCopy); 2541 ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbWriteCopy); 2542 } 2543 2544 if (cbReadImage) 2545 rc = vdReadHelperAsync(pDisk, pImage, pImageParentOverride, pIoCtxDst, 2546 uOffset + cbThisWrite + cbWriteCopy, 2547 cbReadImage); 2548 if (RT_FAILURE(rc)) 2549 return rc; 2550 /* Zero out the remainder of this block. Will never be visible, as this 2551 * is beyond the limit of the image. */ 2552 if (cbFill) 2553 { 2554 vdIoCtxSet(pIoCtxDst, '\0', cbFill); 2555 ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbFill); 2556 } 2557 } 2558 2559 if ( !pIoCtxDst->cbTransferLeft 2560 && !pIoCtxDst->cMetaTransfersPending 2561 && ASMAtomicCmpXchgBool(&pIoCtxDst->fComplete, true, false)) 2562 { 2563 /* Write the full block to the virtual disk. */ 2564 vdIoCtxChildReset(pIoCtxDst); 2565 rc = pImage->Backend->pfnWrite(pImage->pBackendData, 2566 uOffset - cbPreRead, 2567 cbPreRead + cbThisWrite + cbPostRead, 2568 pIoCtxDst, 2569 NULL, &cbPreRead, &cbPostRead, 0); 2570 Assert(rc != VERR_VD_BLOCK_FREE); 2571 Assert(cbPreRead == 0); 2572 Assert(cbPostRead == 0); 2573 } 2574 else 2575 { 2576 LogFlow(("cbTransferLeft=%u cMetaTransfersPending=%u fComplete=%RTbool\n", 2577 pIoCtxDst->cbTransferLeft, pIoCtxDst->cMetaTransfersPending, 2578 pIoCtxDst->fComplete)); 2579 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 2580 } 2581 2582 return rc; 2583 #endif 2584 return VERR_NOT_IMPLEMENTED; 2585 } 2586 2587 static int vdWriteHelperOptimizedCommitAsync(PVDIOCTX pIoCtx) 2277 2588 { 2278 2589 int rc = VINF_SUCCESS; … … 2284 2595 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2285 2596 rc = pImage->Backend->pfnWrite(pImage->pBackendData, 2286 pIoCtx->Req.Io.uOffset - cbPreRead,2287 cbPreRead + cbThisWrite + cbPostRead,2288 pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);2597 pIoCtx->Req.Io.uOffset - cbPreRead, 2598 cbPreRead + cbThisWrite + cbPostRead, 2599 pIoCtx, NULL, &cbPreRead, &cbPostRead, 0); 2289 2600 Assert(rc != VERR_VD_BLOCK_FREE); 2290 2601 Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0); … … 2375 2686 /* Write the full block to the virtual disk. */ 2376 2687 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 2377 pIoCtx->pfnIoCtxTransferNext = vdWriteHelper CommitAsync;2688 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedCommitAsync; 2378 2689 2379 2690 return rc; … … 2385 2696 2386 2697 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2387 2388 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;2389 2698 2390 2699 if (pIoCtx->Req.Io.cbTransferLeft) … … 2453 2762 /* Next step */ 2454 2763 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync; 2455 return VINF_SUCCESS;2456 }2457 2458 static int vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx)2459 {2460 int rc = VINF_SUCCESS;2461 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;2462 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;2463 PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;2464 2465 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));2466 2467 vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);2468 if (cbPostRead)2469 {2470 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;2471 size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;2472 size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;2473 2474 /* Now assemble the remaining data. */2475 if (cbWriteCopy)2476 {2477 /*2478 * The S/G buffer of the parent needs to be cloned because2479 * it is not allowed to modify the state.2480 */2481 RTSGBUF SgBufParentTmp;2482 2483 RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);2484 RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);2485 }2486 2487 /* Zero out the remainder of this block. Will never be visible, as this2488 * is beyond the limit of the image. */2489 if (cbFill)2490 {2491 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage);2492 vdIoCtxSet(pIoCtx, '\0', cbFill);2493 }2494 2495 if (cbReadImage)2496 {2497 /* Read remaining data. */2498 }2499 else2500 {2501 /* Write the full block to the virtual disk. */2502 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);2503 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;2504 }2505 }2506 else2507 {2508 /* Write the full block to the virtual disk. */2509 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);2510 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;2511 }2512 2513 return rc;2514 }2515 2516 static int vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx)2517 {2518 int rc = VINF_SUCCESS;2519 2520 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));2521 2522 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;2523 2524 if (pIoCtx->Req.Io.cbTransferLeft)2525 rc = vdReadHelperAsync(pIoCtx);2526 2527 if ( RT_SUCCESS(rc)2528 && ( pIoCtx->Req.Io.cbTransferLeft2529 || pIoCtx->cMetaTransfersPending))2530 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;2531 else2532 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;2533 2534 return rc;2535 }2536 2537 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)2538 {2539 PVBOXHDD pDisk = pIoCtx->pDisk;2540 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved;2541 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;2542 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;2543 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;2544 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent;2545 size_t cbFill = 0;2546 size_t cbWriteCopy = 0;2547 size_t cbReadImage = 0;2548 2549 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));2550 2551 AssertPtr(pIoCtx->pIoCtxParent);2552 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);2553 2554 /* Calculate the amount of data to read that goes after the write to fill the block. */2555 if (cbPostRead)2556 {2557 /* If we have data to be written, use that instead of reading2558 * data from the image. */2559 cbWriteCopy;2560 if (cbWrite > cbThisWrite)2561 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);2562 2563 /* Figure out how much we cannot read from the image, because2564 * the last block to write might exceed the nominal size of the2565 * image for technical reasons. */2566 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)2567 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;2568 2569 /* The rest must be read from the image. */2570 cbReadImage = cbPostRead - cbWriteCopy - cbFill;2571 }2572 2573 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill;2574 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;2575 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;2576 2577 /* Next step */2578 if (cbPreRead)2579 {2580 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync;2581 2582 /* Read the data that goes before the write to fill the block. */2583 pIoCtx->Req.Io.cbTransferLeft = cbPreRead;2584 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;2585 pIoCtx->Req.Io.uOffset -= cbPreRead;2586 }2587 else2588 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;2589 2590 2764 return VINF_SUCCESS; 2591 2765 } … … 9463 9637 pfnComplete, pvUser1, pvUser2, 9464 9638 NULL, vdReadHelperAsync, 9465 VDIOCTX_FLAGS_ ZERO_FREE_BLOCKS);9639 VDIOCTX_FLAGS_DEFAULT); 9466 9640 if (!pIoCtx) 9467 9641 {
Note:
See TracChangeset
for help on using the changeset viewer.