- Timestamp: Feb 11, 2013 11:15:42 AM (12 years ago)
- svn:sync-xref-src-repo-rev: 83715
- File: 1 edited
trunk/src/VBox/Storage/VD.cpp
r44616 r44628 355 355 /** S/G buffer */ 356 356 RTSGBUF SgBuf; 357 /** Flag whether all reads from the image chain returned VERR_VD_BLOCK_FREE358 * so far. */359 bool fAllFree;360 /** Number of bytes to clear in the buffer before the current read. */361 size_t cbBufClear;362 /** Number of images to read. */363 unsigned cImagesRead;364 /** Override for the parent image to start reading from. */365 PVDIMAGE pImageParentOverride;366 357 } Io; 367 358 /** Discard requests. */ … … 433 424 434 425 /** Default flags for an I/O context, i.e. unblocked and async. */ 435 #define VDIOCTX_FLAGS_DEFAULT 426 #define VDIOCTX_FLAGS_DEFAULT (0) 436 427 /** Flag whether the context is blocked. */ 437 #define VDIOCTX_FLAGS_BLOCKED 428 #define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0) 438 429 /** Flag whether the I/O context is using synchronous I/O. */ 439 #define VDIOCTX_FLAGS_SYNC RT_BIT_32(1) 440 /** Flag whether the read should update the cache. */ 441 #define VDIOCTX_FLAGS_READ_UDATE_CACHE RT_BIT_32(2) 442 /** Flag whether free blocks should be zeroed. 443 * If false and no image has data for sepcified 444 * range VERR_VD_BLOCK_FREE is returned for the I/O context. 445 * Note that unallocated blocks are still zeroed 446 * if at least one image has valid data for a part 447 * of the range. 448 */ 449 #define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS RT_BIT_32(3) 450 /** Don't free the I/O context when complete because 451 * it was alloacted elsewhere (stack, ...). */ 452 #define VDIOCTX_FLAGS_DONT_FREE RT_BIT_32(4) 430 #define VDIOCTX_FLAGS_SYNC RT_BIT_32(1) 453 431 454 432 /** NIL I/O context pointer value. */ … … 599 577 /** Forward declaration of the async discard helper. */ 600 578 static int vdDiscardHelperAsync(PVDIOCTX pIoCtx); 601 static int vdWriteHelperAsync(PVDIOCTX pIoCtx);602 579 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk); 603 580 static int vdDiskUnlock(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc); 604 static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq);605 581 606 582 /** … … 833 809 pIoCtx->Req.Io.pImageStart = pImageStart; 834 810 pIoCtx->Req.Io.pImageCur = pImageStart; 835 pIoCtx->Req.Io.fAllFree = true;836 pIoCtx->Req.Io.cbBufClear = 0;837 pIoCtx->Req.Io.pImageParentOverride = NULL;838 811 pIoCtx->cDataTransfersPending = 0; 839 812 pIoCtx->cMetaTransfersPending = 0; … … 844 817 pIoCtx->pfnIoCtxTransferNext = NULL; 845 818 pIoCtx->rcReq = VINF_SUCCESS; 846 pIoCtx->pIoCtxParent = NULL;847 819 848 820 /* There is no S/G list for a flush request. 
*/ … … 873 845 */ 874 846 static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset, 875 size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead)847 PVDIOCTX pIoCtx, size_t cbRead, size_t *pcbRead) 876 848 { 877 849 int rc = VINF_SUCCESS; … … 936 908 937 909 /** 938 * Creates a new empty discard state.939 *940 * @returns Pointer to the new discard state or NULL if out of memory.941 */942 static PVDDISCARDSTATE vdDiscardStateCreate(void)943 {944 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));945 946 if (pDiscard)947 {948 RTListInit(&pDiscard->ListLru);949 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));950 if (!pDiscard->pTreeBlocks)951 {952 RTMemFree(pDiscard);953 pDiscard = NULL;954 }955 }956 957 return pDiscard;958 }959 960 /**961 * Removes the least recently used blocks from the waiting list until962 * the new value is reached.963 *964 * @returns VBox status code.965 * @param pDisk VD disk container.966 * @param pDiscard The discard state.967 * @param cbDiscardingNew How many bytes should be waiting on success.968 * The number of bytes waiting can be less.969 */970 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)971 {972 int rc = VINF_SUCCESS;973 974 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",975 pDisk, pDiscard, cbDiscardingNew));976 977 while (pDiscard->cbDiscarding > cbDiscardingNew)978 {979 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);980 981 Assert(!RTListIsEmpty(&pDiscard->ListLru));982 983 /* Go over the allocation bitmap and mark all discarded sectors as unused. */984 uint64_t offStart = pBlock->Core.Key;985 uint32_t idxStart = 0;986 size_t cbLeft = pBlock->cbDiscard;987 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);988 uint32_t cSectors = pBlock->cbDiscard / 512;989 990 while (cbLeft > 0)991 {992 int32_t idxEnd;993 size_t cbThis = cbLeft;994 995 if (fAllocated)996 {997 /* Check for the first unallocated bit. */998 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);999 if (idxEnd != -1)1000 {1001 cbThis = (idxEnd - idxStart) * 512;1002 fAllocated = false;1003 }1004 }1005 else1006 {1007 /* Mark as unused and check for the first set bit. 
*/1008 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);1009 if (idxEnd != -1)1010 cbThis = (idxEnd - idxStart) * 512;1011 1012 1013 VDIOCTX IoCtx;1014 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,1015 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);1016 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,1017 &IoCtx, offStart, cbThis, NULL,1018 NULL, &cbThis, NULL,1019 VD_DISCARD_MARK_UNUSED);1020 if (RT_FAILURE(rc))1021 break;1022 1023 fAllocated = true;1024 }1025 1026 idxStart = idxEnd;1027 offStart += cbThis;1028 cbLeft -= cbThis;1029 }1030 1031 if (RT_FAILURE(rc))1032 break;1033 1034 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);1035 Assert(pBlockRemove == pBlock);1036 RTListNodeRemove(&pBlock->NodeLru);1037 1038 pDiscard->cbDiscarding -= pBlock->cbDiscard;1039 RTMemFree(pBlock->pbmAllocated);1040 RTMemFree(pBlock);1041 }1042 1043 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);1044 1045 LogFlowFunc(("returns rc=%Rrc\n", rc));1046 return rc;1047 }1048 1049 /**1050 * Destroys the current discard state, writing any waiting blocks to the image.1051 *1052 * @returns VBox status code.1053 * @param pDisk VD disk container.1054 */1055 static int vdDiscardStateDestroy(PVBOXHDD pDisk)1056 {1057 int rc = VINF_SUCCESS;1058 1059 if (pDisk->pDiscard)1060 {1061 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);1062 AssertRC(rc);1063 RTMemFree(pDisk->pDiscard->pTreeBlocks);1064 RTMemFree(pDisk->pDiscard);1065 pDisk->pDiscard = NULL;1066 }1067 1068 return rc;1069 }1070 1071 /**1072 * Marks the given range as allocated in the image.1073 * Required if there are discards in progress and a write to a block which can get discarded1074 * is written to.1075 *1076 * @returns VBox status code.1077 * @param pDisk VD container data.1078 * @param uOffset First byte to mark as allocated.1079 * @param cbRange Number of bytes to mark as allocated.1080 */1081 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange)1082 {1083 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;1084 int rc = VINF_SUCCESS;1085 1086 if (pDiscard)1087 {1088 do1089 {1090 size_t cbThisRange = cbRange;1091 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);1092 1093 if (pBlock)1094 {1095 int32_t idxStart, idxEnd;1096 1097 Assert(!(cbThisRange % 512));1098 Assert(!((uOffset - pBlock->Core.Key) % 512));1099 1100 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);1101 1102 idxStart = (uOffset - pBlock->Core.Key) / 512;1103 idxEnd = idxStart + (cbThisRange / 512);1104 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);1105 }1106 else1107 {1108 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);1109 if (pBlock)1110 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);1111 }1112 1113 Assert(cbRange >= cbThisRange);1114 1115 uOffset += cbThisRange;1116 cbRange -= cbThisRange;1117 } while (cbRange != 0);1118 }1119 1120 return rc;1121 }1122 1123 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1124 uint64_t uOffset, size_t cbTransfer,1125 PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,1126 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1127 uint32_t fFlags)1128 {1129 PVDIOCTX pIoCtx = NULL;1130 1131 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);1132 if (RT_LIKELY(pIoCtx))1133 {1134 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, 
pImageStart,1135 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);1136 }1137 1138 return pIoCtx;1139 }1140 1141 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1142 uint64_t uOffset, size_t cbTransfer,1143 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,1144 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,1145 void *pvUser1, void *pvUser2,1146 void *pvAllocation,1147 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1148 uint32_t fFlags)1149 {1150 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1151 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);1152 1153 if (RT_LIKELY(pIoCtx))1154 {1155 pIoCtx->pIoCtxParent = NULL;1156 pIoCtx->Type.Root.pfnComplete = pfnComplete;1157 pIoCtx->Type.Root.pvUser1 = pvUser1;1158 pIoCtx->Type.Root.pvUser2 = pvUser2;1159 }1160 1161 LogFlow(("Allocated root I/O context %#p\n", pIoCtx));1162 return pIoCtx;1163 }1164 1165 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges,1166 unsigned cRanges,1167 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,1168 void *pvUser1, void *pvUser2,1169 void *pvAllocation,1170 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1171 uint32_t fFlags)1172 {1173 PVDIOCTX pIoCtx = NULL;1174 1175 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);1176 if (RT_LIKELY(pIoCtx))1177 {1178 pIoCtx->pIoCtxNext = NULL;1179 pIoCtx->pDisk = pDisk;1180 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD;1181 pIoCtx->cDataTransfersPending = 0;1182 pIoCtx->cMetaTransfersPending = 0;1183 pIoCtx->fComplete = false;1184 pIoCtx->fFlags = fFlags;1185 pIoCtx->pvAllocation = pvAllocation;1186 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;1187 pIoCtx->pfnIoCtxTransferNext = NULL;1188 pIoCtx->rcReq = VINF_SUCCESS;1189 pIoCtx->Req.Discard.paRanges = paRanges;1190 pIoCtx->Req.Discard.cRanges = cRanges;1191 pIoCtx->Req.Discard.idxRange = 0;1192 pIoCtx->Req.Discard.cbDiscardLeft = 0;1193 pIoCtx->Req.Discard.offCur = 0;1194 pIoCtx->Req.Discard.cbThisDiscard = 0;1195 1196 pIoCtx->pIoCtxParent = NULL;1197 pIoCtx->Type.Root.pfnComplete = pfnComplete;1198 pIoCtx->Type.Root.pvUser1 = pvUser1;1199 pIoCtx->Type.Root.pvUser2 = pvUser2;1200 }1201 1202 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));1203 return pIoCtx;1204 }1205 1206 DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1207 uint64_t uOffset, size_t cbTransfer,1208 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,1209 PVDIOCTX pIoCtxParent, size_t cbTransferParent,1210 size_t cbWriteParent, void *pvAllocation,1211 PFNVDIOCTXTRANSFER pfnIoCtxTransfer)1212 {1213 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1214 pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0);1215 1216 AssertPtr(pIoCtxParent);1217 Assert(!pIoCtxParent->pIoCtxParent);1218 1219 if (RT_LIKELY(pIoCtx))1220 {1221 pIoCtx->pIoCtxParent = pIoCtxParent;1222 pIoCtx->Type.Child.uOffsetSaved = uOffset;1223 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;1224 pIoCtx->Type.Child.cbTransferParent = cbTransferParent;1225 pIoCtx->Type.Child.cbWriteParent = cbWriteParent;1226 }1227 1228 LogFlow(("Allocated child I/O context %#p\n", pIoCtx));1229 return pIoCtx;1230 }1231 1232 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)1233 {1234 PVDIOTASK pIoTask = NULL;1235 1236 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);1237 if (pIoTask)1238 {1239 pIoTask->pIoStorage = pIoStorage;1240 pIoTask->pfnComplete = pfnComplete;1241 pIoTask->pvUser = pvUser;1242 
pIoTask->fMeta = false;1243 pIoTask->Type.User.cbTransfer = cbTransfer;1244 pIoTask->Type.User.pIoCtx = pIoCtx;1245 }1246 1247 return pIoTask;1248 }1249 1250 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)1251 {1252 PVDIOTASK pIoTask = NULL;1253 1254 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);1255 if (pIoTask)1256 {1257 pIoTask->pIoStorage = pIoStorage;1258 pIoTask->pfnComplete = pfnComplete;1259 pIoTask->pvUser = pvUser;1260 pIoTask->fMeta = true;1261 pIoTask->Type.Meta.pMetaXfer = pMetaXfer;1262 }1263 1264 return pIoTask;1265 }1266 1267 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1268 {1269 LogFlow(("Freeing I/O context %#p\n", pIoCtx));1270 1271 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE))1272 {1273 if (pIoCtx->pvAllocation)1274 RTMemFree(pIoCtx->pvAllocation);1275 #ifdef DEBUG1276 memset(pIoCtx, 0xff, sizeof(VDIOCTX));1277 #endif1278 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);1279 }1280 }1281 1282 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask)1283 {1284 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);1285 }1286 1287 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)1288 {1289 AssertPtr(pIoCtx->pIoCtxParent);1290 1291 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);1292 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved;1293 pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved;1294 }1295 1296 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)1297 {1298 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));1299 1300 if (RT_LIKELY(pMetaXfer))1301 {1302 pMetaXfer->Core.Key = uOffset;1303 pMetaXfer->Core.KeyLast = uOffset + cb - 1;1304 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE;1305 pMetaXfer->cbMeta = cb;1306 pMetaXfer->pIoStorage = pIoStorage;1307 pMetaXfer->cRefs = 0;1308 RTListInit(&pMetaXfer->ListIoCtxWaiting);1309 }1310 return pMetaXfer;1311 }1312 1313 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)1314 {1315 /* Put it on the waiting list. 
*/1316 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);1317 PVDIOCTX pHeadOld;1318 pIoCtx->pIoCtxNext = pNext;1319 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))1320 {1321 pNext = pHeadOld;1322 Assert(pNext != pIoCtx);1323 pIoCtx->pIoCtxNext = pNext;1324 ASMNopPause();1325 }1326 }1327 1328 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1329 {1330 LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx));1331 1332 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));1333 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;1334 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);1335 }1336 1337 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)1338 {1339 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);1340 }1341 1342 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)1343 {1344 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);1345 }1346 1347 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)1348 {1349 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);1350 }1351 1352 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)1353 {1354 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);1355 }1356 1357 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)1358 {1359 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);1360 }1361 1362 /**1363 * Process the I/O context, core method which assumes that the I/O context1364 * acquired the lock.1365 *1366 * @returns VBox status code.1367 * @param pIoCtx I/O context to process.1368 */1369 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)1370 {1371 int rc = VINF_SUCCESS;1372 1373 VD_IS_LOCKED(pIoCtx->pDisk);1374 1375 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));1376 1377 if ( !pIoCtx->cMetaTransfersPending1378 && !pIoCtx->cDataTransfersPending1379 && !pIoCtx->pfnIoCtxTransfer)1380 {1381 rc = VINF_VD_ASYNC_IO_FINISHED;1382 goto out;1383 }1384 1385 /*1386 * We complete the I/O context in case of an error1387 * if there is no I/O task pending.1388 */1389 if ( RT_FAILURE(pIoCtx->rcReq)1390 && !pIoCtx->cMetaTransfersPending1391 && !pIoCtx->cDataTransfersPending)1392 {1393 rc = VINF_VD_ASYNC_IO_FINISHED;1394 goto out;1395 }1396 1397 /* Don't change anything if there is a metadata transfer pending or we are blocked. */1398 if ( pIoCtx->cMetaTransfersPending1399 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))1400 {1401 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1402 goto out;1403 }1404 1405 if (pIoCtx->pfnIoCtxTransfer)1406 {1407 /* Call the transfer function advancing to the next while there is no error. */1408 while ( pIoCtx->pfnIoCtxTransfer1409 && !pIoCtx->cMetaTransfersPending1410 && RT_SUCCESS(rc))1411 {1412 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));1413 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);1414 1415 /* Advance to the next part of the transfer if the current one succeeded. 
*/1416 if (RT_SUCCESS(rc))1417 {1418 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;1419 pIoCtx->pfnIoCtxTransferNext = NULL;1420 }1421 }1422 }1423 1424 if ( RT_SUCCESS(rc)1425 && !pIoCtx->cMetaTransfersPending1426 && !pIoCtx->cDataTransfersPending)1427 rc = VINF_VD_ASYNC_IO_FINISHED;1428 else if ( RT_SUCCESS(rc)1429 || rc == VERR_VD_NOT_ENOUGH_METADATA1430 || rc == VERR_VD_IOCTX_HALT)1431 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1432 else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))1433 {1434 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);1435 /*1436 * The I/O context completed if we have an error and there is no data1437 * or meta data transfer pending.1438 */1439 if ( !pIoCtx->cMetaTransfersPending1440 && !pIoCtx->cDataTransfersPending)1441 rc = VINF_VD_ASYNC_IO_FINISHED;1442 else1443 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1444 }1445 1446 out:1447 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",1448 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,1449 pIoCtx->fComplete));1450 1451 return rc;1452 }1453 1454 /**1455 * Processes the list of waiting I/O contexts.1456 *1457 * @returns VBox status code.1458 * @param pDisk The disk structure.1459 * @param pIoCtxRc An I/O context handle which waits on the list. When processed1460 * The status code is returned. NULL if there is no I/O context1461 * to return the status code for.1462 */1463 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)1464 {1465 int rc = VINF_SUCCESS;1466 1467 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));1468 1469 VD_IS_LOCKED(pDisk);1470 1471 /* Get the waiting list and process it in FIFO order. */1472 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);1473 1474 /* Reverse it. */1475 PVDIOCTX pCur = pIoCtxHead;1476 pIoCtxHead = NULL;1477 while (pCur)1478 {1479 PVDIOCTX pInsert = pCur;1480 pCur = pCur->pIoCtxNext;1481 pInsert->pIoCtxNext = pIoCtxHead;1482 pIoCtxHead = pInsert;1483 }1484 1485 /* Process now. */1486 pCur = pIoCtxHead;1487 while (pCur)1488 {1489 int rcTmp;1490 PVDIOCTX pTmp = pCur;1491 1492 pCur = pCur->pIoCtxNext;1493 pTmp->pIoCtxNext = NULL;1494 1495 /*1496 * Need to clear the sync flag here if there is a new I/O context1497 * with it set and the context is not given in pIoCtxRc.1498 * This happens most likely on a different thread and that one shouldn't1499 * process the context synchronously.1500 *1501 * The thread who issued the context will wait on the event semaphore1502 * anyway which is signalled when the completion handler is called.1503 */1504 if ( pTmp->fFlags & VDIOCTX_FLAGS_SYNC1505 && pTmp != pIoCtxRc)1506 pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC;1507 1508 rcTmp = vdIoCtxProcessLocked(pTmp);1509 if (pTmp == pIoCtxRc)1510 {1511 /* The given I/O context was processed, pass the return code to the caller. 
*/1512 rc = rcTmp;1513 }1514 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED1515 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))1516 {1517 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));1518 vdThreadFinishWrite(pDisk);1519 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,1520 pTmp->Type.Root.pvUser2,1521 pTmp->rcReq);1522 vdIoCtxFree(pDisk, pTmp);1523 }1524 }1525 1526 LogFlowFunc(("returns rc=%Rrc\n", rc));1527 return rc;1528 }1529 1530 /**1531 * Processes the list of blocked I/O contexts.1532 *1533 * @returns nothing.1534 * @param pDisk The disk structure.1535 */1536 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)1537 {1538 LogFlowFunc(("pDisk=%#p\n", pDisk));1539 1540 VD_IS_LOCKED(pDisk);1541 1542 /* Get the waiting list and process it in FIFO order. */1543 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);1544 1545 /* Reverse it. */1546 PVDIOCTX pCur = pIoCtxHead;1547 pIoCtxHead = NULL;1548 while (pCur)1549 {1550 PVDIOCTX pInsert = pCur;1551 pCur = pCur->pIoCtxNext;1552 pInsert->pIoCtxNext = pIoCtxHead;1553 pIoCtxHead = pInsert;1554 }1555 1556 /* Process now. */1557 pCur = pIoCtxHead;1558 while (pCur)1559 {1560 int rc;1561 PVDIOCTX pTmp = pCur;1562 1563 pCur = pCur->pIoCtxNext;1564 pTmp->pIoCtxNext = NULL;1565 1566 Assert(!pTmp->pIoCtxParent);1567 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);1568 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;1569 1570 rc = vdIoCtxProcessLocked(pTmp);1571 if ( rc == VINF_VD_ASYNC_IO_FINISHED1572 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))1573 {1574 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));1575 vdThreadFinishWrite(pDisk);1576 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,1577 pTmp->Type.Root.pvUser2,1578 pTmp->rcReq);1579 vdIoCtxFree(pDisk, pTmp);1580 }1581 }1582 1583 LogFlowFunc(("returns\n"));1584 }1585 1586 /**1587 * Processes the I/O context trying to lock the criticial section.1588 * The context is deferred if the critical section is busy.1589 *1590 * @returns VBox status code.1591 * @param pIoCtx The I/O context to process.1592 */1593 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)1594 {1595 int rc = VINF_SUCCESS;1596 PVBOXHDD pDisk = pIoCtx->pDisk;1597 1598 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));1599 1600 /* Put it on the waiting list first. */1601 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);1602 1603 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))1604 {1605 /* Leave it again, the context will be processed just before leaving the lock. 
*/1606 LogFlowFunc(("Successfully acquired the lock\n"));1607 rc = vdDiskUnlock(pDisk, pIoCtx);1608 }1609 else1610 {1611 LogFlowFunc(("Lock is held\n"));1612 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1613 }1614 1615 return rc;1616 }1617 1618 /**1619 * Process the I/O context in a synchronous manner, waiting1620 * for it to complete.1621 *1622 * @returns VBox status code of the completed request.1623 * @param pIoCtx The sync I/O context.1624 */1625 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx)1626 {1627 int rc = VINF_SUCCESS;1628 PVBOXHDD pDisk = pIoCtx->pDisk;1629 1630 LogFlowFunc(("pIoCtx=%p\n", pIoCtx));1631 1632 AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC,1633 ("I/O context is not marked as synchronous\n"));1634 1635 rc = vdIoCtxProcessTryLockDefer(pIoCtx);1636 if (rc == VINF_VD_ASYNC_IO_FINISHED)1637 rc = VINF_SUCCESS;1638 1639 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)1640 {1641 rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT);1642 AssertRC(rc);1643 1644 rc = pDisk->rcSync;1645 }1646 else /* Success or error. */1647 vdIoCtxFree(pDisk, pIoCtx);1648 1649 return rc;1650 }1651 1652 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1653 {1654 return pDisk->pIoCtxLockOwner == pIoCtx;1655 }1656 1657 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1658 {1659 int rc = VINF_SUCCESS;1660 1661 VD_IS_LOCKED(pDisk);1662 1663 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));1664 1665 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))1666 {1667 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */1668 vdIoCtxDefer(pDisk, pIoCtx);1669 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1670 }1671 1672 LogFlowFunc(("returns -> %Rrc\n", rc));1673 return rc;1674 }1675 1676 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)1677 {1678 LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",1679 pDisk, pIoCtx, fProcessBlockedReqs));1680 1681 VD_IS_LOCKED(pDisk);1682 1683 LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));1684 Assert(pDisk->pIoCtxLockOwner == pIoCtx);1685 ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);1686 1687 if (fProcessBlockedReqs)1688 {1689 /* Process any blocked writes if the current request didn't caused another growing. */1690 vdDiskProcessBlockedIoCtx(pDisk);1691 }1692 1693 LogFlowFunc(("returns\n"));1694 }1695 1696 /**1697 910 * Internal: Reads a given amount of data from the image chain of the disk. 
1698 911 **/ 1699 912 static int vdDiskReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride, 1700 uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead)913 uint64_t uOffset, void *pvBuf, size_t cbRead, size_t *pcbThisRead) 1701 914 { 1702 915 int rc = VINF_SUCCESS; 1703 916 size_t cbThisRead = cbRead; 917 RTSGSEG SegmentBuf; 918 RTSGBUF SgBuf; 919 VDIOCTX IoCtx; 1704 920 1705 921 AssertPtr(pcbThisRead); 1706 922 1707 923 *pcbThisRead = 0; 924 925 SegmentBuf.pvSeg = pvBuf; 926 SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE; 927 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 928 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL, 929 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1708 930 1709 931 /* … … 1712 934 */ 1713 935 rc = pImage->Backend->pfnRead(pImage->pBackendData, 1714 uOffset, cbThisRead, pIoCtx,1715 &cbThisRead);936 uOffset, cbThisRead, &IoCtx, 937 &cbThisRead); 1716 938 1717 939 if (rc == VERR_VD_BLOCK_FREE) … … 1722 944 { 1723 945 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1724 uOffset, cbThisRead, pIoCtx,1725 &cbThisRead);946 uOffset, cbThisRead, &IoCtx, 947 &cbThisRead); 1726 948 } 1727 949 } … … 1730 952 *pcbThisRead = cbThisRead; 1731 953 1732 return rc;1733 }1734 1735 /**1736 * internal: read the specified amount of data in whatever blocks the backend1737 * will give us - async version.1738 */1739 static int vdReadHelperAsync(PVDIOCTX pIoCtx)1740 {1741 int rc;1742 PVBOXHDD pDisk = pIoCtx->pDisk;1743 size_t cbToRead = pIoCtx->Req.Io.cbTransfer;1744 uint64_t uOffset = pIoCtx->Req.Io.uOffset;1745 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;1746 PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;1747 unsigned cImagesRead = pIoCtx->Req.Io.cImagesRead;1748 size_t cbThisRead;1749 1750 /* Loop until all reads started or we have a backend which needs to read metadata. */1751 do1752 {1753 /* Search for image with allocated block. Do not attempt to read more1754 * than the previous reads marked as valid. Otherwise this would return1755 * stale data when different block sizes are used for the images. */1756 cbThisRead = cbToRead;1757 1758 if ( pDisk->pCache1759 && !pImageParentOverride)1760 {1761 rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead,1762 pIoCtx, &cbThisRead);1763 if (rc == VERR_VD_BLOCK_FREE)1764 {1765 rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead,1766 pIoCtx, &cbThisRead);1767 1768 /* If the read was successful, write the data back into the cache. */1769 if ( RT_SUCCESS(rc)1770 && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UDATE_CACHE)1771 {1772 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead,1773 pIoCtx, NULL);1774 }1775 }1776 }1777 else1778 {1779 1780 /*1781 * Try to read from the given image.1782 * If the block is not allocated read from override chain if present.1783 */1784 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,1785 uOffset, cbThisRead, pIoCtx,1786 &cbThisRead);1787 1788 if ( rc == VERR_VD_BLOCK_FREE1789 && cImagesRead != 1)1790 {1791 unsigned cImagesToProcess = cImagesRead;1792 1793 pCurrImage = pImageParentOverride ? 
pImageParentOverride : pCurrImage->pPrev;1794 pIoCtx->Req.Io.pImageParentOverride = NULL;1795 1796 while (pCurrImage && rc == VERR_VD_BLOCK_FREE)1797 {1798 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,1799 uOffset, cbThisRead,1800 pIoCtx, &cbThisRead);1801 if (cImagesToProcess == 1)1802 break;1803 else if (cImagesToProcess > 0)1804 cImagesToProcess--;1805 1806 if (rc == VERR_VD_BLOCK_FREE)1807 pCurrImage = pCurrImage->pPrev;1808 }1809 }1810 }1811 1812 /* The task state will be updated on success already, don't do it here!. */1813 if (rc == VERR_VD_BLOCK_FREE)1814 {1815 /* No image in the chain contains the data for the block. */1816 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);1817 1818 /* Fill the free space with 0 if we are told to do so1819 * or a previous read returned valid data. */1820 if ( (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)1821 || !pIoCtx->Req.Io.fAllFree)1822 vdIoCtxSet(pIoCtx, '\0', cbThisRead);1823 else1824 pIoCtx->Req.Io.cbBufClear += cbThisRead;1825 1826 if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)1827 rc = VINF_VD_NEW_ZEROED_BLOCK;1828 else1829 rc = VINF_SUCCESS;1830 }1831 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)1832 rc = VINF_SUCCESS;1833 else if (rc == VERR_VD_IOCTX_HALT)1834 {1835 uOffset += cbThisRead;1836 cbToRead -= cbThisRead;1837 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;1838 }1839 else if ( RT_SUCCESS(rc)1840 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))1841 {1842 /* First not free block, fill the space before with 0. */1843 RTSGBUF SgBuf;1844 RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf);1845 RTSgBufReset(&SgBuf);1846 RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear);1847 pIoCtx->Req.Io.cbBufClear = 0;1848 pIoCtx->Req.Io.fAllFree = false;1849 }1850 1851 if (RT_FAILURE(rc))1852 break;1853 1854 cbToRead -= cbThisRead;1855 uOffset += cbThisRead;1856 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */1857 } while (cbToRead != 0 && RT_SUCCESS(rc));1858 1859 if ( rc == VERR_VD_NOT_ENOUGH_METADATA1860 || rc == VERR_VD_IOCTX_HALT)1861 {1862 /* Save the current state. */1863 pIoCtx->Req.Io.uOffset = uOffset;1864 pIoCtx->Req.Io.cbTransfer = cbToRead;1865 pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;1866 }1867 1868 return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS) && pIoCtx->Req.Io.fAllFree)1869 ? VERR_VD_BLOCK_FREE1870 : rc;1871 }1872 1873 /**1874 * internal: parent image read wrapper for compacting.1875 */1876 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,1877 size_t cbRead)1878 {1879 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;1880 1881 /** @todo1882 * Only used for compaction so far which is not possible to mix with async I/O.1883 * Needs to be changed if we want to support online compaction of images.1884 */1885 bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true);1886 AssertMsgReturn(!fLocked,1887 ("Calling synchronous parent read while another thread holds the disk lock\n"),1888 VERR_VD_INVALID_STATE);1889 1890 /* Fake an I/O context. 
*/1891 RTSGSEG Segment;1892 RTSGBUF SgBuf;1893 VDIOCTX IoCtx;1894 1895 Segment.pvSeg = pvBuf;1896 Segment.cbSeg = cbRead;1897 RTSgBufInit(&SgBuf, &Segment, 1);1898 vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage,1899 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);1900 int rc = vdReadHelperAsync(&IoCtx);1901 ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false);1902 954 return rc; 1903 955 } … … 1932 984 bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead) 1933 985 { 1934 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE; 1935 RTSGSEG Segment; 1936 RTSGBUF SgBuf; 1937 VDIOCTX IoCtx; 1938 1939 if (fZeroFreeBlocks) 1940 fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; 1941 if (fUpdateCache) 1942 fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE; 1943 1944 Segment.pvSeg = pvBuf; 1945 Segment.cbSeg = cbRead; 1946 RTSgBufInit(&SgBuf, &Segment, 1); 1947 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf, 1948 NULL, vdReadHelperAsync, fFlags); 1949 1950 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride; 1951 IoCtx.Req.Io.cImagesRead = cImagesRead; 1952 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete; 1953 IoCtx.Type.Root.pvUser1 = pDisk; 1954 IoCtx.Type.Root.pvUser2 = NULL; 1955 return vdIoCtxProcessSync(&IoCtx); 986 int rc = VINF_SUCCESS; 987 size_t cbThisRead; 988 bool fAllFree = true; 989 size_t cbBufClear = 0; 990 991 /* Loop until all read. */ 992 do 993 { 994 /* Search for image with allocated block. Do not attempt to read more 995 * than the previous reads marked as valid. Otherwise this would return 996 * stale data when different block sizes are used for the images. */ 997 cbThisRead = cbRead; 998 999 if ( pDisk->pCache 1000 && !pImageParentOverride) 1001 { 1002 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */ 1003 rc = vdCacheReadHelper(pDisk->pCache, uOffset, pvBuf, 1004 cbThisRead, &cbThisRead); 1005 #endif 1006 if (rc == VERR_VD_BLOCK_FREE) 1007 { 1008 rc = vdDiskReadHelper(pDisk, pImage, NULL, uOffset, pvBuf, cbThisRead, 1009 &cbThisRead); 1010 1011 /* If the read was successful, write the data back into the cache. */ 1012 if ( RT_SUCCESS(rc) 1013 && fUpdateCache) 1014 { 1015 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */ 1016 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, 1017 cbThisRead, NULL); 1018 #endif 1019 } 1020 } 1021 } 1022 else 1023 { 1024 RTSGSEG SegmentBuf; 1025 RTSGBUF SgBuf; 1026 VDIOCTX IoCtx; 1027 1028 SegmentBuf.pvSeg = pvBuf; 1029 SegmentBuf.cbSeg = cbThisRead; 1030 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 1031 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL, 1032 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1033 1034 /* 1035 * Try to read from the given image. 1036 * If the block is not allocated read from override chain if present. 1037 */ 1038 rc = pImage->Backend->pfnRead(pImage->pBackendData, 1039 uOffset, cbThisRead, &IoCtx, 1040 &cbThisRead); 1041 1042 if ( rc == VERR_VD_BLOCK_FREE 1043 && cImagesRead != 1) 1044 { 1045 unsigned cImagesToProcess = cImagesRead; 1046 1047 for (PVDIMAGE pCurrImage = pImageParentOverride ? 
pImageParentOverride : pImage->pPrev; 1048 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE; 1049 pCurrImage = pCurrImage->pPrev) 1050 { 1051 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1052 uOffset, cbThisRead, 1053 &IoCtx, &cbThisRead); 1054 if (cImagesToProcess == 1) 1055 break; 1056 else if (cImagesToProcess > 0) 1057 cImagesToProcess--; 1058 } 1059 } 1060 } 1061 1062 /* No image in the chain contains the data for the block. */ 1063 if (rc == VERR_VD_BLOCK_FREE) 1064 { 1065 /* Fill the free space with 0 if we are told to do so 1066 * or a previous read returned valid data. */ 1067 if (fZeroFreeBlocks || !fAllFree) 1068 memset(pvBuf, '\0', cbThisRead); 1069 else 1070 cbBufClear += cbThisRead; 1071 1072 if (pImage->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS) 1073 rc = VINF_VD_NEW_ZEROED_BLOCK; 1074 else 1075 rc = VINF_SUCCESS; 1076 } 1077 else if (RT_SUCCESS(rc)) 1078 { 1079 /* First not free block, fill the space before with 0. */ 1080 if (!fZeroFreeBlocks) 1081 { 1082 memset((char *)pvBuf - cbBufClear, '\0', cbBufClear); 1083 cbBufClear = 0; 1084 fAllFree = false; 1085 } 1086 } 1087 1088 cbRead -= cbThisRead; 1089 uOffset += cbThisRead; 1090 pvBuf = (char *)pvBuf + cbThisRead; 1091 } while (cbRead != 0 && RT_SUCCESS(rc)); 1092 1093 return (!fZeroFreeBlocks && fAllFree) ? VERR_VD_BLOCK_FREE : rc; 1956 1094 } 1957 1095 … … 1965 1103 return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead, 1966 1104 true /* fZeroFreeBlocks */, fUpdateCache, 0); 1105 } 1106 1107 /** 1108 * Creates a new empty discard state. 1109 * 1110 * @returns Pointer to the new discard state or NULL if out of memory. 1111 */ 1112 static PVDDISCARDSTATE vdDiscardStateCreate(void) 1113 { 1114 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE)); 1115 1116 if (pDiscard) 1117 { 1118 RTListInit(&pDiscard->ListLru); 1119 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE)); 1120 if (!pDiscard->pTreeBlocks) 1121 { 1122 RTMemFree(pDiscard); 1123 pDiscard = NULL; 1124 } 1125 } 1126 1127 return pDiscard; 1128 } 1129 1130 /** 1131 * Removes the least recently used blocks from the waiting list until 1132 * the new value is reached. 1133 * 1134 * @returns VBox status code. 1135 * @param pDisk VD disk container. 1136 * @param pDiscard The discard state. 1137 * @param cbDiscardingNew How many bytes should be waiting on success. 1138 * The number of bytes waiting can be less. 1139 */ 1140 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew) 1141 { 1142 int rc = VINF_SUCCESS; 1143 1144 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n", 1145 pDisk, pDiscard, cbDiscardingNew)); 1146 1147 while (pDiscard->cbDiscarding > cbDiscardingNew) 1148 { 1149 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru); 1150 1151 Assert(!RTListIsEmpty(&pDiscard->ListLru)); 1152 1153 /* Go over the allocation bitmap and mark all discarded sectors as unused. */ 1154 uint64_t offStart = pBlock->Core.Key; 1155 uint32_t idxStart = 0; 1156 size_t cbLeft = pBlock->cbDiscard; 1157 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart); 1158 uint32_t cSectors = pBlock->cbDiscard / 512; 1159 1160 while (cbLeft > 0) 1161 { 1162 int32_t idxEnd; 1163 size_t cbThis = cbLeft; 1164 1165 if (fAllocated) 1166 { 1167 /* Check for the first unallocated bit. 
*/ 1168 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart); 1169 if (idxEnd != -1) 1170 { 1171 cbThis = (idxEnd - idxStart) * 512; 1172 fAllocated = false; 1173 } 1174 } 1175 else 1176 { 1177 /* Mark as unused and check for the first set bit. */ 1178 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart); 1179 if (idxEnd != -1) 1180 cbThis = (idxEnd - idxStart) * 512; 1181 1182 1183 VDIOCTX IoCtx; 1184 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL, 1185 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1186 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, 1187 &IoCtx, offStart, cbThis, NULL, 1188 NULL, &cbThis, NULL, 1189 VD_DISCARD_MARK_UNUSED); 1190 if (RT_FAILURE(rc)) 1191 break; 1192 1193 fAllocated = true; 1194 } 1195 1196 idxStart = idxEnd; 1197 offStart += cbThis; 1198 cbLeft -= cbThis; 1199 } 1200 1201 if (RT_FAILURE(rc)) 1202 break; 1203 1204 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key); 1205 Assert(pBlockRemove == pBlock); 1206 RTListNodeRemove(&pBlock->NodeLru); 1207 1208 pDiscard->cbDiscarding -= pBlock->cbDiscard; 1209 RTMemFree(pBlock->pbmAllocated); 1210 RTMemFree(pBlock); 1211 } 1212 1213 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew); 1214 1215 LogFlowFunc(("returns rc=%Rrc\n", rc)); 1216 return rc; 1217 } 1218 1219 /** 1220 * Destroys the current discard state, writing any waiting blocks to the image. 1221 * 1222 * @returns VBox status code. 1223 * @param pDisk VD disk container. 1224 */ 1225 static int vdDiscardStateDestroy(PVBOXHDD pDisk) 1226 { 1227 int rc = VINF_SUCCESS; 1228 1229 if (pDisk->pDiscard) 1230 { 1231 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */); 1232 AssertRC(rc); 1233 RTMemFree(pDisk->pDiscard->pTreeBlocks); 1234 RTMemFree(pDisk->pDiscard); 1235 pDisk->pDiscard = NULL; 1236 } 1237 1238 return rc; 1239 } 1240 1241 /** 1242 * Marks the given range as allocated in the image. 1243 * Required if there are discards in progress and a write to a block which can get discarded 1244 * is written to. 1245 * 1246 * @returns VBox status code. 1247 * @param pDisk VD container data. 1248 * @param uOffset First byte to mark as allocated. 1249 * @param cbRange Number of bytes to mark as allocated. 
1250 */ 1251 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange) 1252 { 1253 PVDDISCARDSTATE pDiscard = pDisk->pDiscard; 1254 int rc = VINF_SUCCESS; 1255 1256 if (pDiscard) 1257 { 1258 do 1259 { 1260 size_t cbThisRange = cbRange; 1261 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset); 1262 1263 if (pBlock) 1264 { 1265 int32_t idxStart, idxEnd; 1266 1267 Assert(!(cbThisRange % 512)); 1268 Assert(!((uOffset - pBlock->Core.Key) % 512)); 1269 1270 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1); 1271 1272 idxStart = (uOffset - pBlock->Core.Key) / 512; 1273 idxEnd = idxStart + (cbThisRange / 512); 1274 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd); 1275 } 1276 else 1277 { 1278 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true); 1279 if (pBlock) 1280 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset); 1281 } 1282 1283 Assert(cbRange >= cbThisRange); 1284 1285 uOffset += cbThisRange; 1286 cbRange -= cbThisRange; 1287 } while (cbRange != 0); 1288 } 1289 1290 return rc; 1291 } 1292 1293 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1294 uint64_t uOffset, size_t cbTransfer, 1295 PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf, 1296 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1297 uint32_t fFlags) 1298 { 1299 PVDIOCTX pIoCtx = NULL; 1300 1301 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx); 1302 if (RT_LIKELY(pIoCtx)) 1303 { 1304 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1305 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); 1306 } 1307 1308 return pIoCtx; 1309 } 1310 1311 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1312 uint64_t uOffset, size_t cbTransfer, 1313 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf, 1314 PFNVDASYNCTRANSFERCOMPLETE pfnComplete, 1315 void *pvUser1, void *pvUser2, 1316 void *pvAllocation, 1317 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1318 uint32_t fFlags) 1319 { 1320 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1321 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); 1322 1323 if (RT_LIKELY(pIoCtx)) 1324 { 1325 pIoCtx->pIoCtxParent = NULL; 1326 pIoCtx->Type.Root.pfnComplete = pfnComplete; 1327 pIoCtx->Type.Root.pvUser1 = pvUser1; 1328 pIoCtx->Type.Root.pvUser2 = pvUser2; 1329 } 1330 1331 LogFlow(("Allocated root I/O context %#p\n", pIoCtx)); 1332 return pIoCtx; 1333 } 1334 1335 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges, 1336 unsigned cRanges, 1337 PFNVDASYNCTRANSFERCOMPLETE pfnComplete, 1338 void *pvUser1, void *pvUser2, 1339 void *pvAllocation, 1340 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1341 uint32_t fFlags) 1342 { 1343 PVDIOCTX pIoCtx = NULL; 1344 1345 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx); 1346 if (RT_LIKELY(pIoCtx)) 1347 { 1348 pIoCtx->pIoCtxNext = NULL; 1349 pIoCtx->pDisk = pDisk; 1350 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD; 1351 pIoCtx->cDataTransfersPending = 0; 1352 pIoCtx->cMetaTransfersPending = 0; 1353 pIoCtx->fComplete = false; 1354 pIoCtx->fFlags = fFlags; 1355 pIoCtx->pvAllocation = pvAllocation; 1356 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer; 1357 pIoCtx->pfnIoCtxTransferNext = NULL; 1358 pIoCtx->rcReq = VINF_SUCCESS; 1359 pIoCtx->Req.Discard.paRanges = paRanges; 1360 pIoCtx->Req.Discard.cRanges = cRanges; 1361 pIoCtx->Req.Discard.idxRange = 0; 1362 pIoCtx->Req.Discard.cbDiscardLeft = 0; 1363 pIoCtx->Req.Discard.offCur = 0; 1364 
pIoCtx->Req.Discard.cbThisDiscard = 0; 1365 1366 pIoCtx->pIoCtxParent = NULL; 1367 pIoCtx->Type.Root.pfnComplete = pfnComplete; 1368 pIoCtx->Type.Root.pvUser1 = pvUser1; 1369 pIoCtx->Type.Root.pvUser2 = pvUser2; 1370 } 1371 1372 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx)); 1373 return pIoCtx; 1374 } 1375 1376 DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1377 uint64_t uOffset, size_t cbTransfer, 1378 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf, 1379 PVDIOCTX pIoCtxParent, size_t cbTransferParent, 1380 size_t cbWriteParent, void *pvAllocation, 1381 PFNVDIOCTXTRANSFER pfnIoCtxTransfer) 1382 { 1383 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1384 pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0); 1385 1386 AssertPtr(pIoCtxParent); 1387 Assert(!pIoCtxParent->pIoCtxParent); 1388 1389 if (RT_LIKELY(pIoCtx)) 1390 { 1391 pIoCtx->pIoCtxParent = pIoCtxParent; 1392 pIoCtx->Type.Child.uOffsetSaved = uOffset; 1393 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer; 1394 pIoCtx->Type.Child.cbTransferParent = cbTransferParent; 1395 pIoCtx->Type.Child.cbWriteParent = cbWriteParent; 1396 } 1397 1398 LogFlow(("Allocated child I/O context %#p\n", pIoCtx)); 1399 return pIoCtx; 1400 } 1401 1402 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer) 1403 { 1404 PVDIOTASK pIoTask = NULL; 1405 1406 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask); 1407 if (pIoTask) 1408 { 1409 pIoTask->pIoStorage = pIoStorage; 1410 pIoTask->pfnComplete = pfnComplete; 1411 pIoTask->pvUser = pvUser; 1412 pIoTask->fMeta = false; 1413 pIoTask->Type.User.cbTransfer = cbTransfer; 1414 pIoTask->Type.User.pIoCtx = pIoCtx; 1415 } 1416 1417 return pIoTask; 1418 } 1419 1420 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer) 1421 { 1422 PVDIOTASK pIoTask = NULL; 1423 1424 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask); 1425 if (pIoTask) 1426 { 1427 pIoTask->pIoStorage = pIoStorage; 1428 pIoTask->pfnComplete = pfnComplete; 1429 pIoTask->pvUser = pvUser; 1430 pIoTask->fMeta = true; 1431 pIoTask->Type.Meta.pMetaXfer = pMetaXfer; 1432 } 1433 1434 return pIoTask; 1435 } 1436 1437 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1438 { 1439 LogFlow(("Freeing I/O context %#p\n", pIoCtx)); 1440 if (pIoCtx->pvAllocation) 1441 RTMemFree(pIoCtx->pvAllocation); 1442 #ifdef DEBUG 1443 memset(pIoCtx, 0xff, sizeof(VDIOCTX)); 1444 #endif 1445 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx); 1446 } 1447 1448 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask) 1449 { 1450 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask); 1451 } 1452 1453 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx) 1454 { 1455 AssertPtr(pIoCtx->pIoCtxParent); 1456 1457 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 1458 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved; 1459 pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved; 1460 } 1461 1462 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb) 1463 { 1464 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb])); 1465 1466 if (RT_LIKELY(pMetaXfer)) 1467 { 1468 pMetaXfer->Core.Key = uOffset; 1469 pMetaXfer->Core.KeyLast = uOffset + cb - 1; 1470 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE; 1471 pMetaXfer->cbMeta = cb; 1472 
pMetaXfer->pIoStorage = pIoStorage; 1473 pMetaXfer->cRefs = 0; 1474 RTListInit(&pMetaXfer->ListIoCtxWaiting); 1475 } 1476 return pMetaXfer; 1477 } 1478 1479 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx) 1480 { 1481 /* Put it on the waiting list. */ 1482 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX); 1483 PVDIOCTX pHeadOld; 1484 pIoCtx->pIoCtxNext = pNext; 1485 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld)) 1486 { 1487 pNext = pHeadOld; 1488 Assert(pNext != pIoCtx); 1489 pIoCtx->pIoCtxNext = pNext; 1490 ASMNopPause(); 1491 } 1492 } 1493 1494 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1495 { 1496 LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx)); 1497 1498 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)); 1499 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; 1500 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx); 1501 } 1502 1503 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData) 1504 { 1505 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData); 1506 } 1507 1508 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData) 1509 { 1510 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData); 1511 } 1512 1513 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData) 1514 { 1515 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); 1516 } 1517 1518 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData) 1519 { 1520 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); 1521 } 1522 1523 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData) 1524 { 1525 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData); 1526 } 1527 1528 /** 1529 * Process the I/O context, core method which assumes that the I/O context 1530 * acquired the lock. 1531 * 1532 * @returns VBox status code. 1533 * @param pIoCtx I/O context to process. 1534 */ 1535 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx) 1536 { 1537 int rc = VINF_SUCCESS; 1538 1539 VD_IS_LOCKED(pIoCtx->pDisk); 1540 1541 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 1542 1543 if ( !pIoCtx->cMetaTransfersPending 1544 && !pIoCtx->cDataTransfersPending 1545 && !pIoCtx->pfnIoCtxTransfer) 1546 { 1547 rc = VINF_VD_ASYNC_IO_FINISHED; 1548 goto out; 1549 } 1550 1551 /* 1552 * We complete the I/O context in case of an error 1553 * if there is no I/O task pending. 1554 */ 1555 if ( RT_FAILURE(pIoCtx->rcReq) 1556 && !pIoCtx->cMetaTransfersPending 1557 && !pIoCtx->cDataTransfersPending) 1558 { 1559 rc = VINF_VD_ASYNC_IO_FINISHED; 1560 goto out; 1561 } 1562 1563 /* Don't change anything if there is a metadata transfer pending or we are blocked. */ 1564 if ( pIoCtx->cMetaTransfersPending 1565 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)) 1566 { 1567 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1568 goto out; 1569 } 1570 1571 if (pIoCtx->pfnIoCtxTransfer) 1572 { 1573 /* Call the transfer function advancing to the next while there is no error. */ 1574 while ( pIoCtx->pfnIoCtxTransfer 1575 && !pIoCtx->cMetaTransfersPending 1576 && RT_SUCCESS(rc)) 1577 { 1578 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer)); 1579 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx); 1580 1581 /* Advance to the next part of the transfer if the current one succeeded. 
*/ 1582 if (RT_SUCCESS(rc)) 1583 { 1584 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext; 1585 pIoCtx->pfnIoCtxTransferNext = NULL; 1586 } 1587 } 1588 } 1589 1590 if ( RT_SUCCESS(rc) 1591 && !pIoCtx->cMetaTransfersPending 1592 && !pIoCtx->cDataTransfersPending) 1593 rc = VINF_VD_ASYNC_IO_FINISHED; 1594 else if ( RT_SUCCESS(rc) 1595 || rc == VERR_VD_NOT_ENOUGH_METADATA 1596 || rc == VERR_VD_IOCTX_HALT) 1597 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1598 else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)) 1599 { 1600 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS); 1601 /* 1602 * The I/O context completed if we have an error and there is no data 1603 * or meta data transfer pending. 1604 */ 1605 if ( !pIoCtx->cMetaTransfersPending 1606 && !pIoCtx->cDataTransfersPending) 1607 rc = VINF_VD_ASYNC_IO_FINISHED; 1608 else 1609 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1610 } 1611 1612 out: 1613 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n", 1614 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending, 1615 pIoCtx->fComplete)); 1616 1617 return rc; 1618 } 1619 1620 /** 1621 * Processes the list of waiting I/O contexts. 1622 * 1623 * @returns VBox status code. 1624 * @param pDisk The disk structure. 1625 * @param pIoCtxRc An I/O context handle which waits on the list. When processed 1626 * The status code is returned. NULL if there is no I/O context 1627 * to return the status code for. 1628 */ 1629 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc) 1630 { 1631 int rc = VINF_SUCCESS; 1632 1633 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc)); 1634 1635 VD_IS_LOCKED(pDisk); 1636 1637 /* Get the waiting list and process it in FIFO order. */ 1638 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX); 1639 1640 /* Reverse it. */ 1641 PVDIOCTX pCur = pIoCtxHead; 1642 pIoCtxHead = NULL; 1643 while (pCur) 1644 { 1645 PVDIOCTX pInsert = pCur; 1646 pCur = pCur->pIoCtxNext; 1647 pInsert->pIoCtxNext = pIoCtxHead; 1648 pIoCtxHead = pInsert; 1649 } 1650 1651 /* Process now. */ 1652 pCur = pIoCtxHead; 1653 while (pCur) 1654 { 1655 int rcTmp; 1656 PVDIOCTX pTmp = pCur; 1657 1658 pCur = pCur->pIoCtxNext; 1659 pTmp->pIoCtxNext = NULL; 1660 1661 rcTmp = vdIoCtxProcessLocked(pTmp); 1662 if (pTmp == pIoCtxRc) 1663 { 1664 /* The given I/O context was processed, pass the return code to the caller. */ 1665 rc = rcTmp; 1666 } 1667 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED 1668 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false)) 1669 { 1670 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp)); 1671 vdThreadFinishWrite(pDisk); 1672 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1, 1673 pTmp->Type.Root.pvUser2, 1674 pTmp->rcReq); 1675 vdIoCtxFree(pDisk, pTmp); 1676 } 1677 } 1678 1679 LogFlowFunc(("returns rc=%Rrc\n", rc)); 1680 return rc; 1681 } 1682 1683 /** 1684 * Processes the list of blocked I/O contexts. 1685 * 1686 * @returns nothing. 1687 * @param pDisk The disk structure. 1688 */ 1689 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk) 1690 { 1691 LogFlowFunc(("pDisk=%#p\n", pDisk)); 1692 1693 VD_IS_LOCKED(pDisk); 1694 1695 /* Get the waiting list and process it in FIFO order. */ 1696 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX); 1697 1698 /* Reverse it. 
*/ 1699 PVDIOCTX pCur = pIoCtxHead; 1700 pIoCtxHead = NULL; 1701 while (pCur) 1702 { 1703 PVDIOCTX pInsert = pCur; 1704 pCur = pCur->pIoCtxNext; 1705 pInsert->pIoCtxNext = pIoCtxHead; 1706 pIoCtxHead = pInsert; 1707 } 1708 1709 /* Process now. */ 1710 pCur = pIoCtxHead; 1711 while (pCur) 1712 { 1713 int rc; 1714 PVDIOCTX pTmp = pCur; 1715 1716 pCur = pCur->pIoCtxNext; 1717 pTmp->pIoCtxNext = NULL; 1718 1719 Assert(!pTmp->pIoCtxParent); 1720 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED); 1721 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED; 1722 1723 rc = vdIoCtxProcessLocked(pTmp); 1724 if ( rc == VINF_VD_ASYNC_IO_FINISHED 1725 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false)) 1726 { 1727 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp)); 1728 vdThreadFinishWrite(pDisk); 1729 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1, 1730 pTmp->Type.Root.pvUser2, 1731 pTmp->rcReq); 1732 vdIoCtxFree(pDisk, pTmp); 1733 } 1734 } 1735 1736 LogFlowFunc(("returns\n")); 1737 } 1738 1739 /** 1740 * Processes the I/O context trying to lock the criticial section. 1741 * The context is deferred if the critical section is busy. 1742 * 1743 * @returns VBox status code. 1744 * @param pIoCtx The I/O context to process. 1745 */ 1746 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx) 1747 { 1748 int rc = VINF_SUCCESS; 1749 PVBOXHDD pDisk = pIoCtx->pDisk; 1750 1751 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 1752 1753 /* Put it on the waiting list first. */ 1754 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx); 1755 1756 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false)) 1757 { 1758 /* Leave it again, the context will be processed just before leaving the lock. */ 1759 LogFlowFunc(("Successfully acquired the lock\n")); 1760 rc = vdDiskUnlock(pDisk, pIoCtx); 1761 } 1762 else 1763 { 1764 LogFlowFunc(("Lock is held\n")); 1765 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1766 } 1767 1768 return rc; 1769 } 1770 1771 /** 1772 * Process the I/O context in a synchronous manner, waiting 1773 * for it to complete. 1774 * 1775 * @returns VBox status code of the completed request. 1776 * @param pIoCtx The sync I/O context. 1777 */ 1778 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx) 1779 { 1780 int rc = VINF_SUCCESS; 1781 PVBOXHDD pDisk = pIoCtx->pDisk; 1782 1783 LogFlowFunc(("pIoCtx=%p\n", pIoCtx)); 1784 1785 AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC, 1786 ("I/O context is not marked as synchronous\n")); 1787 1788 rc = vdIoCtxProcessTryLockDefer(pIoCtx); 1789 if (rc == VINF_VD_ASYNC_IO_FINISHED) 1790 rc = VINF_SUCCESS; 1791 1792 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) 1793 { 1794 rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT); 1795 AssertRC(rc); 1796 1797 rc = pDisk->rcSync; 1798 } 1799 else /* Success or error. */ 1800 vdIoCtxFree(pDisk, pIoCtx); 1801 1802 return rc; 1803 } 1804 1805 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1806 { 1807 return pDisk->pIoCtxLockOwner == pIoCtx; 1808 } 1809 1810 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1811 { 1812 int rc = VINF_SUCCESS; 1813 1814 VD_IS_LOCKED(pDisk); 1815 1816 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx)); 1817 1818 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX)) 1819 { 1820 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. 
1821         vdIoCtxDefer(pDisk, pIoCtx);
1822         rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1823     }
1824
1825     LogFlowFunc(("returns -> %Rrc\n", rc));
1826     return rc;
1827 }
1828
1829 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
1830 {
1831     LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
1832                  pDisk, pIoCtx, fProcessBlockedReqs));
1833
1834     VD_IS_LOCKED(pDisk);
1835
1836     LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
1837     Assert(pDisk->pIoCtxLockOwner == pIoCtx);
1838     ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);
1839
1840     if (fProcessBlockedReqs)
1841     {
1842         /* Process any blocked writes if the current request didn't cause another grow operation. */
1843         vdDiskProcessBlockedIoCtx(pDisk);
1844     }
1845
1846     LogFlowFunc(("returns\n"));
1847 }
1848
1849 /**
1850  * internal: read the specified amount of data in whatever blocks the backend
1851  * will give us - async version.
1852  */
1853 static int vdReadHelperAsync(PVDIOCTX pIoCtx)
1854 {
1855     int rc;
1856     size_t cbToRead = pIoCtx->Req.Io.cbTransfer;
1857     uint64_t uOffset = pIoCtx->Req.Io.uOffset;
1858     PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;
1859     size_t cbThisRead;
1860
1861     /* Loop until all reads are started or we have a backend which needs to read metadata. */
1862     do
1863     {
1864         /* Search for an image with an allocated block. Do not attempt to read more
1865          * than the previous reads marked as valid. Otherwise this would return
1866          * stale data when different block sizes are used for the images. */
1867         cbThisRead = cbToRead;
1868
1869         /*
1870          * Try to read from the given image.
1871          * If the block is not allocated, read from the override chain if present.
1872          */
1873         rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1874                                           uOffset, cbThisRead,
1875                                           pIoCtx, &cbThisRead);
1876
1877         if (rc == VERR_VD_BLOCK_FREE)
1878         {
1879             while (   pCurrImage->pPrev != NULL
1880                    && rc == VERR_VD_BLOCK_FREE)
1881             {
1882                 pCurrImage = pCurrImage->pPrev;
1883                 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1884                                                   uOffset, cbThisRead,
1885                                                   pIoCtx, &cbThisRead);
1886             }
1887         }
1888
1889         /* The task state will be updated on success already, don't do it here! */
1890         if (rc == VERR_VD_BLOCK_FREE)
1891         {
1892             /* No image in the chain contains the data for the block. */
1893             vdIoCtxSet(pIoCtx, '\0', cbThisRead);
1894             ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);
1895             rc = VINF_SUCCESS;
1896         }
1897         else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
1898             rc = VINF_SUCCESS;
1899         else if (rc == VERR_VD_IOCTX_HALT)
1900         {
1901             uOffset += cbThisRead;
1902             cbToRead -= cbThisRead;
1903             pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
1904         }
1905
1906         if (RT_FAILURE(rc))
1907             break;
1908
1909         cbToRead -= cbThisRead;
1910         uOffset += cbThisRead;
1911         pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
1912     } while (cbToRead != 0 && RT_SUCCESS(rc));
1913
1914     if (   rc == VERR_VD_NOT_ENOUGH_METADATA
1915         || rc == VERR_VD_IOCTX_HALT)
1916     {
1917         /* Save the current state. */
1918         pIoCtx->Req.Io.uOffset = uOffset;
1919         pIoCtx->Req.Io.cbTransfer = cbToRead;
1920         pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
1921     }
1922
1923     return rc;
1924 }
1925
1926 /**
1927  * internal: parent image read wrapper for compacting.
1928 */ 1929 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf, 1930 size_t cbRead) 1931 { 1932 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser; 1933 return vdReadHelper(pParentState->pDisk, pParentState->pImage, uOffset, 1934 pvBuf, cbRead, false /* fUpdateCache */); 1967 1935 } 1968 1936 … … 2016 1984 2017 1985 /** 1986 * internal: write a complete block (only used for diff images), taking the 1987 * remaining data from parent images. This implementation does not optimize 1988 * anything (except that it tries to read only that portions from parent 1989 * images that are really needed). 1990 */ 1991 static int vdWriteHelperStandard(PVBOXHDD pDisk, PVDIMAGE pImage, 1992 PVDIMAGE pImageParentOverride, 1993 uint64_t uOffset, size_t cbWrite, 1994 size_t cbThisWrite, size_t cbPreRead, 1995 size_t cbPostRead, const void *pvBuf, 1996 void *pvTmp) 1997 { 1998 int rc = VINF_SUCCESS; 1999 2000 LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n", 2001 pDisk, pImage, pImageParentOverride, uOffset, cbWrite)); 2002 2003 /* Read the data that goes before the write to fill the block. */ 2004 if (cbPreRead) 2005 { 2006 /* 2007 * Updating the cache doesn't make sense here because 2008 * this will be done after the complete block was written. 2009 */ 2010 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, 2011 uOffset - cbPreRead, pvTmp, cbPreRead, 2012 true /* fZeroFreeBlocks*/, 2013 false /* fUpdateCache */, 0); 2014 if (RT_FAILURE(rc)) 2015 return rc; 2016 } 2017 2018 /* Copy the data to the right place in the buffer. */ 2019 memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite); 2020 2021 /* Read the data that goes after the write to fill the block. */ 2022 if (cbPostRead) 2023 { 2024 /* If we have data to be written, use that instead of reading 2025 * data from the image. */ 2026 size_t cbWriteCopy; 2027 if (cbWrite > cbThisWrite) 2028 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); 2029 else 2030 cbWriteCopy = 0; 2031 /* Figure out how much we cannot read from the image, because 2032 * the last block to write might exceed the nominal size of the 2033 * image for technical reasons. */ 2034 size_t cbFill; 2035 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) 2036 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; 2037 else 2038 cbFill = 0; 2039 /* The rest must be read from the image. */ 2040 size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill; 2041 2042 /* Now assemble the remaining data. */ 2043 if (cbWriteCopy) 2044 memcpy((char *)pvTmp + cbPreRead + cbThisWrite, 2045 (char *)pvBuf + cbThisWrite, cbWriteCopy); 2046 if (cbReadImage) 2047 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, 2048 uOffset + cbThisWrite + cbWriteCopy, 2049 (char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy, 2050 cbReadImage, true /* fZeroFreeBlocks */, 2051 false /* fUpdateCache */, 0); 2052 if (RT_FAILURE(rc)) 2053 return rc; 2054 /* Zero out the remainder of this block. Will never be visible, as this 2055 * is beyond the limit of the image. */ 2056 if (cbFill) 2057 memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage, 2058 '\0', cbFill); 2059 } 2060 2061 /* Write the full block to the virtual disk. 
*/ 2062 RTSGSEG SegmentBuf; 2063 RTSGBUF SgBuf; 2064 VDIOCTX IoCtx; 2065 2066 SegmentBuf.pvSeg = pvTmp; 2067 SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead; 2068 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 2069 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL, 2070 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 2071 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead, 2072 cbPreRead + cbThisWrite + cbPostRead, 2073 &IoCtx, NULL, &cbPreRead, &cbPostRead, 0); 2074 Assert(rc != VERR_VD_BLOCK_FREE); 2075 Assert(cbPreRead == 0); 2076 Assert(cbPostRead == 0); 2077 2078 return rc; 2079 } 2080 2081 /** 2082 * internal: write a complete block (only used for diff images), taking the 2083 * remaining data from parent images. This implementation optimizes out writes 2084 * that do not change the data relative to the state as of the parent images. 2085 * All backends which support differential/growing images support this. 2086 */ 2087 static int vdWriteHelperOptimized(PVBOXHDD pDisk, PVDIMAGE pImage, 2088 PVDIMAGE pImageParentOverride, 2089 uint64_t uOffset, size_t cbWrite, 2090 size_t cbThisWrite, size_t cbPreRead, 2091 size_t cbPostRead, const void *pvBuf, 2092 void *pvTmp, unsigned cImagesRead) 2093 { 2094 size_t cbFill = 0; 2095 size_t cbWriteCopy = 0; 2096 size_t cbReadImage = 0; 2097 int rc; 2098 2099 LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n", 2100 pDisk, pImage, pImageParentOverride, uOffset, cbWrite)); 2101 2102 if (cbPostRead) 2103 { 2104 /* Figure out how much we cannot read from the image, because 2105 * the last block to write might exceed the nominal size of the 2106 * image for technical reasons. */ 2107 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) 2108 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; 2109 2110 /* If we have data to be written, use that instead of reading 2111 * data from the image. */ 2112 if (cbWrite > cbThisWrite) 2113 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); 2114 2115 /* The rest must be read from the image. */ 2116 cbReadImage = cbPostRead - cbWriteCopy - cbFill; 2117 } 2118 2119 /* Read the entire data of the block so that we can compare whether it will 2120 * be modified by the write or not. */ 2121 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, uOffset - cbPreRead, pvTmp, 2122 cbPreRead + cbThisWrite + cbPostRead - cbFill, 2123 true /* fZeroFreeBlocks */, false /* fUpdateCache */, 2124 cImagesRead); 2125 if (RT_FAILURE(rc)) 2126 return rc; 2127 2128 /* Check if the write would modify anything in this block. */ 2129 if ( !memcmp((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite) 2130 && (!cbWriteCopy || !memcmp((char *)pvTmp + cbPreRead + cbThisWrite, 2131 (char *)pvBuf + cbThisWrite, cbWriteCopy))) 2132 { 2133 /* Block is completely unchanged, so no need to write anything. */ 2134 return VINF_SUCCESS; 2135 } 2136 2137 /* Copy the data to the right place in the buffer. */ 2138 memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite); 2139 2140 /* Handle the data that goes after the write to fill the block. */ 2141 if (cbPostRead) 2142 { 2143 /* Now assemble the remaining data. */ 2144 if (cbWriteCopy) 2145 memcpy((char *)pvTmp + cbPreRead + cbThisWrite, 2146 (char *)pvBuf + cbThisWrite, cbWriteCopy); 2147 /* Zero out the remainder of this block. Will never be visible, as this 2148 * is beyond the limit of the image. 
*/ 2149 if (cbFill) 2150 memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage, 2151 '\0', cbFill); 2152 } 2153 2154 /* Write the full block to the virtual disk. */ 2155 RTSGSEG SegmentBuf; 2156 RTSGBUF SgBuf; 2157 VDIOCTX IoCtx; 2158 2159 SegmentBuf.pvSeg = pvTmp; 2160 SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead; 2161 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 2162 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL, 2163 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 2164 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead, 2165 cbPreRead + cbThisWrite + cbPostRead, 2166 &IoCtx, NULL, &cbPreRead, &cbPostRead, 0); 2167 Assert(rc != VERR_VD_BLOCK_FREE); 2168 Assert(cbPreRead == 0); 2169 Assert(cbPostRead == 0); 2170 2171 return rc; 2172 } 2173 2174 /** 2018 2175 * internal: write buffer to the image, taking care of block boundaries and 2019 2176 * write optimizations. … … 2024 2181 bool fUpdateCache, unsigned cImagesRead) 2025 2182 { 2026 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE; 2027 RTSGSEG Segment; 2183 int rc; 2184 unsigned fWrite; 2185 size_t cbThisWrite; 2186 size_t cbPreRead, cbPostRead; 2187 uint64_t uOffsetCur = uOffset; 2188 size_t cbWriteCur = cbWrite; 2189 const void *pcvBufCur = pvBuf; 2190 RTSGSEG SegmentBuf; 2028 2191 RTSGBUF SgBuf; 2029 2192 VDIOCTX IoCtx; 2030 2193 2031 if (fUpdateCache) 2032 fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE; 2033 2034 Segment.pvSeg = (void *)pvBuf; 2035 Segment.cbSeg = cbWrite; 2036 RTSgBufInit(&SgBuf, &Segment, 1); 2037 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf, 2038 NULL, vdWriteHelperAsync, fFlags); 2039 2040 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride; 2041 IoCtx.Req.Io.cImagesRead = cImagesRead; 2042 IoCtx.pIoCtxParent = NULL; 2043 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete; 2044 IoCtx.Type.Root.pvUser1 = pDisk; 2045 IoCtx.Type.Root.pvUser2 = NULL; 2046 return vdIoCtxProcessSync(&IoCtx); 2194 /* Loop until all written. */ 2195 do 2196 { 2197 /* Try to write the possibly partial block to the last opened image. 2198 * This works when the block is already allocated in this image or 2199 * if it is a full-block write (and allocation isn't suppressed below). 2200 * For image formats which don't support zero blocks, it's beneficial 2201 * to avoid unnecessarily allocating unchanged blocks. This prevents 2202 * unwanted expanding of images. VMDK is an example. */ 2203 cbThisWrite = cbWriteCur; 2204 fWrite = (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME) 2205 ? 0 : VD_WRITE_NO_ALLOC; 2206 2207 SegmentBuf.pvSeg = (void *)pcvBufCur; 2208 SegmentBuf.cbSeg = cbWrite; 2209 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 2210 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL, 2211 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 2212 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffsetCur, cbThisWrite, 2213 &IoCtx, &cbThisWrite, &cbPreRead, 2214 &cbPostRead, fWrite); 2215 if (rc == VERR_VD_BLOCK_FREE) 2216 { 2217 void *pvTmp = RTMemTmpAlloc(cbPreRead + cbThisWrite + cbPostRead); 2218 AssertBreakStmt(VALID_PTR(pvTmp), rc = VERR_NO_MEMORY); 2219 2220 if (!(pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)) 2221 { 2222 /* Optimized write, suppress writing to a so far unallocated 2223 * block if the data is in fact not changed. 
*/ 2224 rc = vdWriteHelperOptimized(pDisk, pImage, pImageParentOverride, 2225 uOffsetCur, cbWriteCur, 2226 cbThisWrite, cbPreRead, cbPostRead, 2227 pcvBufCur, pvTmp, cImagesRead); 2228 } 2229 else 2230 { 2231 /* Normal write, not optimized in any way. The block will 2232 * be written no matter what. This will usually (unless the 2233 * backend has some further optimization enabled) cause the 2234 * block to be allocated. */ 2235 rc = vdWriteHelperStandard(pDisk, pImage, pImageParentOverride, 2236 uOffsetCur, cbWriteCur, 2237 cbThisWrite, cbPreRead, cbPostRead, 2238 pcvBufCur, pvTmp); 2239 } 2240 RTMemTmpFree(pvTmp); 2241 if (RT_FAILURE(rc)) 2242 break; 2243 } 2244 2245 cbWriteCur -= cbThisWrite; 2246 uOffsetCur += cbThisWrite; 2247 pcvBufCur = (char *)pcvBufCur + cbThisWrite; 2248 } while (cbWriteCur != 0 && RT_SUCCESS(rc)); 2249 2250 #if 0 /** @todo: Soon removed when sync and async version of the write helper are merged. */ 2251 /* Update the cache on success */ 2252 if ( RT_SUCCESS(rc) 2253 && pDisk->pCache 2254 && fUpdateCache) 2255 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, cbWrite, NULL); 2256 2257 if (RT_SUCCESS(rc)) 2258 rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite); 2259 #endif 2260 2261 return rc; 2047 2262 } 2048 2263 … … 2276 2491 } 2277 2492 2278 static int vdWriteHelperCommitAsync(PVDIOCTX pIoCtx) 2493 /** 2494 * internal: write a complete block (only used for diff images), taking the 2495 * remaining data from parent images. This implementation does not optimize 2496 * anything (except that it tries to read only that portions from parent 2497 * images that are really needed) - async version. 2498 */ 2499 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx) 2500 { 2501 int rc = VINF_SUCCESS; 2502 2503 #if 0 2504 2505 /* Read the data that goes before the write to fill the block. */ 2506 if (cbPreRead) 2507 { 2508 rc = vdReadHelperAsync(pIoCtxDst); 2509 if (RT_FAILURE(rc)) 2510 return rc; 2511 } 2512 2513 /* Copy the data to the right place in the buffer. */ 2514 vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbThisWrite); 2515 2516 /* Read the data that goes after the write to fill the block. */ 2517 if (cbPostRead) 2518 { 2519 /* If we have data to be written, use that instead of reading 2520 * data from the image. */ 2521 size_t cbWriteCopy; 2522 if (cbWrite > cbThisWrite) 2523 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); 2524 else 2525 cbWriteCopy = 0; 2526 /* Figure out how much we cannot read from the image, because 2527 * the last block to write might exceed the nominal size of the 2528 * image for technical reasons. */ 2529 size_t cbFill; 2530 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) 2531 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; 2532 else 2533 cbFill = 0; 2534 /* The rest must be read from the image. */ 2535 size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill; 2536 2537 /* Now assemble the remaining data. */ 2538 if (cbWriteCopy) 2539 { 2540 vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbWriteCopy); 2541 ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbWriteCopy); 2542 } 2543 2544 if (cbReadImage) 2545 rc = vdReadHelperAsync(pDisk, pImage, pImageParentOverride, pIoCtxDst, 2546 uOffset + cbThisWrite + cbWriteCopy, 2547 cbReadImage); 2548 if (RT_FAILURE(rc)) 2549 return rc; 2550 /* Zero out the remainder of this block. Will never be visible, as this 2551 * is beyond the limit of the image. 
*/ 2552 if (cbFill) 2553 { 2554 vdIoCtxSet(pIoCtxDst, '\0', cbFill); 2555 ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbFill); 2556 } 2557 } 2558 2559 if ( !pIoCtxDst->cbTransferLeft 2560 && !pIoCtxDst->cMetaTransfersPending 2561 && ASMAtomicCmpXchgBool(&pIoCtxDst->fComplete, true, false)) 2562 { 2563 /* Write the full block to the virtual disk. */ 2564 vdIoCtxChildReset(pIoCtxDst); 2565 rc = pImage->Backend->pfnWrite(pImage->pBackendData, 2566 uOffset - cbPreRead, 2567 cbPreRead + cbThisWrite + cbPostRead, 2568 pIoCtxDst, 2569 NULL, &cbPreRead, &cbPostRead, 0); 2570 Assert(rc != VERR_VD_BLOCK_FREE); 2571 Assert(cbPreRead == 0); 2572 Assert(cbPostRead == 0); 2573 } 2574 else 2575 { 2576 LogFlow(("cbTransferLeft=%u cMetaTransfersPending=%u fComplete=%RTbool\n", 2577 pIoCtxDst->cbTransferLeft, pIoCtxDst->cMetaTransfersPending, 2578 pIoCtxDst->fComplete)); 2579 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 2580 } 2581 2582 return rc; 2583 #endif 2584 return VERR_NOT_IMPLEMENTED; 2585 } 2586 2587 static int vdWriteHelperOptimizedCommitAsync(PVDIOCTX pIoCtx) 2279 2588 { 2280 2589 int rc = VINF_SUCCESS; … … 2286 2595 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2287 2596 rc = pImage->Backend->pfnWrite(pImage->pBackendData, 2288 pIoCtx->Req.Io.uOffset - cbPreRead,2289 cbPreRead + cbThisWrite + cbPostRead,2290 pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);2597 pIoCtx->Req.Io.uOffset - cbPreRead, 2598 cbPreRead + cbThisWrite + cbPostRead, 2599 pIoCtx, NULL, &cbPreRead, &cbPostRead, 0); 2291 2600 Assert(rc != VERR_VD_BLOCK_FREE); 2292 2601 Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0); … … 2377 2686 /* Write the full block to the virtual disk. */ 2378 2687 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 2379 pIoCtx->pfnIoCtxTransferNext = vdWriteHelper CommitAsync;2688 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedCommitAsync; 2380 2689 2381 2690 return rc; … … 2387 2696 2388 2697 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2389 2390 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;2391 2698 2392 2699 if (pIoCtx->Req.Io.cbTransferLeft) … … 2455 2762 /* Next step */ 2456 2763 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync; 2457 return VINF_SUCCESS;2458 }2459 2460 static int vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx)2461 {2462 int rc = VINF_SUCCESS;2463 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;2464 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;2465 PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;2466 2467 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));2468 2469 vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);2470 if (cbPostRead)2471 {2472 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;2473 size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;2474 size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;2475 2476 /* Now assemble the remaining data. */2477 if (cbWriteCopy)2478 {2479 /*2480 * The S/G buffer of the parent needs to be cloned because2481 * it is not allowed to modify the state.2482 */2483 RTSGBUF SgBufParentTmp;2484 2485 RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);2486 RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);2487 }2488 2489 /* Zero out the remainder of this block. Will never be visible, as this2490 * is beyond the limit of the image. */2491 if (cbFill)2492 {2493 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage);2494 vdIoCtxSet(pIoCtx, '\0', cbFill);2495 }2496 2497 if (cbReadImage)2498 {2499 /* Read remaining data. */2500 }2501 else2502 {2503 /* Write the full block to the virtual disk. 
*/2504 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);2505 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;2506 }2507 }2508 else2509 {2510 /* Write the full block to the virtual disk. */2511 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);2512 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;2513 }2514 2515 return rc;2516 }2517 2518 static int vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx)2519 {2520 int rc = VINF_SUCCESS;2521 2522 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));2523 2524 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;2525 2526 if (pIoCtx->Req.Io.cbTransferLeft)2527 rc = vdReadHelperAsync(pIoCtx);2528 2529 if ( RT_SUCCESS(rc)2530 && ( pIoCtx->Req.Io.cbTransferLeft2531 || pIoCtx->cMetaTransfersPending))2532 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;2533 else2534 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;2535 2536 return rc;2537 }2538 2539 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)2540 {2541 PVBOXHDD pDisk = pIoCtx->pDisk;2542 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved;2543 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;2544 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;2545 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;2546 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent;2547 size_t cbFill = 0;2548 size_t cbWriteCopy = 0;2549 size_t cbReadImage = 0;2550 2551 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));2552 2553 AssertPtr(pIoCtx->pIoCtxParent);2554 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);2555 2556 /* Calculate the amount of data to read that goes after the write to fill the block. */2557 if (cbPostRead)2558 {2559 /* If we have data to be written, use that instead of reading2560 * data from the image. */2561 cbWriteCopy;2562 if (cbWrite > cbThisWrite)2563 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);2564 2565 /* Figure out how much we cannot read from the image, because2566 * the last block to write might exceed the nominal size of the2567 * image for technical reasons. */2568 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)2569 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;2570 2571 /* The rest must be read from the image. */2572 cbReadImage = cbPostRead - cbWriteCopy - cbFill;2573 }2574 2575 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill;2576 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;2577 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;2578 2579 /* Next step */2580 if (cbPreRead)2581 {2582 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync;2583 2584 /* Read the data that goes before the write to fill the block. */2585 pIoCtx->Req.Io.cbTransferLeft = cbPreRead;2586 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;2587 pIoCtx->Req.Io.uOffset -= cbPreRead;2588 }2589 else2590 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;2591 2592 2764 return VINF_SUCCESS; 2593 2765 } … … 9465 9637 pfnComplete, pvUser1, pvUser2, 9466 9638 NULL, vdReadHelperAsync, 9467 VDIOCTX_FLAGS_ ZERO_FREE_BLOCKS);9639 VDIOCTX_FLAGS_DEFAULT); 9468 9640 if (!pIoCtx) 9469 9641 {
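
Aside on the list handling in this change: vdDiskProcessWaitingIoCtx() and vdDiskProcessBlockedIoCtx() use the same lock-free hand-off pattern. Producers push I/O contexts onto an atomic list head (which yields LIFO order), and the consumer detaches the whole list with a single atomic exchange, reverses it to restore FIFO order, and only then walks it without touching the shared head again. The sketch below is illustrative only: it uses C11 <stdatomic.h> in place of the IPRT ASMAtomicXchgPtrT()/ASMAtomicCmpXchgBool() helpers, and the NODE type, listPush(), listProcess() and printNode() are names invented for the example.

/* Minimal sketch of the "grab, reverse, process" waiting-list pattern. */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

typedef struct NODE
{
    struct NODE *pNext;
    int          iId;
} NODE;

/* Producers push in LIFO order with a plain CAS loop. */
static void listPush(NODE *_Atomic *ppHead, NODE *pNode)
{
    NODE *pHead = atomic_load(ppHead);
    do
    {
        pNode->pNext = pHead;
    } while (!atomic_compare_exchange_weak(ppHead, &pHead, pNode));
}

/* The consumer detaches the whole list at once, reverses it to restore
 * FIFO order and then processes the nodes without further atomics. */
static void listProcess(NODE *_Atomic *ppHead, void (*pfnProcess)(NODE *))
{
    NODE *pCur  = atomic_exchange(ppHead, NULL); /* Grab the waiting list. */
    NODE *pHead = NULL;

    while (pCur) /* Reverse it (pushes were LIFO). */
    {
        NODE *pInsert = pCur;
        pCur = pCur->pNext;
        pInsert->pNext = pHead;
        pHead = pInsert;
    }

    while (pHead) /* Process in FIFO order. */
    {
        NODE *pTmp = pHead;
        pHead = pHead->pNext;
        pTmp->pNext = NULL;
        pfnProcess(pTmp);
    }
}

static void printNode(NODE *pNode) { printf("node %d\n", pNode->iId); }

int main(void)
{
    NODE *_Atomic pHead = NULL;
    NODE aNodes[3] = { { NULL, 1 }, { NULL, 2 }, { NULL, 3 } };

    for (unsigned i = 0; i < 3; i++)
        listPush(&pHead, &aNodes[i]);
    listProcess(&pHead, printNode); /* Prints 1, 2, 3 despite LIFO pushes. */
    return 0;
}

The reversal step is what keeps the scheme fair: pushes stay cheap and wait-free, while the consumer still sees requests in submission order.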
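
The partial-write helpers in this change (vdWriteHelperStandard(), vdWriteHelperOptimized() and their async counterparts) all assemble a complete block before handing it to the backend: the head of the block is pre-read, the caller's data is copied in, and the tail is built from data still pending in the same write (cbWriteCopy), data read from the image chain (cbReadImage) and zero padding past the nominal disk size (cbFill). The standalone sketch below reproduces only that size arithmetic under invented names (BLOCKLAYOUT, blockLayoutCalc()); the real code operates on S/G buffers and the image chain rather than flat sizes.

/* Sketch of the tail-assembly arithmetic for a partial block write. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct BLOCKLAYOUT
{
    size_t cbWriteCopy; /* Data taken from the remainder of the caller's write. */
    size_t cbReadImage; /* Data that must be read from the image chain. */
    size_t cbFill;      /* Zero padding past the nominal size of the disk. */
} BLOCKLAYOUT;

static void blockLayoutCalc(BLOCKLAYOUT *pLayout, uint64_t uOffset, size_t cbWrite,
                            size_t cbThisWrite, size_t cbPostRead, uint64_t cbDisk)
{
    pLayout->cbWriteCopy = 0;
    pLayout->cbFill      = 0;

    /* Prefer data from the pending write over re-reading it from the image. */
    if (cbWrite > cbThisWrite)
        pLayout->cbWriteCopy = cbPostRead < cbWrite - cbThisWrite
                             ? cbPostRead : cbWrite - cbThisWrite;

    /* The last block may extend past the nominal disk size; that part is zeroed. */
    if (uOffset + cbThisWrite + cbPostRead > cbDisk)
        pLayout->cbFill = (size_t)(uOffset + cbThisWrite + cbPostRead - cbDisk);

    /* Whatever is left has to come from the image chain. */
    pLayout->cbReadImage = cbPostRead - pLayout->cbWriteCopy - pLayout->cbFill;
}

int main(void)
{
    BLOCKLAYOUT Layout;

    /* A 4K write whose first 1K hits an unallocated 64K block at the end of a
     * disk whose nominal size (60K) is not a multiple of the block size. */
    blockLayoutCalc(&Layout, /*uOffset=*/ 0, /*cbWrite=*/ 4096,
                    /*cbThisWrite=*/ 1024, /*cbPostRead=*/ 64 * 1024 - 1024,
                    /*cbDisk=*/ 60 * 1024);
    printf("copy=%zu read=%zu fill=%zu\n",
           Layout.cbWriteCopy, Layout.cbReadImage, Layout.cbFill);
    assert(Layout.cbWriteCopy + Layout.cbReadImage + Layout.cbFill == 64 * 1024 - 1024);
    return 0;
}

Preferring cbWriteCopy over a read keeps the helper from fetching data from the chain that the caller is about to overwrite anyway, which is the same motivation behind the optimized variant skipping the write entirely when the assembled block matches the parent data.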