- Timestamp:
- Feb 10, 2013 6:38:22 PM (12 years ago)
- svn:sync-xref-src-repo-rev:
- 83702
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Storage/VD.cpp
r44431 r44616 355 355 /** S/G buffer */ 356 356 RTSGBUF SgBuf; 357 /** Flag whether all reads from the image chain returned VERR_VD_BLOCK_FREE 358 * so far. */ 359 bool fAllFree; 360 /** Number of bytes to clear in the buffer before the current read. */ 361 size_t cbBufClear; 362 /** Number of images to read. */ 363 unsigned cImagesRead; 364 /** Override for the parent image to start reading from. */ 365 PVDIMAGE pImageParentOverride; 357 366 } Io; 358 367 /** Discard requests. */ … … 424 433 425 434 /** Default flags for an I/O context, i.e. unblocked and async. */ 426 #define VDIOCTX_FLAGS_DEFAULT (0)435 #define VDIOCTX_FLAGS_DEFAULT (0) 427 436 /** Flag whether the context is blocked. */ 428 #define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0)437 #define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0) 429 438 /** Flag whether the I/O context is using synchronous I/O. */ 430 #define VDIOCTX_FLAGS_SYNC RT_BIT_32(1) 439 #define VDIOCTX_FLAGS_SYNC RT_BIT_32(1) 440 /** Flag whether the read should update the cache. */ 441 #define VDIOCTX_FLAGS_READ_UDATE_CACHE RT_BIT_32(2) 442 /** Flag whether free blocks should be zeroed. 443 * If false and no image has data for the specified 444 * range VERR_VD_BLOCK_FREE is returned for the I/O context. 445 * Note that unallocated blocks are still zeroed 446 * if at least one image has valid data for a part 447 * of the range. 448 */ 449 #define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS RT_BIT_32(3) 450 /** Don't free the I/O context when complete because 451 * it was allocated elsewhere (stack, ...). */ 452 #define VDIOCTX_FLAGS_DONT_FREE RT_BIT_32(4) 431 453 432 454 /** NIL I/O context pointer value. */ … … 577 599 /** Forward declaration of the async discard helper. 
*/ 578 600 static int vdDiscardHelperAsync(PVDIOCTX pIoCtx); 601 static int vdWriteHelperAsync(PVDIOCTX pIoCtx); 579 602 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk); 580 603 static int vdDiskUnlock(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc); 604 static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq); 581 605 582 606 /** … … 809 833 pIoCtx->Req.Io.pImageStart = pImageStart; 810 834 pIoCtx->Req.Io.pImageCur = pImageStart; 835 pIoCtx->Req.Io.fAllFree = true; 836 pIoCtx->Req.Io.cbBufClear = 0; 837 pIoCtx->Req.Io.pImageParentOverride = NULL; 811 838 pIoCtx->cDataTransfersPending = 0; 812 839 pIoCtx->cMetaTransfersPending = 0; … … 817 844 pIoCtx->pfnIoCtxTransferNext = NULL; 818 845 pIoCtx->rcReq = VINF_SUCCESS; 846 pIoCtx->pIoCtxParent = NULL; 819 847 820 848 /* There is no S/G list for a flush request. */ … … 845 873 */ 846 874 static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset, 847 PVDIOCTX pIoCtx, size_t cbRead, size_t *pcbRead)875 size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead) 848 876 { 849 877 int rc = VINF_SUCCESS; … … 908 936 909 937 /** 938 * Creates a new empty discard state. 939 * 940 * @returns Pointer to the new discard state or NULL if out of memory. 941 */ 942 static PVDDISCARDSTATE vdDiscardStateCreate(void) 943 { 944 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE)); 945 946 if (pDiscard) 947 { 948 RTListInit(&pDiscard->ListLru); 949 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE)); 950 if (!pDiscard->pTreeBlocks) 951 { 952 RTMemFree(pDiscard); 953 pDiscard = NULL; 954 } 955 } 956 957 return pDiscard; 958 } 959 960 /** 961 * Removes the least recently used blocks from the waiting list until 962 * the new value is reached. 963 * 964 * @returns VBox status code. 965 * @param pDisk VD disk container. 966 * @param pDiscard The discard state. 967 * @param cbDiscardingNew How many bytes should be waiting on success. 968 * The number of bytes waiting can be less. 
969 */ 970 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew) 971 { 972 int rc = VINF_SUCCESS; 973 974 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n", 975 pDisk, pDiscard, cbDiscardingNew)); 976 977 while (pDiscard->cbDiscarding > cbDiscardingNew) 978 { 979 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru); 980 981 Assert(!RTListIsEmpty(&pDiscard->ListLru)); 982 983 /* Go over the allocation bitmap and mark all discarded sectors as unused. */ 984 uint64_t offStart = pBlock->Core.Key; 985 uint32_t idxStart = 0; 986 size_t cbLeft = pBlock->cbDiscard; 987 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart); 988 uint32_t cSectors = pBlock->cbDiscard / 512; 989 990 while (cbLeft > 0) 991 { 992 int32_t idxEnd; 993 size_t cbThis = cbLeft; 994 995 if (fAllocated) 996 { 997 /* Check for the first unallocated bit. */ 998 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart); 999 if (idxEnd != -1) 1000 { 1001 cbThis = (idxEnd - idxStart) * 512; 1002 fAllocated = false; 1003 } 1004 } 1005 else 1006 { 1007 /* Mark as unused and check for the first set bit. 
*/ 1008 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart); 1009 if (idxEnd != -1) 1010 cbThis = (idxEnd - idxStart) * 512; 1011 1012 1013 VDIOCTX IoCtx; 1014 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL, 1015 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1016 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, 1017 &IoCtx, offStart, cbThis, NULL, 1018 NULL, &cbThis, NULL, 1019 VD_DISCARD_MARK_UNUSED); 1020 if (RT_FAILURE(rc)) 1021 break; 1022 1023 fAllocated = true; 1024 } 1025 1026 idxStart = idxEnd; 1027 offStart += cbThis; 1028 cbLeft -= cbThis; 1029 } 1030 1031 if (RT_FAILURE(rc)) 1032 break; 1033 1034 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key); 1035 Assert(pBlockRemove == pBlock); 1036 RTListNodeRemove(&pBlock->NodeLru); 1037 1038 pDiscard->cbDiscarding -= pBlock->cbDiscard; 1039 RTMemFree(pBlock->pbmAllocated); 1040 RTMemFree(pBlock); 1041 } 1042 1043 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew); 1044 1045 LogFlowFunc(("returns rc=%Rrc\n", rc)); 1046 return rc; 1047 } 1048 1049 /** 1050 * Destroys the current discard state, writing any waiting blocks to the image. 1051 * 1052 * @returns VBox status code. 1053 * @param pDisk VD disk container. 1054 */ 1055 static int vdDiscardStateDestroy(PVBOXHDD pDisk) 1056 { 1057 int rc = VINF_SUCCESS; 1058 1059 if (pDisk->pDiscard) 1060 { 1061 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */); 1062 AssertRC(rc); 1063 RTMemFree(pDisk->pDiscard->pTreeBlocks); 1064 RTMemFree(pDisk->pDiscard); 1065 pDisk->pDiscard = NULL; 1066 } 1067 1068 return rc; 1069 } 1070 1071 /** 1072 * Marks the given range as allocated in the image. 1073 * Required if there are discards in progress and a write to a block which can get discarded 1074 * is written to. 1075 * 1076 * @returns VBox status code. 1077 * @param pDisk VD container data. 
1078 * @param uOffset First byte to mark as allocated. 1079 * @param cbRange Number of bytes to mark as allocated. 1080 */ 1081 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange) 1082 { 1083 PVDDISCARDSTATE pDiscard = pDisk->pDiscard; 1084 int rc = VINF_SUCCESS; 1085 1086 if (pDiscard) 1087 { 1088 do 1089 { 1090 size_t cbThisRange = cbRange; 1091 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset); 1092 1093 if (pBlock) 1094 { 1095 int32_t idxStart, idxEnd; 1096 1097 Assert(!(cbThisRange % 512)); 1098 Assert(!((uOffset - pBlock->Core.Key) % 512)); 1099 1100 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1); 1101 1102 idxStart = (uOffset - pBlock->Core.Key) / 512; 1103 idxEnd = idxStart + (cbThisRange / 512); 1104 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd); 1105 } 1106 else 1107 { 1108 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true); 1109 if (pBlock) 1110 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset); 1111 } 1112 1113 Assert(cbRange >= cbThisRange); 1114 1115 uOffset += cbThisRange; 1116 cbRange -= cbThisRange; 1117 } while (cbRange != 0); 1118 } 1119 1120 return rc; 1121 } 1122 1123 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1124 uint64_t uOffset, size_t cbTransfer, 1125 PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf, 1126 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1127 uint32_t fFlags) 1128 { 1129 PVDIOCTX pIoCtx = NULL; 1130 1131 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx); 1132 if (RT_LIKELY(pIoCtx)) 1133 { 1134 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1135 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); 1136 } 1137 1138 return pIoCtx; 1139 } 1140 1141 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1142 uint64_t uOffset, size_t cbTransfer, 1143 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf, 1144 
PFNVDASYNCTRANSFERCOMPLETE pfnComplete, 1145 void *pvUser1, void *pvUser2, 1146 void *pvAllocation, 1147 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1148 uint32_t fFlags) 1149 { 1150 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1151 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); 1152 1153 if (RT_LIKELY(pIoCtx)) 1154 { 1155 pIoCtx->pIoCtxParent = NULL; 1156 pIoCtx->Type.Root.pfnComplete = pfnComplete; 1157 pIoCtx->Type.Root.pvUser1 = pvUser1; 1158 pIoCtx->Type.Root.pvUser2 = pvUser2; 1159 } 1160 1161 LogFlow(("Allocated root I/O context %#p\n", pIoCtx)); 1162 return pIoCtx; 1163 } 1164 1165 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges, 1166 unsigned cRanges, 1167 PFNVDASYNCTRANSFERCOMPLETE pfnComplete, 1168 void *pvUser1, void *pvUser2, 1169 void *pvAllocation, 1170 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1171 uint32_t fFlags) 1172 { 1173 PVDIOCTX pIoCtx = NULL; 1174 1175 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx); 1176 if (RT_LIKELY(pIoCtx)) 1177 { 1178 pIoCtx->pIoCtxNext = NULL; 1179 pIoCtx->pDisk = pDisk; 1180 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD; 1181 pIoCtx->cDataTransfersPending = 0; 1182 pIoCtx->cMetaTransfersPending = 0; 1183 pIoCtx->fComplete = false; 1184 pIoCtx->fFlags = fFlags; 1185 pIoCtx->pvAllocation = pvAllocation; 1186 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer; 1187 pIoCtx->pfnIoCtxTransferNext = NULL; 1188 pIoCtx->rcReq = VINF_SUCCESS; 1189 pIoCtx->Req.Discard.paRanges = paRanges; 1190 pIoCtx->Req.Discard.cRanges = cRanges; 1191 pIoCtx->Req.Discard.idxRange = 0; 1192 pIoCtx->Req.Discard.cbDiscardLeft = 0; 1193 pIoCtx->Req.Discard.offCur = 0; 1194 pIoCtx->Req.Discard.cbThisDiscard = 0; 1195 1196 pIoCtx->pIoCtxParent = NULL; 1197 pIoCtx->Type.Root.pfnComplete = pfnComplete; 1198 pIoCtx->Type.Root.pvUser1 = pvUser1; 1199 pIoCtx->Type.Root.pvUser2 = pvUser2; 1200 } 1201 1202 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx)); 1203 return pIoCtx; 1204 } 1205 1206 
DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1207 uint64_t uOffset, size_t cbTransfer, 1208 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf, 1209 PVDIOCTX pIoCtxParent, size_t cbTransferParent, 1210 size_t cbWriteParent, void *pvAllocation, 1211 PFNVDIOCTXTRANSFER pfnIoCtxTransfer) 1212 { 1213 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1214 pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0); 1215 1216 AssertPtr(pIoCtxParent); 1217 Assert(!pIoCtxParent->pIoCtxParent); 1218 1219 if (RT_LIKELY(pIoCtx)) 1220 { 1221 pIoCtx->pIoCtxParent = pIoCtxParent; 1222 pIoCtx->Type.Child.uOffsetSaved = uOffset; 1223 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer; 1224 pIoCtx->Type.Child.cbTransferParent = cbTransferParent; 1225 pIoCtx->Type.Child.cbWriteParent = cbWriteParent; 1226 } 1227 1228 LogFlow(("Allocated child I/O context %#p\n", pIoCtx)); 1229 return pIoCtx; 1230 } 1231 1232 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer) 1233 { 1234 PVDIOTASK pIoTask = NULL; 1235 1236 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask); 1237 if (pIoTask) 1238 { 1239 pIoTask->pIoStorage = pIoStorage; 1240 pIoTask->pfnComplete = pfnComplete; 1241 pIoTask->pvUser = pvUser; 1242 pIoTask->fMeta = false; 1243 pIoTask->Type.User.cbTransfer = cbTransfer; 1244 pIoTask->Type.User.pIoCtx = pIoCtx; 1245 } 1246 1247 return pIoTask; 1248 } 1249 1250 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer) 1251 { 1252 PVDIOTASK pIoTask = NULL; 1253 1254 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask); 1255 if (pIoTask) 1256 { 1257 pIoTask->pIoStorage = pIoStorage; 1258 pIoTask->pfnComplete = pfnComplete; 1259 pIoTask->pvUser = pvUser; 1260 pIoTask->fMeta = true; 1261 pIoTask->Type.Meta.pMetaXfer = pMetaXfer; 
1262 } 1263 1264 return pIoTask; 1265 } 1266 1267 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1268 { 1269 LogFlow(("Freeing I/O context %#p\n", pIoCtx)); 1270 1271 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE)) 1272 { 1273 if (pIoCtx->pvAllocation) 1274 RTMemFree(pIoCtx->pvAllocation); 1275 #ifdef DEBUG 1276 memset(pIoCtx, 0xff, sizeof(VDIOCTX)); 1277 #endif 1278 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx); 1279 } 1280 } 1281 1282 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask) 1283 { 1284 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask); 1285 } 1286 1287 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx) 1288 { 1289 AssertPtr(pIoCtx->pIoCtxParent); 1290 1291 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 1292 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved; 1293 pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved; 1294 } 1295 1296 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb) 1297 { 1298 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb])); 1299 1300 if (RT_LIKELY(pMetaXfer)) 1301 { 1302 pMetaXfer->Core.Key = uOffset; 1303 pMetaXfer->Core.KeyLast = uOffset + cb - 1; 1304 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE; 1305 pMetaXfer->cbMeta = cb; 1306 pMetaXfer->pIoStorage = pIoStorage; 1307 pMetaXfer->cRefs = 0; 1308 RTListInit(&pMetaXfer->ListIoCtxWaiting); 1309 } 1310 return pMetaXfer; 1311 } 1312 1313 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx) 1314 { 1315 /* Put it on the waiting list. 
*/ 1316 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX); 1317 PVDIOCTX pHeadOld; 1318 pIoCtx->pIoCtxNext = pNext; 1319 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld)) 1320 { 1321 pNext = pHeadOld; 1322 Assert(pNext != pIoCtx); 1323 pIoCtx->pIoCtxNext = pNext; 1324 ASMNopPause(); 1325 } 1326 } 1327 1328 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1329 { 1330 LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx)); 1331 1332 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)); 1333 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; 1334 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx); 1335 } 1336 1337 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData) 1338 { 1339 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData); 1340 } 1341 1342 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData) 1343 { 1344 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData); 1345 } 1346 1347 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData) 1348 { 1349 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); 1350 } 1351 1352 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData) 1353 { 1354 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); 1355 } 1356 1357 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData) 1358 { 1359 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData); 1360 } 1361 1362 /** 1363 * Process the I/O context, core method which assumes that the I/O context 1364 * acquired the lock. 1365 * 1366 * @returns VBox status code. 1367 * @param pIoCtx I/O context to process. 
1368 */ 1369 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx) 1370 { 1371 int rc = VINF_SUCCESS; 1372 1373 VD_IS_LOCKED(pIoCtx->pDisk); 1374 1375 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 1376 1377 if ( !pIoCtx->cMetaTransfersPending 1378 && !pIoCtx->cDataTransfersPending 1379 && !pIoCtx->pfnIoCtxTransfer) 1380 { 1381 rc = VINF_VD_ASYNC_IO_FINISHED; 1382 goto out; 1383 } 1384 1385 /* 1386 * We complete the I/O context in case of an error 1387 * if there is no I/O task pending. 1388 */ 1389 if ( RT_FAILURE(pIoCtx->rcReq) 1390 && !pIoCtx->cMetaTransfersPending 1391 && !pIoCtx->cDataTransfersPending) 1392 { 1393 rc = VINF_VD_ASYNC_IO_FINISHED; 1394 goto out; 1395 } 1396 1397 /* Don't change anything if there is a metadata transfer pending or we are blocked. */ 1398 if ( pIoCtx->cMetaTransfersPending 1399 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)) 1400 { 1401 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1402 goto out; 1403 } 1404 1405 if (pIoCtx->pfnIoCtxTransfer) 1406 { 1407 /* Call the transfer function advancing to the next while there is no error. */ 1408 while ( pIoCtx->pfnIoCtxTransfer 1409 && !pIoCtx->cMetaTransfersPending 1410 && RT_SUCCESS(rc)) 1411 { 1412 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer)); 1413 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx); 1414 1415 /* Advance to the next part of the transfer if the current one succeeded. 
*/ 1416 if (RT_SUCCESS(rc)) 1417 { 1418 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext; 1419 pIoCtx->pfnIoCtxTransferNext = NULL; 1420 } 1421 } 1422 } 1423 1424 if ( RT_SUCCESS(rc) 1425 && !pIoCtx->cMetaTransfersPending 1426 && !pIoCtx->cDataTransfersPending) 1427 rc = VINF_VD_ASYNC_IO_FINISHED; 1428 else if ( RT_SUCCESS(rc) 1429 || rc == VERR_VD_NOT_ENOUGH_METADATA 1430 || rc == VERR_VD_IOCTX_HALT) 1431 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1432 else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)) 1433 { 1434 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS); 1435 /* 1436 * The I/O context completed if we have an error and there is no data 1437 * or meta data transfer pending. 1438 */ 1439 if ( !pIoCtx->cMetaTransfersPending 1440 && !pIoCtx->cDataTransfersPending) 1441 rc = VINF_VD_ASYNC_IO_FINISHED; 1442 else 1443 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1444 } 1445 1446 out: 1447 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n", 1448 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending, 1449 pIoCtx->fComplete)); 1450 1451 return rc; 1452 } 1453 1454 /** 1455 * Processes the list of waiting I/O contexts. 1456 * 1457 * @returns VBox status code. 1458 * @param pDisk The disk structure. 1459 * @param pIoCtxRc An I/O context handle which waits on the list. When processed 1460 * The status code is returned. NULL if there is no I/O context 1461 * to return the status code for. 1462 */ 1463 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc) 1464 { 1465 int rc = VINF_SUCCESS; 1466 1467 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc)); 1468 1469 VD_IS_LOCKED(pDisk); 1470 1471 /* Get the waiting list and process it in FIFO order. */ 1472 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX); 1473 1474 /* Reverse it. 
*/ 1475 PVDIOCTX pCur = pIoCtxHead; 1476 pIoCtxHead = NULL; 1477 while (pCur) 1478 { 1479 PVDIOCTX pInsert = pCur; 1480 pCur = pCur->pIoCtxNext; 1481 pInsert->pIoCtxNext = pIoCtxHead; 1482 pIoCtxHead = pInsert; 1483 } 1484 1485 /* Process now. */ 1486 pCur = pIoCtxHead; 1487 while (pCur) 1488 { 1489 int rcTmp; 1490 PVDIOCTX pTmp = pCur; 1491 1492 pCur = pCur->pIoCtxNext; 1493 pTmp->pIoCtxNext = NULL; 1494 1495 /* 1496 * Need to clear the sync flag here if there is a new I/O context 1497 * with it set and the context is not given in pIoCtxRc. 1498 * This happens most likely on a different thread and that one shouldn't 1499 * process the context synchronously. 1500 * 1501 * The thread who issued the context will wait on the event semaphore 1502 * anyway which is signalled when the completion handler is called. 1503 */ 1504 if ( pTmp->fFlags & VDIOCTX_FLAGS_SYNC 1505 && pTmp != pIoCtxRc) 1506 pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC; 1507 1508 rcTmp = vdIoCtxProcessLocked(pTmp); 1509 if (pTmp == pIoCtxRc) 1510 { 1511 /* The given I/O context was processed, pass the return code to the caller. */ 1512 rc = rcTmp; 1513 } 1514 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED 1515 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false)) 1516 { 1517 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp)); 1518 vdThreadFinishWrite(pDisk); 1519 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1, 1520 pTmp->Type.Root.pvUser2, 1521 pTmp->rcReq); 1522 vdIoCtxFree(pDisk, pTmp); 1523 } 1524 } 1525 1526 LogFlowFunc(("returns rc=%Rrc\n", rc)); 1527 return rc; 1528 } 1529 1530 /** 1531 * Processes the list of blocked I/O contexts. 1532 * 1533 * @returns nothing. 1534 * @param pDisk The disk structure. 1535 */ 1536 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk) 1537 { 1538 LogFlowFunc(("pDisk=%#p\n", pDisk)); 1539 1540 VD_IS_LOCKED(pDisk); 1541 1542 /* Get the waiting list and process it in FIFO order. 
*/ 1543 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX); 1544 1545 /* Reverse it. */ 1546 PVDIOCTX pCur = pIoCtxHead; 1547 pIoCtxHead = NULL; 1548 while (pCur) 1549 { 1550 PVDIOCTX pInsert = pCur; 1551 pCur = pCur->pIoCtxNext; 1552 pInsert->pIoCtxNext = pIoCtxHead; 1553 pIoCtxHead = pInsert; 1554 } 1555 1556 /* Process now. */ 1557 pCur = pIoCtxHead; 1558 while (pCur) 1559 { 1560 int rc; 1561 PVDIOCTX pTmp = pCur; 1562 1563 pCur = pCur->pIoCtxNext; 1564 pTmp->pIoCtxNext = NULL; 1565 1566 Assert(!pTmp->pIoCtxParent); 1567 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED); 1568 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED; 1569 1570 rc = vdIoCtxProcessLocked(pTmp); 1571 if ( rc == VINF_VD_ASYNC_IO_FINISHED 1572 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false)) 1573 { 1574 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp)); 1575 vdThreadFinishWrite(pDisk); 1576 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1, 1577 pTmp->Type.Root.pvUser2, 1578 pTmp->rcReq); 1579 vdIoCtxFree(pDisk, pTmp); 1580 } 1581 } 1582 1583 LogFlowFunc(("returns\n")); 1584 } 1585 1586 /** 1587 * Processes the I/O context trying to lock the critical section. 1588 * The context is deferred if the critical section is busy. 1589 * 1590 * @returns VBox status code. 1591 * @param pIoCtx The I/O context to process. 1592 */ 1593 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx) 1594 { 1595 int rc = VINF_SUCCESS; 1596 PVBOXHDD pDisk = pIoCtx->pDisk; 1597 1598 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 1599 1600 /* Put it on the waiting list first. */ 1601 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx); 1602 1603 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false)) 1604 { 1605 /* Leave it again, the context will be processed just before leaving the lock. 
*/ 1606 LogFlowFunc(("Successfully acquired the lock\n")); 1607 rc = vdDiskUnlock(pDisk, pIoCtx); 1608 } 1609 else 1610 { 1611 LogFlowFunc(("Lock is held\n")); 1612 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1613 } 1614 1615 return rc; 1616 } 1617 1618 /** 1619 * Process the I/O context in a synchronous manner, waiting 1620 * for it to complete. 1621 * 1622 * @returns VBox status code of the completed request. 1623 * @param pIoCtx The sync I/O context. 1624 */ 1625 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx) 1626 { 1627 int rc = VINF_SUCCESS; 1628 PVBOXHDD pDisk = pIoCtx->pDisk; 1629 1630 LogFlowFunc(("pIoCtx=%p\n", pIoCtx)); 1631 1632 AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC, 1633 ("I/O context is not marked as synchronous\n")); 1634 1635 rc = vdIoCtxProcessTryLockDefer(pIoCtx); 1636 if (rc == VINF_VD_ASYNC_IO_FINISHED) 1637 rc = VINF_SUCCESS; 1638 1639 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) 1640 { 1641 rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT); 1642 AssertRC(rc); 1643 1644 rc = pDisk->rcSync; 1645 } 1646 else /* Success or error. */ 1647 vdIoCtxFree(pDisk, pIoCtx); 1648 1649 return rc; 1650 } 1651 1652 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1653 { 1654 return pDisk->pIoCtxLockOwner == pIoCtx; 1655 } 1656 1657 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1658 { 1659 int rc = VINF_SUCCESS; 1660 1661 VD_IS_LOCKED(pDisk); 1662 1663 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx)); 1664 1665 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX)) 1666 { 1667 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. 
*/ 1668 vdIoCtxDefer(pDisk, pIoCtx); 1669 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1670 } 1671 1672 LogFlowFunc(("returns -> %Rrc\n", rc)); 1673 return rc; 1674 } 1675 1676 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs) 1677 { 1678 LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n", 1679 pDisk, pIoCtx, fProcessBlockedReqs)); 1680 1681 VD_IS_LOCKED(pDisk); 1682 1683 LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner)); 1684 Assert(pDisk->pIoCtxLockOwner == pIoCtx); 1685 ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX); 1686 1687 if (fProcessBlockedReqs) 1688 { 1689 /* Process any blocked writes if the current request didn't caused another growing. */ 1690 vdDiskProcessBlockedIoCtx(pDisk); 1691 } 1692 1693 LogFlowFunc(("returns\n")); 1694 } 1695 1696 /** 910 1697 * Internal: Reads a given amount of data from the image chain of the disk. 911 1698 **/ 912 1699 static int vdDiskReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride, 913 uint64_t uOffset, void *pvBuf, size_t cbRead, size_t *pcbThisRead)1700 uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead) 914 1701 { 915 1702 int rc = VINF_SUCCESS; 916 1703 size_t cbThisRead = cbRead; 917 RTSGSEG SegmentBuf;918 RTSGBUF SgBuf;919 VDIOCTX IoCtx;920 1704 921 1705 AssertPtr(pcbThisRead); 922 1706 923 1707 *pcbThisRead = 0; 924 925 SegmentBuf.pvSeg = pvBuf;926 SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE;927 RTSgBufInit(&SgBuf, &SegmentBuf, 1);928 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL,929 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);930 1708 931 1709 /* … … 934 1712 */ 935 1713 rc = pImage->Backend->pfnRead(pImage->pBackendData, 936 uOffset, cbThisRead, &IoCtx,937 1714 uOffset, cbThisRead, pIoCtx, 1715 &cbThisRead); 938 1716 939 1717 if (rc == VERR_VD_BLOCK_FREE) … … 944 1722 { 945 1723 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 946 uOffset, cbThisRead, &IoCtx,947 1724 
uOffset, cbThisRead, pIoCtx, 1725 &cbThisRead); 948 1726 } 949 1727 } … … 952 1730 *pcbThisRead = cbThisRead; 953 1731 1732 return rc; 1733 } 1734 1735 /** 1736 * internal: read the specified amount of data in whatever blocks the backend 1737 * will give us - async version. 1738 */ 1739 static int vdReadHelperAsync(PVDIOCTX pIoCtx) 1740 { 1741 int rc; 1742 PVBOXHDD pDisk = pIoCtx->pDisk; 1743 size_t cbToRead = pIoCtx->Req.Io.cbTransfer; 1744 uint64_t uOffset = pIoCtx->Req.Io.uOffset; 1745 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur; 1746 PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride; 1747 unsigned cImagesRead = pIoCtx->Req.Io.cImagesRead; 1748 size_t cbThisRead; 1749 1750 /* Loop until all reads started or we have a backend which needs to read metadata. */ 1751 do 1752 { 1753 /* Search for image with allocated block. Do not attempt to read more 1754 * than the previous reads marked as valid. Otherwise this would return 1755 * stale data when different block sizes are used for the images. */ 1756 cbThisRead = cbToRead; 1757 1758 if ( pDisk->pCache 1759 && !pImageParentOverride) 1760 { 1761 rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead, 1762 pIoCtx, &cbThisRead); 1763 if (rc == VERR_VD_BLOCK_FREE) 1764 { 1765 rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead, 1766 pIoCtx, &cbThisRead); 1767 1768 /* If the read was successful, write the data back into the cache. */ 1769 if ( RT_SUCCESS(rc) 1770 && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UDATE_CACHE) 1771 { 1772 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead, 1773 pIoCtx, NULL); 1774 } 1775 } 1776 } 1777 else 1778 { 1779 1780 /* 1781 * Try to read from the given image. 1782 * If the block is not allocated read from override chain if present. 
1783 */ 1784 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1785 uOffset, cbThisRead, pIoCtx, 1786 &cbThisRead); 1787 1788 if ( rc == VERR_VD_BLOCK_FREE 1789 && cImagesRead != 1) 1790 { 1791 unsigned cImagesToProcess = cImagesRead; 1792 1793 pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev; 1794 pIoCtx->Req.Io.pImageParentOverride = NULL; 1795 1796 while (pCurrImage && rc == VERR_VD_BLOCK_FREE) 1797 { 1798 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1799 uOffset, cbThisRead, 1800 pIoCtx, &cbThisRead); 1801 if (cImagesToProcess == 1) 1802 break; 1803 else if (cImagesToProcess > 0) 1804 cImagesToProcess--; 1805 1806 if (rc == VERR_VD_BLOCK_FREE) 1807 pCurrImage = pCurrImage->pPrev; 1808 } 1809 } 1810 } 1811 1812 /* The task state will be updated on success already, don't do it here!. */ 1813 if (rc == VERR_VD_BLOCK_FREE) 1814 { 1815 /* No image in the chain contains the data for the block. */ 1816 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead); 1817 1818 /* Fill the free space with 0 if we are told to do so 1819 * or a previous read returned valid data. */ 1820 if ( (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS) 1821 || !pIoCtx->Req.Io.fAllFree) 1822 vdIoCtxSet(pIoCtx, '\0', cbThisRead); 1823 else 1824 pIoCtx->Req.Io.cbBufClear += cbThisRead; 1825 1826 if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS) 1827 rc = VINF_VD_NEW_ZEROED_BLOCK; 1828 else 1829 rc = VINF_SUCCESS; 1830 } 1831 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) 1832 rc = VINF_SUCCESS; 1833 else if (rc == VERR_VD_IOCTX_HALT) 1834 { 1835 uOffset += cbThisRead; 1836 cbToRead -= cbThisRead; 1837 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; 1838 } 1839 else if ( RT_SUCCESS(rc) 1840 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)) 1841 { 1842 /* First not free block, fill the space before with 0. 
*/ 1843 RTSGBUF SgBuf; 1844 RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf); 1845 RTSgBufReset(&SgBuf); 1846 RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear); 1847 pIoCtx->Req.Io.cbBufClear = 0; 1848 pIoCtx->Req.Io.fAllFree = false; 1849 } 1850 1851 if (RT_FAILURE(rc)) 1852 break; 1853 1854 cbToRead -= cbThisRead; 1855 uOffset += cbThisRead; 1856 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */ 1857 } while (cbToRead != 0 && RT_SUCCESS(rc)); 1858 1859 if ( rc == VERR_VD_NOT_ENOUGH_METADATA 1860 || rc == VERR_VD_IOCTX_HALT) 1861 { 1862 /* Save the current state. */ 1863 pIoCtx->Req.Io.uOffset = uOffset; 1864 pIoCtx->Req.Io.cbTransfer = cbToRead; 1865 pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart; 1866 } 1867 1868 return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS) && pIoCtx->Req.Io.fAllFree) 1869 ? VERR_VD_BLOCK_FREE 1870 : rc; 1871 } 1872 1873 /** 1874 * internal: parent image read wrapper for compacting. 1875 */ 1876 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf, 1877 size_t cbRead) 1878 { 1879 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser; 1880 1881 /** @todo 1882 * Only used for compaction so far which is not possible to mix with async I/O. 1883 * Needs to be changed if we want to support online compaction of images. 1884 */ 1885 bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true); 1886 AssertMsgReturn(!fLocked, 1887 ("Calling synchronous parent read while another thread holds the disk lock\n"), 1888 VERR_VD_INVALID_STATE); 1889 1890 /* Fake an I/O context. 
*/ 1891 RTSGSEG Segment; 1892 RTSGBUF SgBuf; 1893 VDIOCTX IoCtx; 1894 1895 Segment.pvSeg = pvBuf; 1896 Segment.cbSeg = cbRead; 1897 RTSgBufInit(&SgBuf, &Segment, 1); 1898 vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage, 1899 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1900 int rc = vdReadHelperAsync(&IoCtx); 1901 ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false); 954 1902 return rc; 955 1903 } … … 984 1932 bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead) 985 1933 { 986 int rc = VINF_SUCCESS; 987 size_t cbThisRead; 988 bool fAllFree = true; 989 size_t cbBufClear = 0; 990 991 /* Loop until all read. */ 992 do 993 { 994 /* Search for image with allocated block. Do not attempt to read more 995 * than the previous reads marked as valid. Otherwise this would return 996 * stale data when different block sizes are used for the images. */ 997 cbThisRead = cbRead; 998 999 if ( pDisk->pCache 1000 && !pImageParentOverride) 1001 { 1002 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */ 1003 rc = vdCacheReadHelper(pDisk->pCache, uOffset, pvBuf, 1004 cbThisRead, &cbThisRead); 1005 #endif 1006 if (rc == VERR_VD_BLOCK_FREE) 1007 { 1008 rc = vdDiskReadHelper(pDisk, pImage, NULL, uOffset, pvBuf, cbThisRead, 1009 &cbThisRead); 1010 1011 /* If the read was successful, write the data back into the cache. */ 1012 if ( RT_SUCCESS(rc) 1013 && fUpdateCache) 1014 { 1015 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. 
*/ 1016 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, 1017 cbThisRead, NULL); 1018 #endif 1019 } 1020 } 1021 } 1022 else 1023 { 1024 RTSGSEG SegmentBuf; 1025 RTSGBUF SgBuf; 1026 VDIOCTX IoCtx; 1027 1028 SegmentBuf.pvSeg = pvBuf; 1029 SegmentBuf.cbSeg = cbThisRead; 1030 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 1031 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL, 1032 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1033 1034 /* 1035 * Try to read from the given image. 1036 * If the block is not allocated read from override chain if present. 1037 */ 1038 rc = pImage->Backend->pfnRead(pImage->pBackendData, 1039 uOffset, cbThisRead, &IoCtx, 1040 &cbThisRead); 1041 1042 if ( rc == VERR_VD_BLOCK_FREE 1043 && cImagesRead != 1) 1044 { 1045 unsigned cImagesToProcess = cImagesRead; 1046 1047 for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev; 1048 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE; 1049 pCurrImage = pCurrImage->pPrev) 1050 { 1051 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1052 uOffset, cbThisRead, 1053 &IoCtx, &cbThisRead); 1054 if (cImagesToProcess == 1) 1055 break; 1056 else if (cImagesToProcess > 0) 1057 cImagesToProcess--; 1058 } 1059 } 1060 } 1061 1062 /* No image in the chain contains the data for the block. */ 1063 if (rc == VERR_VD_BLOCK_FREE) 1064 { 1065 /* Fill the free space with 0 if we are told to do so 1066 * or a previous read returned valid data. */ 1067 if (fZeroFreeBlocks || !fAllFree) 1068 memset(pvBuf, '\0', cbThisRead); 1069 else 1070 cbBufClear += cbThisRead; 1071 1072 if (pImage->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS) 1073 rc = VINF_VD_NEW_ZEROED_BLOCK; 1074 else 1075 rc = VINF_SUCCESS; 1076 } 1077 else if (RT_SUCCESS(rc)) 1078 { 1079 /* First not free block, fill the space before with 0. 
*/ 1080 if (!fZeroFreeBlocks) 1081 { 1082 memset((char *)pvBuf - cbBufClear, '\0', cbBufClear); 1083 cbBufClear = 0; 1084 fAllFree = false; 1085 } 1086 } 1087 1088 cbRead -= cbThisRead; 1089 uOffset += cbThisRead; 1090 pvBuf = (char *)pvBuf + cbThisRead; 1091 } while (cbRead != 0 && RT_SUCCESS(rc)); 1092 1093 return (!fZeroFreeBlocks && fAllFree) ? VERR_VD_BLOCK_FREE : rc; 1934 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE; 1935 RTSGSEG Segment; 1936 RTSGBUF SgBuf; 1937 VDIOCTX IoCtx; 1938 1939 if (fZeroFreeBlocks) 1940 fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; 1941 if (fUpdateCache) 1942 fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE; 1943 1944 Segment.pvSeg = pvBuf; 1945 Segment.cbSeg = cbRead; 1946 RTSgBufInit(&SgBuf, &Segment, 1); 1947 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf, 1948 NULL, vdReadHelperAsync, fFlags); 1949 1950 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride; 1951 IoCtx.Req.Io.cImagesRead = cImagesRead; 1952 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete; 1953 IoCtx.Type.Root.pvUser1 = pDisk; 1954 IoCtx.Type.Root.pvUser2 = NULL; 1955 return vdIoCtxProcessSync(&IoCtx); 1094 1956 } 1095 1957 … … 1103 1965 return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead, 1104 1966 true /* fZeroFreeBlocks */, fUpdateCache, 0); 1105 }1106 1107 /**1108 * Creates a new empty discard state.1109 *1110 * @returns Pointer to the new discard state or NULL if out of memory.1111 */1112 static PVDDISCARDSTATE vdDiscardStateCreate(void)1113 {1114 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));1115 1116 if (pDiscard)1117 {1118 RTListInit(&pDiscard->ListLru);1119 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));1120 if (!pDiscard->pTreeBlocks)1121 {1122 RTMemFree(pDiscard);1123 pDiscard = NULL;1124 }1125 }1126 1127 return pDiscard;1128 }1129 1130 /**1131 * Removes the least recently used blocks from the waiting list until1132 * the new value is 
reached.1133 *1134 * @returns VBox status code.1135 * @param pDisk VD disk container.1136 * @param pDiscard The discard state.1137 * @param cbDiscardingNew How many bytes should be waiting on success.1138 * The number of bytes waiting can be less.1139 */1140 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)1141 {1142 int rc = VINF_SUCCESS;1143 1144 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",1145 pDisk, pDiscard, cbDiscardingNew));1146 1147 while (pDiscard->cbDiscarding > cbDiscardingNew)1148 {1149 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);1150 1151 Assert(!RTListIsEmpty(&pDiscard->ListLru));1152 1153 /* Go over the allocation bitmap and mark all discarded sectors as unused. */1154 uint64_t offStart = pBlock->Core.Key;1155 uint32_t idxStart = 0;1156 size_t cbLeft = pBlock->cbDiscard;1157 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);1158 uint32_t cSectors = pBlock->cbDiscard / 512;1159 1160 while (cbLeft > 0)1161 {1162 int32_t idxEnd;1163 size_t cbThis = cbLeft;1164 1165 if (fAllocated)1166 {1167 /* Check for the first unallocated bit. */1168 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);1169 if (idxEnd != -1)1170 {1171 cbThis = (idxEnd - idxStart) * 512;1172 fAllocated = false;1173 }1174 }1175 else1176 {1177 /* Mark as unused and check for the first set bit. 
*/1178 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);1179 if (idxEnd != -1)1180 cbThis = (idxEnd - idxStart) * 512;1181 1182 1183 VDIOCTX IoCtx;1184 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,1185 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);1186 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,1187 &IoCtx, offStart, cbThis, NULL,1188 NULL, &cbThis, NULL,1189 VD_DISCARD_MARK_UNUSED);1190 if (RT_FAILURE(rc))1191 break;1192 1193 fAllocated = true;1194 }1195 1196 idxStart = idxEnd;1197 offStart += cbThis;1198 cbLeft -= cbThis;1199 }1200 1201 if (RT_FAILURE(rc))1202 break;1203 1204 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);1205 Assert(pBlockRemove == pBlock);1206 RTListNodeRemove(&pBlock->NodeLru);1207 1208 pDiscard->cbDiscarding -= pBlock->cbDiscard;1209 RTMemFree(pBlock->pbmAllocated);1210 RTMemFree(pBlock);1211 }1212 1213 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);1214 1215 LogFlowFunc(("returns rc=%Rrc\n", rc));1216 return rc;1217 }1218 1219 /**1220 * Destroys the current discard state, writing any waiting blocks to the image.1221 *1222 * @returns VBox status code.1223 * @param pDisk VD disk container.1224 */1225 static int vdDiscardStateDestroy(PVBOXHDD pDisk)1226 {1227 int rc = VINF_SUCCESS;1228 1229 if (pDisk->pDiscard)1230 {1231 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. 
*/);1232 AssertRC(rc);1233 RTMemFree(pDisk->pDiscard->pTreeBlocks);1234 RTMemFree(pDisk->pDiscard);1235 pDisk->pDiscard = NULL;1236 }1237 1238 return rc;1239 }1240 1241 /**1242 * Marks the given range as allocated in the image.1243 * Required if there are discards in progress and a write to a block which can get discarded1244 * is written to.1245 *1246 * @returns VBox status code.1247 * @param pDisk VD container data.1248 * @param uOffset First byte to mark as allocated.1249 * @param cbRange Number of bytes to mark as allocated.1250 */1251 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange)1252 {1253 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;1254 int rc = VINF_SUCCESS;1255 1256 if (pDiscard)1257 {1258 do1259 {1260 size_t cbThisRange = cbRange;1261 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);1262 1263 if (pBlock)1264 {1265 int32_t idxStart, idxEnd;1266 1267 Assert(!(cbThisRange % 512));1268 Assert(!((uOffset - pBlock->Core.Key) % 512));1269 1270 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);1271 1272 idxStart = (uOffset - pBlock->Core.Key) / 512;1273 idxEnd = idxStart + (cbThisRange / 512);1274 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);1275 }1276 else1277 {1278 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);1279 if (pBlock)1280 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);1281 }1282 1283 Assert(cbRange >= cbThisRange);1284 1285 uOffset += cbThisRange;1286 cbRange -= cbThisRange;1287 } while (cbRange != 0);1288 }1289 1290 return rc;1291 }1292 1293 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1294 uint64_t uOffset, size_t cbTransfer,1295 PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,1296 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1297 uint32_t fFlags)1298 {1299 PVDIOCTX pIoCtx = NULL;1300 1301 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);1302 if 
(RT_LIKELY(pIoCtx))1303 {1304 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1305 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);1306 }1307 1308 return pIoCtx;1309 }1310 1311 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1312 uint64_t uOffset, size_t cbTransfer,1313 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,1314 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,1315 void *pvUser1, void *pvUser2,1316 void *pvAllocation,1317 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1318 uint32_t fFlags)1319 {1320 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1321 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);1322 1323 if (RT_LIKELY(pIoCtx))1324 {1325 pIoCtx->pIoCtxParent = NULL;1326 pIoCtx->Type.Root.pfnComplete = pfnComplete;1327 pIoCtx->Type.Root.pvUser1 = pvUser1;1328 pIoCtx->Type.Root.pvUser2 = pvUser2;1329 }1330 1331 LogFlow(("Allocated root I/O context %#p\n", pIoCtx));1332 return pIoCtx;1333 }1334 1335 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges,1336 unsigned cRanges,1337 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,1338 void *pvUser1, void *pvUser2,1339 void *pvAllocation,1340 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1341 uint32_t fFlags)1342 {1343 PVDIOCTX pIoCtx = NULL;1344 1345 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);1346 if (RT_LIKELY(pIoCtx))1347 {1348 pIoCtx->pIoCtxNext = NULL;1349 pIoCtx->pDisk = pDisk;1350 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD;1351 pIoCtx->cDataTransfersPending = 0;1352 pIoCtx->cMetaTransfersPending = 0;1353 pIoCtx->fComplete = false;1354 pIoCtx->fFlags = fFlags;1355 pIoCtx->pvAllocation = pvAllocation;1356 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;1357 pIoCtx->pfnIoCtxTransferNext = NULL;1358 pIoCtx->rcReq = VINF_SUCCESS;1359 pIoCtx->Req.Discard.paRanges = paRanges;1360 pIoCtx->Req.Discard.cRanges = cRanges;1361 pIoCtx->Req.Discard.idxRange = 0;1362 pIoCtx->Req.Discard.cbDiscardLeft = 0;1363 pIoCtx->Req.Discard.offCur = 0;1364 
pIoCtx->Req.Discard.cbThisDiscard = 0;1365 1366 pIoCtx->pIoCtxParent = NULL;1367 pIoCtx->Type.Root.pfnComplete = pfnComplete;1368 pIoCtx->Type.Root.pvUser1 = pvUser1;1369 pIoCtx->Type.Root.pvUser2 = pvUser2;1370 }1371 1372 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));1373 return pIoCtx;1374 }1375 1376 DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1377 uint64_t uOffset, size_t cbTransfer,1378 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,1379 PVDIOCTX pIoCtxParent, size_t cbTransferParent,1380 size_t cbWriteParent, void *pvAllocation,1381 PFNVDIOCTXTRANSFER pfnIoCtxTransfer)1382 {1383 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1384 pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0);1385 1386 AssertPtr(pIoCtxParent);1387 Assert(!pIoCtxParent->pIoCtxParent);1388 1389 if (RT_LIKELY(pIoCtx))1390 {1391 pIoCtx->pIoCtxParent = pIoCtxParent;1392 pIoCtx->Type.Child.uOffsetSaved = uOffset;1393 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;1394 pIoCtx->Type.Child.cbTransferParent = cbTransferParent;1395 pIoCtx->Type.Child.cbWriteParent = cbWriteParent;1396 }1397 1398 LogFlow(("Allocated child I/O context %#p\n", pIoCtx));1399 return pIoCtx;1400 }1401 1402 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)1403 {1404 PVDIOTASK pIoTask = NULL;1405 1406 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);1407 if (pIoTask)1408 {1409 pIoTask->pIoStorage = pIoStorage;1410 pIoTask->pfnComplete = pfnComplete;1411 pIoTask->pvUser = pvUser;1412 pIoTask->fMeta = false;1413 pIoTask->Type.User.cbTransfer = cbTransfer;1414 pIoTask->Type.User.pIoCtx = pIoCtx;1415 }1416 1417 return pIoTask;1418 }1419 1420 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)1421 {1422 PVDIOTASK pIoTask = NULL;1423 1424 pIoTask = 
(PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);1425 if (pIoTask)1426 {1427 pIoTask->pIoStorage = pIoStorage;1428 pIoTask->pfnComplete = pfnComplete;1429 pIoTask->pvUser = pvUser;1430 pIoTask->fMeta = true;1431 pIoTask->Type.Meta.pMetaXfer = pMetaXfer;1432 }1433 1434 return pIoTask;1435 }1436 1437 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1438 {1439 LogFlow(("Freeing I/O context %#p\n", pIoCtx));1440 if (pIoCtx->pvAllocation)1441 RTMemFree(pIoCtx->pvAllocation);1442 #ifdef DEBUG1443 memset(pIoCtx, 0xff, sizeof(VDIOCTX));1444 #endif1445 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);1446 }1447 1448 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask)1449 {1450 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);1451 }1452 1453 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)1454 {1455 AssertPtr(pIoCtx->pIoCtxParent);1456 1457 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);1458 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved;1459 pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved;1460 }1461 1462 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)1463 {1464 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));1465 1466 if (RT_LIKELY(pMetaXfer))1467 {1468 pMetaXfer->Core.Key = uOffset;1469 pMetaXfer->Core.KeyLast = uOffset + cb - 1;1470 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE;1471 pMetaXfer->cbMeta = cb;1472 pMetaXfer->pIoStorage = pIoStorage;1473 pMetaXfer->cRefs = 0;1474 RTListInit(&pMetaXfer->ListIoCtxWaiting);1475 }1476 return pMetaXfer;1477 }1478 1479 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)1480 {1481 /* Put it on the waiting list. 
*/1482 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);1483 PVDIOCTX pHeadOld;1484 pIoCtx->pIoCtxNext = pNext;1485 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))1486 {1487 pNext = pHeadOld;1488 Assert(pNext != pIoCtx);1489 pIoCtx->pIoCtxNext = pNext;1490 ASMNopPause();1491 }1492 }1493 1494 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1495 {1496 LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx));1497 1498 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));1499 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;1500 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);1501 }1502 1503 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)1504 {1505 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);1506 }1507 1508 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)1509 {1510 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);1511 }1512 1513 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)1514 {1515 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);1516 }1517 1518 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)1519 {1520 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);1521 }1522 1523 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)1524 {1525 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);1526 }1527 1528 /**1529 * Process the I/O context, core method which assumes that the I/O context1530 * acquired the lock.1531 *1532 * @returns VBox status code.1533 * @param pIoCtx I/O context to process.1534 */1535 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)1536 {1537 int rc = VINF_SUCCESS;1538 1539 VD_IS_LOCKED(pIoCtx->pDisk);1540 1541 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));1542 1543 if ( !pIoCtx->cMetaTransfersPending1544 && !pIoCtx->cDataTransfersPending1545 && !pIoCtx->pfnIoCtxTransfer)1546 {1547 
rc = VINF_VD_ASYNC_IO_FINISHED;1548 goto out;1549 }1550 1551 /*1552 * We complete the I/O context in case of an error1553 * if there is no I/O task pending.1554 */1555 if ( RT_FAILURE(pIoCtx->rcReq)1556 && !pIoCtx->cMetaTransfersPending1557 && !pIoCtx->cDataTransfersPending)1558 {1559 rc = VINF_VD_ASYNC_IO_FINISHED;1560 goto out;1561 }1562 1563 /* Don't change anything if there is a metadata transfer pending or we are blocked. */1564 if ( pIoCtx->cMetaTransfersPending1565 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))1566 {1567 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1568 goto out;1569 }1570 1571 if (pIoCtx->pfnIoCtxTransfer)1572 {1573 /* Call the transfer function advancing to the next while there is no error. */1574 while ( pIoCtx->pfnIoCtxTransfer1575 && !pIoCtx->cMetaTransfersPending1576 && RT_SUCCESS(rc))1577 {1578 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));1579 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);1580 1581 /* Advance to the next part of the transfer if the current one succeeded. 
*/1582 if (RT_SUCCESS(rc))1583 {1584 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;1585 pIoCtx->pfnIoCtxTransferNext = NULL;1586 }1587 }1588 }1589 1590 if ( RT_SUCCESS(rc)1591 && !pIoCtx->cMetaTransfersPending1592 && !pIoCtx->cDataTransfersPending)1593 rc = VINF_VD_ASYNC_IO_FINISHED;1594 else if ( RT_SUCCESS(rc)1595 || rc == VERR_VD_NOT_ENOUGH_METADATA1596 || rc == VERR_VD_IOCTX_HALT)1597 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1598 else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))1599 {1600 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);1601 /*1602 * The I/O context completed if we have an error and there is no data1603 * or meta data transfer pending.1604 */1605 if ( !pIoCtx->cMetaTransfersPending1606 && !pIoCtx->cDataTransfersPending)1607 rc = VINF_VD_ASYNC_IO_FINISHED;1608 else1609 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1610 }1611 1612 out:1613 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",1614 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,1615 pIoCtx->fComplete));1616 1617 return rc;1618 }1619 1620 /**1621 * Processes the list of waiting I/O contexts.1622 *1623 * @returns VBox status code.1624 * @param pDisk The disk structure.1625 * @param pIoCtxRc An I/O context handle which waits on the list. When processed1626 * The status code is returned. NULL if there is no I/O context1627 * to return the status code for.1628 */1629 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)1630 {1631 int rc = VINF_SUCCESS;1632 1633 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));1634 1635 VD_IS_LOCKED(pDisk);1636 1637 /* Get the waiting list and process it in FIFO order. */1638 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);1639 1640 /* Reverse it. 
*/1641 PVDIOCTX pCur = pIoCtxHead;1642 pIoCtxHead = NULL;1643 while (pCur)1644 {1645 PVDIOCTX pInsert = pCur;1646 pCur = pCur->pIoCtxNext;1647 pInsert->pIoCtxNext = pIoCtxHead;1648 pIoCtxHead = pInsert;1649 }1650 1651 /* Process now. */1652 pCur = pIoCtxHead;1653 while (pCur)1654 {1655 int rcTmp;1656 PVDIOCTX pTmp = pCur;1657 1658 pCur = pCur->pIoCtxNext;1659 pTmp->pIoCtxNext = NULL;1660 1661 rcTmp = vdIoCtxProcessLocked(pTmp);1662 if (pTmp == pIoCtxRc)1663 {1664 /* The given I/O context was processed, pass the return code to the caller. */1665 rc = rcTmp;1666 }1667 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED1668 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))1669 {1670 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));1671 vdThreadFinishWrite(pDisk);1672 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,1673 pTmp->Type.Root.pvUser2,1674 pTmp->rcReq);1675 vdIoCtxFree(pDisk, pTmp);1676 }1677 }1678 1679 LogFlowFunc(("returns rc=%Rrc\n", rc));1680 return rc;1681 }1682 1683 /**1684 * Processes the list of blocked I/O contexts.1685 *1686 * @returns nothing.1687 * @param pDisk The disk structure.1688 */1689 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)1690 {1691 LogFlowFunc(("pDisk=%#p\n", pDisk));1692 1693 VD_IS_LOCKED(pDisk);1694 1695 /* Get the waiting list and process it in FIFO order. */1696 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);1697 1698 /* Reverse it. */1699 PVDIOCTX pCur = pIoCtxHead;1700 pIoCtxHead = NULL;1701 while (pCur)1702 {1703 PVDIOCTX pInsert = pCur;1704 pCur = pCur->pIoCtxNext;1705 pInsert->pIoCtxNext = pIoCtxHead;1706 pIoCtxHead = pInsert;1707 }1708 1709 /* Process now. 
*/1710 pCur = pIoCtxHead;1711 while (pCur)1712 {1713 int rc;1714 PVDIOCTX pTmp = pCur;1715 1716 pCur = pCur->pIoCtxNext;1717 pTmp->pIoCtxNext = NULL;1718 1719 Assert(!pTmp->pIoCtxParent);1720 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);1721 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;1722 1723 rc = vdIoCtxProcessLocked(pTmp);1724 if ( rc == VINF_VD_ASYNC_IO_FINISHED1725 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))1726 {1727 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));1728 vdThreadFinishWrite(pDisk);1729 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,1730 pTmp->Type.Root.pvUser2,1731 pTmp->rcReq);1732 vdIoCtxFree(pDisk, pTmp);1733 }1734 }1735 1736 LogFlowFunc(("returns\n"));1737 }1738 1739 /**1740 * Processes the I/O context trying to lock the criticial section.1741 * The context is deferred if the critical section is busy.1742 *1743 * @returns VBox status code.1744 * @param pIoCtx The I/O context to process.1745 */1746 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)1747 {1748 int rc = VINF_SUCCESS;1749 PVBOXHDD pDisk = pIoCtx->pDisk;1750 1751 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));1752 1753 /* Put it on the waiting list first. */1754 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);1755 1756 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))1757 {1758 /* Leave it again, the context will be processed just before leaving the lock. 
*/1759 LogFlowFunc(("Successfully acquired the lock\n"));1760 rc = vdDiskUnlock(pDisk, pIoCtx);1761 }1762 else1763 {1764 LogFlowFunc(("Lock is held\n"));1765 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1766 }1767 1768 return rc;1769 }1770 1771 /**1772 * Process the I/O context in a synchronous manner, waiting1773 * for it to complete.1774 *1775 * @returns VBox status code of the completed request.1776 * @param pIoCtx The sync I/O context.1777 */1778 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx)1779 {1780 int rc = VINF_SUCCESS;1781 PVBOXHDD pDisk = pIoCtx->pDisk;1782 1783 LogFlowFunc(("pIoCtx=%p\n", pIoCtx));1784 1785 AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC,1786 ("I/O context is not marked as synchronous\n"));1787 1788 rc = vdIoCtxProcessTryLockDefer(pIoCtx);1789 if (rc == VINF_VD_ASYNC_IO_FINISHED)1790 rc = VINF_SUCCESS;1791 1792 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)1793 {1794 rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT);1795 AssertRC(rc);1796 1797 rc = pDisk->rcSync;1798 }1799 else /* Success or error. */1800 vdIoCtxFree(pDisk, pIoCtx);1801 1802 return rc;1803 }1804 1805 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1806 {1807 return pDisk->pIoCtxLockOwner == pIoCtx;1808 }1809 1810 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1811 {1812 int rc = VINF_SUCCESS;1813 1814 VD_IS_LOCKED(pDisk);1815 1816 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));1817 1818 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))1819 {1820 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. 
*/1821 vdIoCtxDefer(pDisk, pIoCtx);1822 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1823 }1824 1825 LogFlowFunc(("returns -> %Rrc\n", rc));1826 return rc;1827 }1828 1829 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)1830 {1831 LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",1832 pDisk, pIoCtx, fProcessBlockedReqs));1833 1834 VD_IS_LOCKED(pDisk);1835 1836 LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));1837 Assert(pDisk->pIoCtxLockOwner == pIoCtx);1838 ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);1839 1840 if (fProcessBlockedReqs)1841 {1842 /* Process any blocked writes if the current request didn't caused another growing. */1843 vdDiskProcessBlockedIoCtx(pDisk);1844 }1845 1846 LogFlowFunc(("returns\n"));1847 }1848 1849 /**1850 * internal: read the specified amount of data in whatever blocks the backend1851 * will give us - async version.1852 */1853 static int vdReadHelperAsync(PVDIOCTX pIoCtx)1854 {1855 int rc;1856 size_t cbToRead = pIoCtx->Req.Io.cbTransfer;1857 uint64_t uOffset = pIoCtx->Req.Io.uOffset;1858 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;;1859 size_t cbThisRead;1860 1861 /* Loop until all reads started or we have a backend which needs to read metadata. */1862 do1863 {1864 /* Search for image with allocated block. Do not attempt to read more1865 * than the previous reads marked as valid. Otherwise this would return1866 * stale data when different block sizes are used for the images. 
*/1867 cbThisRead = cbToRead;1868 1869 /*1870 * Try to read from the given image.1871 * If the block is not allocated read from override chain if present.1872 */1873 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,1874 uOffset, cbThisRead,1875 pIoCtx, &cbThisRead);1876 1877 if (rc == VERR_VD_BLOCK_FREE)1878 {1879 while ( pCurrImage->pPrev != NULL1880 && rc == VERR_VD_BLOCK_FREE)1881 {1882 pCurrImage = pCurrImage->pPrev;1883 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,1884 uOffset, cbThisRead,1885 pIoCtx, &cbThisRead);1886 }1887 }1888 1889 /* The task state will be updated on success already, don't do it here!. */1890 if (rc == VERR_VD_BLOCK_FREE)1891 {1892 /* No image in the chain contains the data for the block. */1893 vdIoCtxSet(pIoCtx, '\0', cbThisRead);1894 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);1895 rc = VINF_SUCCESS;1896 }1897 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)1898 rc = VINF_SUCCESS;1899 else if (rc == VERR_VD_IOCTX_HALT)1900 {1901 uOffset += cbThisRead;1902 cbToRead -= cbThisRead;1903 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;1904 }1905 1906 if (RT_FAILURE(rc))1907 break;1908 1909 cbToRead -= cbThisRead;1910 uOffset += cbThisRead;1911 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */1912 } while (cbToRead != 0 && RT_SUCCESS(rc));1913 1914 if ( rc == VERR_VD_NOT_ENOUGH_METADATA1915 || rc == VERR_VD_IOCTX_HALT)1916 {1917 /* Save the current state. */1918 pIoCtx->Req.Io.uOffset = uOffset;1919 pIoCtx->Req.Io.cbTransfer = cbToRead;1920 pIoCtx->Req.Io.pImageCur = pCurrImage ? 
pCurrImage : pIoCtx->Req.Io.pImageStart;1921 }1922 1923 return rc;1924 }1925 1926 /**1927 * internal: parent image read wrapper for compacting.1928 */1929 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,1930 size_t cbRead)1931 {1932 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;1933 return vdReadHelper(pParentState->pDisk, pParentState->pImage, uOffset,1934 pvBuf, cbRead, false /* fUpdateCache */);1935 1967 } 1936 1968 … … 1984 2016 1985 2017 /** 1986 * internal: write a complete block (only used for diff images), taking the1987 * remaining data from parent images. This implementation does not optimize1988 * anything (except that it tries to read only that portions from parent1989 * images that are really needed).1990 */1991 static int vdWriteHelperStandard(PVBOXHDD pDisk, PVDIMAGE pImage,1992 PVDIMAGE pImageParentOverride,1993 uint64_t uOffset, size_t cbWrite,1994 size_t cbThisWrite, size_t cbPreRead,1995 size_t cbPostRead, const void *pvBuf,1996 void *pvTmp)1997 {1998 int rc = VINF_SUCCESS;1999 2000 LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",2001 pDisk, pImage, pImageParentOverride, uOffset, cbWrite));2002 2003 /* Read the data that goes before the write to fill the block. */2004 if (cbPreRead)2005 {2006 /*2007 * Updating the cache doesn't make sense here because2008 * this will be done after the complete block was written.2009 */2010 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,2011 uOffset - cbPreRead, pvTmp, cbPreRead,2012 true /* fZeroFreeBlocks*/,2013 false /* fUpdateCache */, 0);2014 if (RT_FAILURE(rc))2015 return rc;2016 }2017 2018 /* Copy the data to the right place in the buffer. */2019 memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);2020 2021 /* Read the data that goes after the write to fill the block. */2022 if (cbPostRead)2023 {2024 /* If we have data to be written, use that instead of reading2025 * data from the image. 
*/2026 size_t cbWriteCopy;2027 if (cbWrite > cbThisWrite)2028 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);2029 else2030 cbWriteCopy = 0;2031 /* Figure out how much we cannot read from the image, because2032 * the last block to write might exceed the nominal size of the2033 * image for technical reasons. */2034 size_t cbFill;2035 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)2036 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;2037 else2038 cbFill = 0;2039 /* The rest must be read from the image. */2040 size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill;2041 2042 /* Now assemble the remaining data. */2043 if (cbWriteCopy)2044 memcpy((char *)pvTmp + cbPreRead + cbThisWrite,2045 (char *)pvBuf + cbThisWrite, cbWriteCopy);2046 if (cbReadImage)2047 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,2048 uOffset + cbThisWrite + cbWriteCopy,2049 (char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy,2050 cbReadImage, true /* fZeroFreeBlocks */,2051 false /* fUpdateCache */, 0);2052 if (RT_FAILURE(rc))2053 return rc;2054 /* Zero out the remainder of this block. Will never be visible, as this2055 * is beyond the limit of the image. */2056 if (cbFill)2057 memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,2058 '\0', cbFill);2059 }2060 2061 /* Write the full block to the virtual disk. 
*/2062 RTSGSEG SegmentBuf;2063 RTSGBUF SgBuf;2064 VDIOCTX IoCtx;2065 2066 SegmentBuf.pvSeg = pvTmp;2067 SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead;2068 RTSgBufInit(&SgBuf, &SegmentBuf, 1);2069 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,2070 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);2071 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead,2072 cbPreRead + cbThisWrite + cbPostRead,2073 &IoCtx, NULL, &cbPreRead, &cbPostRead, 0);2074 Assert(rc != VERR_VD_BLOCK_FREE);2075 Assert(cbPreRead == 0);2076 Assert(cbPostRead == 0);2077 2078 return rc;2079 }2080 2081 /**2082 * internal: write a complete block (only used for diff images), taking the2083 * remaining data from parent images. This implementation optimizes out writes2084 * that do not change the data relative to the state as of the parent images.2085 * All backends which support differential/growing images support this.2086 */2087 static int vdWriteHelperOptimized(PVBOXHDD pDisk, PVDIMAGE pImage,2088 PVDIMAGE pImageParentOverride,2089 uint64_t uOffset, size_t cbWrite,2090 size_t cbThisWrite, size_t cbPreRead,2091 size_t cbPostRead, const void *pvBuf,2092 void *pvTmp, unsigned cImagesRead)2093 {2094 size_t cbFill = 0;2095 size_t cbWriteCopy = 0;2096 size_t cbReadImage = 0;2097 int rc;2098 2099 LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",2100 pDisk, pImage, pImageParentOverride, uOffset, cbWrite));2101 2102 if (cbPostRead)2103 {2104 /* Figure out how much we cannot read from the image, because2105 * the last block to write might exceed the nominal size of the2106 * image for technical reasons. */2107 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)2108 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;2109 2110 /* If we have data to be written, use that instead of reading2111 * data from the image. 
*/2112 if (cbWrite > cbThisWrite)2113 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);2114 2115 /* The rest must be read from the image. */2116 cbReadImage = cbPostRead - cbWriteCopy - cbFill;2117 }2118 2119 /* Read the entire data of the block so that we can compare whether it will2120 * be modified by the write or not. */2121 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, uOffset - cbPreRead, pvTmp,2122 cbPreRead + cbThisWrite + cbPostRead - cbFill,2123 true /* fZeroFreeBlocks */, false /* fUpdateCache */,2124 cImagesRead);2125 if (RT_FAILURE(rc))2126 return rc;2127 2128 /* Check if the write would modify anything in this block. */2129 if ( !memcmp((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite)2130 && (!cbWriteCopy || !memcmp((char *)pvTmp + cbPreRead + cbThisWrite,2131 (char *)pvBuf + cbThisWrite, cbWriteCopy)))2132 {2133 /* Block is completely unchanged, so no need to write anything. */2134 return VINF_SUCCESS;2135 }2136 2137 /* Copy the data to the right place in the buffer. */2138 memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);2139 2140 /* Handle the data that goes after the write to fill the block. */2141 if (cbPostRead)2142 {2143 /* Now assemble the remaining data. */2144 if (cbWriteCopy)2145 memcpy((char *)pvTmp + cbPreRead + cbThisWrite,2146 (char *)pvBuf + cbThisWrite, cbWriteCopy);2147 /* Zero out the remainder of this block. Will never be visible, as this2148 * is beyond the limit of the image. */2149 if (cbFill)2150 memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,2151 '\0', cbFill);2152 }2153 2154 /* Write the full block to the virtual disk. 
*/2155 RTSGSEG SegmentBuf;2156 RTSGBUF SgBuf;2157 VDIOCTX IoCtx;2158 2159 SegmentBuf.pvSeg = pvTmp;2160 SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead;2161 RTSgBufInit(&SgBuf, &SegmentBuf, 1);2162 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,2163 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);2164 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead,2165 cbPreRead + cbThisWrite + cbPostRead,2166 &IoCtx, NULL, &cbPreRead, &cbPostRead, 0);2167 Assert(rc != VERR_VD_BLOCK_FREE);2168 Assert(cbPreRead == 0);2169 Assert(cbPostRead == 0);2170 2171 return rc;2172 }2173 2174 /**2175 2018 * internal: write buffer to the image, taking care of block boundaries and 2176 2019 * write optimizations. … … 2181 2024 bool fUpdateCache, unsigned cImagesRead) 2182 2025 { 2183 int rc; 2184 unsigned fWrite; 2185 size_t cbThisWrite; 2186 size_t cbPreRead, cbPostRead; 2187 uint64_t uOffsetCur = uOffset; 2188 size_t cbWriteCur = cbWrite; 2189 const void *pcvBufCur = pvBuf; 2190 RTSGSEG SegmentBuf; 2026 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE; 2027 RTSGSEG Segment; 2191 2028 RTSGBUF SgBuf; 2192 2029 VDIOCTX IoCtx; 2193 2030 2194 /* Loop until all written. */ 2195 do 2196 { 2197 /* Try to write the possibly partial block to the last opened image. 2198 * This works when the block is already allocated in this image or 2199 * if it is a full-block write (and allocation isn't suppressed below). 2200 * For image formats which don't support zero blocks, it's beneficial 2201 * to avoid unnecessarily allocating unchanged blocks. This prevents 2202 * unwanted expanding of images. VMDK is an example. */ 2203 cbThisWrite = cbWriteCur; 2204 fWrite = (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME) 2205 ? 
0 : VD_WRITE_NO_ALLOC; 2206 2207 SegmentBuf.pvSeg = (void *)pcvBufCur; 2208 SegmentBuf.cbSeg = cbWrite; 2209 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 2210 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL, 2211 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 2212 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffsetCur, cbThisWrite, 2213 &IoCtx, &cbThisWrite, &cbPreRead, 2214 &cbPostRead, fWrite); 2215 if (rc == VERR_VD_BLOCK_FREE) 2216 { 2217 void *pvTmp = RTMemTmpAlloc(cbPreRead + cbThisWrite + cbPostRead); 2218 AssertBreakStmt(VALID_PTR(pvTmp), rc = VERR_NO_MEMORY); 2219 2220 if (!(pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)) 2221 { 2222 /* Optimized write, suppress writing to a so far unallocated 2223 * block if the data is in fact not changed. */ 2224 rc = vdWriteHelperOptimized(pDisk, pImage, pImageParentOverride, 2225 uOffsetCur, cbWriteCur, 2226 cbThisWrite, cbPreRead, cbPostRead, 2227 pcvBufCur, pvTmp, cImagesRead); 2228 } 2229 else 2230 { 2231 /* Normal write, not optimized in any way. The block will 2232 * be written no matter what. This will usually (unless the 2233 * backend has some further optimization enabled) cause the 2234 * block to be allocated. */ 2235 rc = vdWriteHelperStandard(pDisk, pImage, pImageParentOverride, 2236 uOffsetCur, cbWriteCur, 2237 cbThisWrite, cbPreRead, cbPostRead, 2238 pcvBufCur, pvTmp); 2239 } 2240 RTMemTmpFree(pvTmp); 2241 if (RT_FAILURE(rc)) 2242 break; 2243 } 2244 2245 cbWriteCur -= cbThisWrite; 2246 uOffsetCur += cbThisWrite; 2247 pcvBufCur = (char *)pcvBufCur + cbThisWrite; 2248 } while (cbWriteCur != 0 && RT_SUCCESS(rc)); 2249 2250 #if 0 /** @todo: Soon removed when sync and async version of the write helper are merged. 
*/ 2251 /* Update the cache on success */ 2252 if ( RT_SUCCESS(rc) 2253 && pDisk->pCache 2254 && fUpdateCache) 2255 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, cbWrite, NULL); 2256 2257 if (RT_SUCCESS(rc)) 2258 rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite); 2259 #endif 2260 2261 return rc; 2031 if (fUpdateCache) 2032 fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE; 2033 2034 Segment.pvSeg = (void *)pvBuf; 2035 Segment.cbSeg = cbWrite; 2036 RTSgBufInit(&SgBuf, &Segment, 1); 2037 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf, 2038 NULL, vdWriteHelperAsync, fFlags); 2039 2040 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride; 2041 IoCtx.Req.Io.cImagesRead = cImagesRead; 2042 IoCtx.pIoCtxParent = NULL; 2043 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete; 2044 IoCtx.Type.Root.pvUser1 = pDisk; 2045 IoCtx.Type.Root.pvUser2 = NULL; 2046 return vdIoCtxProcessSync(&IoCtx); 2262 2047 } 2263 2048 … … 2491 2276 } 2492 2277 2493 /** 2494 * internal: write a complete block (only used for diff images), taking the 2495 * remaining data from parent images. This implementation does not optimize 2496 * anything (except that it tries to read only that portions from parent 2497 * images that are really needed) - async version. 2498 */ 2499 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx) 2500 { 2501 int rc = VINF_SUCCESS; 2502 2503 #if 0 2504 2505 /* Read the data that goes before the write to fill the block. */ 2506 if (cbPreRead) 2507 { 2508 rc = vdReadHelperAsync(pIoCtxDst); 2509 if (RT_FAILURE(rc)) 2510 return rc; 2511 } 2512 2513 /* Copy the data to the right place in the buffer. */ 2514 vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbThisWrite); 2515 2516 /* Read the data that goes after the write to fill the block. */ 2517 if (cbPostRead) 2518 { 2519 /* If we have data to be written, use that instead of reading 2520 * data from the image. 
*/ 2521 size_t cbWriteCopy; 2522 if (cbWrite > cbThisWrite) 2523 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); 2524 else 2525 cbWriteCopy = 0; 2526 /* Figure out how much we cannot read from the image, because 2527 * the last block to write might exceed the nominal size of the 2528 * image for technical reasons. */ 2529 size_t cbFill; 2530 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) 2531 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; 2532 else 2533 cbFill = 0; 2534 /* The rest must be read from the image. */ 2535 size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill; 2536 2537 /* Now assemble the remaining data. */ 2538 if (cbWriteCopy) 2539 { 2540 vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbWriteCopy); 2541 ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbWriteCopy); 2542 } 2543 2544 if (cbReadImage) 2545 rc = vdReadHelperAsync(pDisk, pImage, pImageParentOverride, pIoCtxDst, 2546 uOffset + cbThisWrite + cbWriteCopy, 2547 cbReadImage); 2548 if (RT_FAILURE(rc)) 2549 return rc; 2550 /* Zero out the remainder of this block. Will never be visible, as this 2551 * is beyond the limit of the image. */ 2552 if (cbFill) 2553 { 2554 vdIoCtxSet(pIoCtxDst, '\0', cbFill); 2555 ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbFill); 2556 } 2557 } 2558 2559 if ( !pIoCtxDst->cbTransferLeft 2560 && !pIoCtxDst->cMetaTransfersPending 2561 && ASMAtomicCmpXchgBool(&pIoCtxDst->fComplete, true, false)) 2562 { 2563 /* Write the full block to the virtual disk. 
*/ 2564 vdIoCtxChildReset(pIoCtxDst); 2565 rc = pImage->Backend->pfnWrite(pImage->pBackendData, 2566 uOffset - cbPreRead, 2567 cbPreRead + cbThisWrite + cbPostRead, 2568 pIoCtxDst, 2569 NULL, &cbPreRead, &cbPostRead, 0); 2570 Assert(rc != VERR_VD_BLOCK_FREE); 2571 Assert(cbPreRead == 0); 2572 Assert(cbPostRead == 0); 2573 } 2574 else 2575 { 2576 LogFlow(("cbTransferLeft=%u cMetaTransfersPending=%u fComplete=%RTbool\n", 2577 pIoCtxDst->cbTransferLeft, pIoCtxDst->cMetaTransfersPending, 2578 pIoCtxDst->fComplete)); 2579 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 2580 } 2581 2582 return rc; 2583 #endif 2584 return VERR_NOT_IMPLEMENTED; 2585 } 2586 2587 static int vdWriteHelperOptimizedCommitAsync(PVDIOCTX pIoCtx) 2278 static int vdWriteHelperCommitAsync(PVDIOCTX pIoCtx) 2588 2279 { 2589 2280 int rc = VINF_SUCCESS; … … 2595 2286 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2596 2287 rc = pImage->Backend->pfnWrite(pImage->pBackendData, 2597 2598 2599 2288 pIoCtx->Req.Io.uOffset - cbPreRead, 2289 cbPreRead + cbThisWrite + cbPostRead, 2290 pIoCtx, NULL, &cbPreRead, &cbPostRead, 0); 2600 2291 Assert(rc != VERR_VD_BLOCK_FREE); 2601 2292 Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0); … … 2686 2377 /* Write the full block to the virtual disk. 
*/ 2687 2378 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 2688 pIoCtx->pfnIoCtxTransferNext = vdWriteHelper OptimizedCommitAsync;2379 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync; 2689 2380 2690 2381 return rc; … … 2696 2387 2697 2388 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2389 2390 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; 2698 2391 2699 2392 if (pIoCtx->Req.Io.cbTransferLeft) … … 2762 2455 /* Next step */ 2763 2456 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync; 2457 return VINF_SUCCESS; 2458 } 2459 2460 static int vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx) 2461 { 2462 int rc = VINF_SUCCESS; 2463 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead; 2464 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent; 2465 PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent; 2466 2467 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2468 2469 vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite); 2470 if (cbPostRead) 2471 { 2472 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill; 2473 size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy; 2474 size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage; 2475 2476 /* Now assemble the remaining data. */ 2477 if (cbWriteCopy) 2478 { 2479 /* 2480 * The S/G buffer of the parent needs to be cloned because 2481 * it is not allowed to modify the state. 2482 */ 2483 RTSGBUF SgBufParentTmp; 2484 2485 RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf); 2486 RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy); 2487 } 2488 2489 /* Zero out the remainder of this block. Will never be visible, as this 2490 * is beyond the limit of the image. */ 2491 if (cbFill) 2492 { 2493 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage); 2494 vdIoCtxSet(pIoCtx, '\0', cbFill); 2495 } 2496 2497 if (cbReadImage) 2498 { 2499 /* Read remaining data. */ 2500 } 2501 else 2502 { 2503 /* Write the full block to the virtual disk. 
*/ 2504 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 2505 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync; 2506 } 2507 } 2508 else 2509 { 2510 /* Write the full block to the virtual disk. */ 2511 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 2512 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync; 2513 } 2514 2515 return rc; 2516 } 2517 2518 static int vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx) 2519 { 2520 int rc = VINF_SUCCESS; 2521 2522 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2523 2524 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; 2525 2526 if (pIoCtx->Req.Io.cbTransferLeft) 2527 rc = vdReadHelperAsync(pIoCtx); 2528 2529 if ( RT_SUCCESS(rc) 2530 && ( pIoCtx->Req.Io.cbTransferLeft 2531 || pIoCtx->cMetaTransfersPending)) 2532 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 2533 else 2534 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble; 2535 2536 return rc; 2537 } 2538 2539 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx) 2540 { 2541 PVBOXHDD pDisk = pIoCtx->pDisk; 2542 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved; 2543 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent; 2544 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead; 2545 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead; 2546 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent; 2547 size_t cbFill = 0; 2548 size_t cbWriteCopy = 0; 2549 size_t cbReadImage = 0; 2550 2551 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2552 2553 AssertPtr(pIoCtx->pIoCtxParent); 2554 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent); 2555 2556 /* Calculate the amount of data to read that goes after the write to fill the block. */ 2557 if (cbPostRead) 2558 { 2559 /* If we have data to be written, use that instead of reading 2560 * data from the image. 
*/ 2561 cbWriteCopy; 2562 if (cbWrite > cbThisWrite) 2563 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); 2564 2565 /* Figure out how much we cannot read from the image, because 2566 * the last block to write might exceed the nominal size of the 2567 * image for technical reasons. */ 2568 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) 2569 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; 2570 2571 /* The rest must be read from the image. */ 2572 cbReadImage = cbPostRead - cbWriteCopy - cbFill; 2573 } 2574 2575 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill; 2576 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy; 2577 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage; 2578 2579 /* Next step */ 2580 if (cbPreRead) 2581 { 2582 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync; 2583 2584 /* Read the data that goes before the write to fill the block. */ 2585 pIoCtx->Req.Io.cbTransferLeft = cbPreRead; 2586 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft; 2587 pIoCtx->Req.Io.uOffset -= cbPreRead; 2588 } 2589 else 2590 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble; 2591 2764 2592 return VINF_SUCCESS; 2765 2593 } … … 9637 9465 pfnComplete, pvUser1, pvUser2, 9638 9466 NULL, vdReadHelperAsync, 9639 VDIOCTX_FLAGS_ DEFAULT);9467 VDIOCTX_FLAGS_ZERO_FREE_BLOCKS); 9640 9468 if (!pIoCtx) 9641 9469 {
Note:
See TracChangeset
for help on using the changeset viewer.