Changeset 44794 in vbox for trunk/src/VBox/Storage
- Timestamp:
- Feb 21, 2013 7:42:59 PM (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Storage/VD.cpp
r44628 r44794 355 355 /** S/G buffer */ 356 356 RTSGBUF SgBuf; 357 /** Number of bytes to clear in the buffer before the current read. */ 358 size_t cbBufClear; 359 /** Number of images to read. */ 360 unsigned cImagesRead; 361 /** Override for the parent image to start reading from. */ 362 PVDIMAGE pImageParentOverride; 357 363 } Io; 358 364 /** Discard requests. */ … … 424 430 425 431 /** Default flags for an I/O context, i.e. unblocked and async. */ 426 #define VDIOCTX_FLAGS_DEFAULT (0)432 #define VDIOCTX_FLAGS_DEFAULT (0) 427 433 /** Flag whether the context is blocked. */ 428 #define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0)434 #define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0) 429 435 /** Flag whether the I/O context is using synchronous I/O. */ 430 #define VDIOCTX_FLAGS_SYNC RT_BIT_32(1) 436 #define VDIOCTX_FLAGS_SYNC RT_BIT_32(1) 437 /** Flag whether the read should update the cache. */ 438 #define VDIOCTX_FLAGS_READ_UDATE_CACHE RT_BIT_32(2) 439 /** Flag whether free blocks should be zeroed. 440 * If false and no image has data for sepcified 441 * range VERR_VD_BLOCK_FREE is returned for the I/O context. 442 * Note that unallocated blocks are still zeroed 443 * if at least one image has valid data for a part 444 * of the range. 445 */ 446 #define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS RT_BIT_32(3) 447 /** Don't free the I/O context when complete because 448 * it was alloacted elsewhere (stack, ...). */ 449 #define VDIOCTX_FLAGS_DONT_FREE RT_BIT_32(4) 431 450 432 451 /** NIL I/O context pointer value. */ … … 577 596 /** Forward declaration of the async discard helper. 
*/ 578 597 static int vdDiscardHelperAsync(PVDIOCTX pIoCtx); 598 static int vdWriteHelperAsync(PVDIOCTX pIoCtx); 579 599 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk); 580 600 static int vdDiskUnlock(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc); 601 static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq); 581 602 582 603 /** … … 809 830 pIoCtx->Req.Io.pImageStart = pImageStart; 810 831 pIoCtx->Req.Io.pImageCur = pImageStart; 832 pIoCtx->Req.Io.cbBufClear = 0; 833 pIoCtx->Req.Io.pImageParentOverride = NULL; 811 834 pIoCtx->cDataTransfersPending = 0; 812 835 pIoCtx->cMetaTransfersPending = 0; … … 817 840 pIoCtx->pfnIoCtxTransferNext = NULL; 818 841 pIoCtx->rcReq = VINF_SUCCESS; 842 pIoCtx->pIoCtxParent = NULL; 819 843 820 844 /* There is no S/G list for a flush request. */ … … 845 869 */ 846 870 static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset, 847 PVDIOCTX pIoCtx, size_t cbRead, size_t *pcbRead)871 size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead) 848 872 { 849 873 int rc = VINF_SUCCESS; … … 908 932 909 933 /** 934 * Creates a new empty discard state. 935 * 936 * @returns Pointer to the new discard state or NULL if out of memory. 937 */ 938 static PVDDISCARDSTATE vdDiscardStateCreate(void) 939 { 940 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE)); 941 942 if (pDiscard) 943 { 944 RTListInit(&pDiscard->ListLru); 945 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE)); 946 if (!pDiscard->pTreeBlocks) 947 { 948 RTMemFree(pDiscard); 949 pDiscard = NULL; 950 } 951 } 952 953 return pDiscard; 954 } 955 956 /** 957 * Removes the least recently used blocks from the waiting list until 958 * the new value is reached. 959 * 960 * @returns VBox status code. 961 * @param pDisk VD disk container. 962 * @param pDiscard The discard state. 963 * @param cbDiscardingNew How many bytes should be waiting on success. 964 * The number of bytes waiting can be less. 
965 */ 966 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew) 967 { 968 int rc = VINF_SUCCESS; 969 970 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n", 971 pDisk, pDiscard, cbDiscardingNew)); 972 973 while (pDiscard->cbDiscarding > cbDiscardingNew) 974 { 975 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru); 976 977 Assert(!RTListIsEmpty(&pDiscard->ListLru)); 978 979 /* Go over the allocation bitmap and mark all discarded sectors as unused. */ 980 uint64_t offStart = pBlock->Core.Key; 981 uint32_t idxStart = 0; 982 size_t cbLeft = pBlock->cbDiscard; 983 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart); 984 uint32_t cSectors = pBlock->cbDiscard / 512; 985 986 while (cbLeft > 0) 987 { 988 int32_t idxEnd; 989 size_t cbThis = cbLeft; 990 991 if (fAllocated) 992 { 993 /* Check for the first unallocated bit. */ 994 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart); 995 if (idxEnd != -1) 996 { 997 cbThis = (idxEnd - idxStart) * 512; 998 fAllocated = false; 999 } 1000 } 1001 else 1002 { 1003 /* Mark as unused and check for the first set bit. 
*/ 1004 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart); 1005 if (idxEnd != -1) 1006 cbThis = (idxEnd - idxStart) * 512; 1007 1008 1009 VDIOCTX IoCtx; 1010 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL, 1011 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1012 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, 1013 &IoCtx, offStart, cbThis, NULL, 1014 NULL, &cbThis, NULL, 1015 VD_DISCARD_MARK_UNUSED); 1016 if (RT_FAILURE(rc)) 1017 break; 1018 1019 fAllocated = true; 1020 } 1021 1022 idxStart = idxEnd; 1023 offStart += cbThis; 1024 cbLeft -= cbThis; 1025 } 1026 1027 if (RT_FAILURE(rc)) 1028 break; 1029 1030 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key); 1031 Assert(pBlockRemove == pBlock); 1032 RTListNodeRemove(&pBlock->NodeLru); 1033 1034 pDiscard->cbDiscarding -= pBlock->cbDiscard; 1035 RTMemFree(pBlock->pbmAllocated); 1036 RTMemFree(pBlock); 1037 } 1038 1039 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew); 1040 1041 LogFlowFunc(("returns rc=%Rrc\n", rc)); 1042 return rc; 1043 } 1044 1045 /** 1046 * Destroys the current discard state, writing any waiting blocks to the image. 1047 * 1048 * @returns VBox status code. 1049 * @param pDisk VD disk container. 1050 */ 1051 static int vdDiscardStateDestroy(PVBOXHDD pDisk) 1052 { 1053 int rc = VINF_SUCCESS; 1054 1055 if (pDisk->pDiscard) 1056 { 1057 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */); 1058 AssertRC(rc); 1059 RTMemFree(pDisk->pDiscard->pTreeBlocks); 1060 RTMemFree(pDisk->pDiscard); 1061 pDisk->pDiscard = NULL; 1062 } 1063 1064 return rc; 1065 } 1066 1067 /** 1068 * Marks the given range as allocated in the image. 1069 * Required if there are discards in progress and a write to a block which can get discarded 1070 * is written to. 1071 * 1072 * @returns VBox status code. 1073 * @param pDisk VD container data. 
1074 * @param uOffset First byte to mark as allocated. 1075 * @param cbRange Number of bytes to mark as allocated. 1076 */ 1077 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange) 1078 { 1079 PVDDISCARDSTATE pDiscard = pDisk->pDiscard; 1080 int rc = VINF_SUCCESS; 1081 1082 if (pDiscard) 1083 { 1084 do 1085 { 1086 size_t cbThisRange = cbRange; 1087 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset); 1088 1089 if (pBlock) 1090 { 1091 int32_t idxStart, idxEnd; 1092 1093 Assert(!(cbThisRange % 512)); 1094 Assert(!((uOffset - pBlock->Core.Key) % 512)); 1095 1096 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1); 1097 1098 idxStart = (uOffset - pBlock->Core.Key) / 512; 1099 idxEnd = idxStart + (cbThisRange / 512); 1100 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd); 1101 } 1102 else 1103 { 1104 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true); 1105 if (pBlock) 1106 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset); 1107 } 1108 1109 Assert(cbRange >= cbThisRange); 1110 1111 uOffset += cbThisRange; 1112 cbRange -= cbThisRange; 1113 } while (cbRange != 0); 1114 } 1115 1116 return rc; 1117 } 1118 1119 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1120 uint64_t uOffset, size_t cbTransfer, 1121 PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf, 1122 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1123 uint32_t fFlags) 1124 { 1125 PVDIOCTX pIoCtx = NULL; 1126 1127 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx); 1128 if (RT_LIKELY(pIoCtx)) 1129 { 1130 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1131 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); 1132 } 1133 1134 return pIoCtx; 1135 } 1136 1137 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1138 uint64_t uOffset, size_t cbTransfer, 1139 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf, 1140 
PFNVDASYNCTRANSFERCOMPLETE pfnComplete, 1141 void *pvUser1, void *pvUser2, 1142 void *pvAllocation, 1143 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1144 uint32_t fFlags) 1145 { 1146 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1147 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); 1148 1149 if (RT_LIKELY(pIoCtx)) 1150 { 1151 pIoCtx->pIoCtxParent = NULL; 1152 pIoCtx->Type.Root.pfnComplete = pfnComplete; 1153 pIoCtx->Type.Root.pvUser1 = pvUser1; 1154 pIoCtx->Type.Root.pvUser2 = pvUser2; 1155 } 1156 1157 LogFlow(("Allocated root I/O context %#p\n", pIoCtx)); 1158 return pIoCtx; 1159 } 1160 1161 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges, 1162 unsigned cRanges, 1163 PFNVDASYNCTRANSFERCOMPLETE pfnComplete, 1164 void *pvUser1, void *pvUser2, 1165 void *pvAllocation, 1166 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1167 uint32_t fFlags) 1168 { 1169 PVDIOCTX pIoCtx = NULL; 1170 1171 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx); 1172 if (RT_LIKELY(pIoCtx)) 1173 { 1174 pIoCtx->pIoCtxNext = NULL; 1175 pIoCtx->pDisk = pDisk; 1176 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD; 1177 pIoCtx->cDataTransfersPending = 0; 1178 pIoCtx->cMetaTransfersPending = 0; 1179 pIoCtx->fComplete = false; 1180 pIoCtx->fFlags = fFlags; 1181 pIoCtx->pvAllocation = pvAllocation; 1182 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer; 1183 pIoCtx->pfnIoCtxTransferNext = NULL; 1184 pIoCtx->rcReq = VINF_SUCCESS; 1185 pIoCtx->Req.Discard.paRanges = paRanges; 1186 pIoCtx->Req.Discard.cRanges = cRanges; 1187 pIoCtx->Req.Discard.idxRange = 0; 1188 pIoCtx->Req.Discard.cbDiscardLeft = 0; 1189 pIoCtx->Req.Discard.offCur = 0; 1190 pIoCtx->Req.Discard.cbThisDiscard = 0; 1191 1192 pIoCtx->pIoCtxParent = NULL; 1193 pIoCtx->Type.Root.pfnComplete = pfnComplete; 1194 pIoCtx->Type.Root.pvUser1 = pvUser1; 1195 pIoCtx->Type.Root.pvUser2 = pvUser2; 1196 } 1197 1198 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx)); 1199 return pIoCtx; 1200 } 1201 1202 
DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1203 uint64_t uOffset, size_t cbTransfer, 1204 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf, 1205 PVDIOCTX pIoCtxParent, size_t cbTransferParent, 1206 size_t cbWriteParent, void *pvAllocation, 1207 PFNVDIOCTXTRANSFER pfnIoCtxTransfer) 1208 { 1209 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1210 pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0); 1211 1212 AssertPtr(pIoCtxParent); 1213 Assert(!pIoCtxParent->pIoCtxParent); 1214 1215 if (RT_LIKELY(pIoCtx)) 1216 { 1217 pIoCtx->pIoCtxParent = pIoCtxParent; 1218 pIoCtx->Type.Child.uOffsetSaved = uOffset; 1219 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer; 1220 pIoCtx->Type.Child.cbTransferParent = cbTransferParent; 1221 pIoCtx->Type.Child.cbWriteParent = cbWriteParent; 1222 } 1223 1224 LogFlow(("Allocated child I/O context %#p\n", pIoCtx)); 1225 return pIoCtx; 1226 } 1227 1228 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer) 1229 { 1230 PVDIOTASK pIoTask = NULL; 1231 1232 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask); 1233 if (pIoTask) 1234 { 1235 pIoTask->pIoStorage = pIoStorage; 1236 pIoTask->pfnComplete = pfnComplete; 1237 pIoTask->pvUser = pvUser; 1238 pIoTask->fMeta = false; 1239 pIoTask->Type.User.cbTransfer = cbTransfer; 1240 pIoTask->Type.User.pIoCtx = pIoCtx; 1241 } 1242 1243 return pIoTask; 1244 } 1245 1246 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer) 1247 { 1248 PVDIOTASK pIoTask = NULL; 1249 1250 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask); 1251 if (pIoTask) 1252 { 1253 pIoTask->pIoStorage = pIoStorage; 1254 pIoTask->pfnComplete = pfnComplete; 1255 pIoTask->pvUser = pvUser; 1256 pIoTask->fMeta = true; 1257 pIoTask->Type.Meta.pMetaXfer = pMetaXfer; 
1258 } 1259 1260 return pIoTask; 1261 } 1262 1263 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1264 { 1265 LogFlow(("Freeing I/O context %#p\n", pIoCtx)); 1266 1267 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE)) 1268 { 1269 if (pIoCtx->pvAllocation) 1270 RTMemFree(pIoCtx->pvAllocation); 1271 #ifdef DEBUG 1272 memset(pIoCtx, 0xff, sizeof(VDIOCTX)); 1273 #endif 1274 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx); 1275 } 1276 } 1277 1278 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask) 1279 { 1280 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask); 1281 } 1282 1283 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx) 1284 { 1285 AssertPtr(pIoCtx->pIoCtxParent); 1286 1287 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 1288 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved; 1289 pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved; 1290 } 1291 1292 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb) 1293 { 1294 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb])); 1295 1296 if (RT_LIKELY(pMetaXfer)) 1297 { 1298 pMetaXfer->Core.Key = uOffset; 1299 pMetaXfer->Core.KeyLast = uOffset + cb - 1; 1300 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE; 1301 pMetaXfer->cbMeta = cb; 1302 pMetaXfer->pIoStorage = pIoStorage; 1303 pMetaXfer->cRefs = 0; 1304 RTListInit(&pMetaXfer->ListIoCtxWaiting); 1305 } 1306 return pMetaXfer; 1307 } 1308 1309 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx) 1310 { 1311 /* Put it on the waiting list. 
*/ 1312 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX); 1313 PVDIOCTX pHeadOld; 1314 pIoCtx->pIoCtxNext = pNext; 1315 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld)) 1316 { 1317 pNext = pHeadOld; 1318 Assert(pNext != pIoCtx); 1319 pIoCtx->pIoCtxNext = pNext; 1320 ASMNopPause(); 1321 } 1322 } 1323 1324 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1325 { 1326 LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx)); 1327 1328 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)); 1329 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; 1330 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx); 1331 } 1332 1333 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData) 1334 { 1335 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData); 1336 } 1337 1338 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData) 1339 { 1340 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData); 1341 } 1342 1343 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData) 1344 { 1345 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); 1346 } 1347 1348 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData) 1349 { 1350 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); 1351 } 1352 1353 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData) 1354 { 1355 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData); 1356 } 1357 1358 /** 1359 * Process the I/O context, core method which assumes that the I/O context 1360 * acquired the lock. 1361 * 1362 * @returns VBox status code. 1363 * @param pIoCtx I/O context to process. 
1364 */ 1365 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx) 1366 { 1367 int rc = VINF_SUCCESS; 1368 1369 VD_IS_LOCKED(pIoCtx->pDisk); 1370 1371 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 1372 1373 if ( !pIoCtx->cMetaTransfersPending 1374 && !pIoCtx->cDataTransfersPending 1375 && !pIoCtx->pfnIoCtxTransfer) 1376 { 1377 rc = VINF_VD_ASYNC_IO_FINISHED; 1378 goto out; 1379 } 1380 1381 /* 1382 * We complete the I/O context in case of an error 1383 * if there is no I/O task pending. 1384 */ 1385 if ( RT_FAILURE(pIoCtx->rcReq) 1386 && !pIoCtx->cMetaTransfersPending 1387 && !pIoCtx->cDataTransfersPending) 1388 { 1389 rc = VINF_VD_ASYNC_IO_FINISHED; 1390 goto out; 1391 } 1392 1393 /* Don't change anything if there is a metadata transfer pending or we are blocked. */ 1394 if ( pIoCtx->cMetaTransfersPending 1395 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)) 1396 { 1397 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1398 goto out; 1399 } 1400 1401 if (pIoCtx->pfnIoCtxTransfer) 1402 { 1403 /* Call the transfer function advancing to the next while there is no error. */ 1404 while ( pIoCtx->pfnIoCtxTransfer 1405 && !pIoCtx->cMetaTransfersPending 1406 && RT_SUCCESS(rc)) 1407 { 1408 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer)); 1409 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx); 1410 1411 /* Advance to the next part of the transfer if the current one succeeded. 
*/ 1412 if (RT_SUCCESS(rc)) 1413 { 1414 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext; 1415 pIoCtx->pfnIoCtxTransferNext = NULL; 1416 } 1417 } 1418 } 1419 1420 if ( RT_SUCCESS(rc) 1421 && !pIoCtx->cMetaTransfersPending 1422 && !pIoCtx->cDataTransfersPending) 1423 rc = VINF_VD_ASYNC_IO_FINISHED; 1424 else if ( RT_SUCCESS(rc) 1425 || rc == VERR_VD_NOT_ENOUGH_METADATA 1426 || rc == VERR_VD_IOCTX_HALT) 1427 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1428 else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)) 1429 { 1430 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS); 1431 /* 1432 * The I/O context completed if we have an error and there is no data 1433 * or meta data transfer pending. 1434 */ 1435 if ( !pIoCtx->cMetaTransfersPending 1436 && !pIoCtx->cDataTransfersPending) 1437 rc = VINF_VD_ASYNC_IO_FINISHED; 1438 else 1439 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1440 } 1441 1442 out: 1443 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n", 1444 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending, 1445 pIoCtx->fComplete)); 1446 1447 return rc; 1448 } 1449 1450 /** 1451 * Processes the list of waiting I/O contexts. 1452 * 1453 * @returns VBox status code. 1454 * @param pDisk The disk structure. 1455 * @param pIoCtxRc An I/O context handle which waits on the list. When processed 1456 * The status code is returned. NULL if there is no I/O context 1457 * to return the status code for. 1458 */ 1459 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc) 1460 { 1461 int rc = VINF_SUCCESS; 1462 1463 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc)); 1464 1465 VD_IS_LOCKED(pDisk); 1466 1467 /* Get the waiting list and process it in FIFO order. */ 1468 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX); 1469 1470 /* Reverse it. 
*/ 1471 PVDIOCTX pCur = pIoCtxHead; 1472 pIoCtxHead = NULL; 1473 while (pCur) 1474 { 1475 PVDIOCTX pInsert = pCur; 1476 pCur = pCur->pIoCtxNext; 1477 pInsert->pIoCtxNext = pIoCtxHead; 1478 pIoCtxHead = pInsert; 1479 } 1480 1481 /* Process now. */ 1482 pCur = pIoCtxHead; 1483 while (pCur) 1484 { 1485 int rcTmp; 1486 PVDIOCTX pTmp = pCur; 1487 1488 pCur = pCur->pIoCtxNext; 1489 pTmp->pIoCtxNext = NULL; 1490 1491 /* 1492 * Need to clear the sync flag here if there is a new I/O context 1493 * with it set and the context is not given in pIoCtxRc. 1494 * This happens most likely on a different thread and that one shouldn't 1495 * process the context synchronously. 1496 * 1497 * The thread who issued the context will wait on the event semaphore 1498 * anyway which is signalled when the completion handler is called. 1499 */ 1500 if ( pTmp->fFlags & VDIOCTX_FLAGS_SYNC 1501 && pTmp != pIoCtxRc) 1502 pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC; 1503 1504 rcTmp = vdIoCtxProcessLocked(pTmp); 1505 if (pTmp == pIoCtxRc) 1506 { 1507 /* The given I/O context was processed, pass the return code to the caller. */ 1508 rc = rcTmp; 1509 } 1510 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED 1511 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false)) 1512 { 1513 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp)); 1514 vdThreadFinishWrite(pDisk); 1515 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1, 1516 pTmp->Type.Root.pvUser2, 1517 pTmp->rcReq); 1518 vdIoCtxFree(pDisk, pTmp); 1519 } 1520 } 1521 1522 LogFlowFunc(("returns rc=%Rrc\n", rc)); 1523 return rc; 1524 } 1525 1526 /** 1527 * Processes the list of blocked I/O contexts. 1528 * 1529 * @returns nothing. 1530 * @param pDisk The disk structure. 1531 */ 1532 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk) 1533 { 1534 LogFlowFunc(("pDisk=%#p\n", pDisk)); 1535 1536 VD_IS_LOCKED(pDisk); 1537 1538 /* Get the waiting list and process it in FIFO order. 
*/ 1539 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX); 1540 1541 /* Reverse it. */ 1542 PVDIOCTX pCur = pIoCtxHead; 1543 pIoCtxHead = NULL; 1544 while (pCur) 1545 { 1546 PVDIOCTX pInsert = pCur; 1547 pCur = pCur->pIoCtxNext; 1548 pInsert->pIoCtxNext = pIoCtxHead; 1549 pIoCtxHead = pInsert; 1550 } 1551 1552 /* Process now. */ 1553 pCur = pIoCtxHead; 1554 while (pCur) 1555 { 1556 int rc; 1557 PVDIOCTX pTmp = pCur; 1558 1559 pCur = pCur->pIoCtxNext; 1560 pTmp->pIoCtxNext = NULL; 1561 1562 Assert(!pTmp->pIoCtxParent); 1563 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED); 1564 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED; 1565 1566 rc = vdIoCtxProcessLocked(pTmp); 1567 if ( rc == VINF_VD_ASYNC_IO_FINISHED 1568 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false)) 1569 { 1570 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp)); 1571 vdThreadFinishWrite(pDisk); 1572 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1, 1573 pTmp->Type.Root.pvUser2, 1574 pTmp->rcReq); 1575 vdIoCtxFree(pDisk, pTmp); 1576 } 1577 } 1578 1579 LogFlowFunc(("returns\n")); 1580 } 1581 1582 /** 1583 * Processes the I/O context trying to lock the criticial section. 1584 * The context is deferred if the critical section is busy. 1585 * 1586 * @returns VBox status code. 1587 * @param pIoCtx The I/O context to process. 1588 */ 1589 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx) 1590 { 1591 int rc = VINF_SUCCESS; 1592 PVBOXHDD pDisk = pIoCtx->pDisk; 1593 1594 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 1595 1596 /* Put it on the waiting list first. */ 1597 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx); 1598 1599 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false)) 1600 { 1601 /* Leave it again, the context will be processed just before leaving the lock. 
*/ 1602 LogFlowFunc(("Successfully acquired the lock\n")); 1603 rc = vdDiskUnlock(pDisk, pIoCtx); 1604 } 1605 else 1606 { 1607 LogFlowFunc(("Lock is held\n")); 1608 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1609 } 1610 1611 return rc; 1612 } 1613 1614 /** 1615 * Process the I/O context in a synchronous manner, waiting 1616 * for it to complete. 1617 * 1618 * @returns VBox status code of the completed request. 1619 * @param pIoCtx The sync I/O context. 1620 */ 1621 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx) 1622 { 1623 int rc = VINF_SUCCESS; 1624 PVBOXHDD pDisk = pIoCtx->pDisk; 1625 1626 LogFlowFunc(("pIoCtx=%p\n", pIoCtx)); 1627 1628 AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC, 1629 ("I/O context is not marked as synchronous\n")); 1630 1631 rc = vdIoCtxProcessTryLockDefer(pIoCtx); 1632 if (rc == VINF_VD_ASYNC_IO_FINISHED) 1633 rc = VINF_SUCCESS; 1634 1635 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) 1636 { 1637 rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT); 1638 AssertRC(rc); 1639 1640 rc = pDisk->rcSync; 1641 } 1642 else /* Success or error. */ 1643 vdIoCtxFree(pDisk, pIoCtx); 1644 1645 return rc; 1646 } 1647 1648 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1649 { 1650 return pDisk->pIoCtxLockOwner == pIoCtx; 1651 } 1652 1653 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1654 { 1655 int rc = VINF_SUCCESS; 1656 1657 VD_IS_LOCKED(pDisk); 1658 1659 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx)); 1660 1661 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX)) 1662 { 1663 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. 
*/ 1664 vdIoCtxDefer(pDisk, pIoCtx); 1665 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1666 } 1667 1668 LogFlowFunc(("returns -> %Rrc\n", rc)); 1669 return rc; 1670 } 1671 1672 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs) 1673 { 1674 LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n", 1675 pDisk, pIoCtx, fProcessBlockedReqs)); 1676 1677 VD_IS_LOCKED(pDisk); 1678 1679 LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner)); 1680 Assert(pDisk->pIoCtxLockOwner == pIoCtx); 1681 ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX); 1682 1683 if (fProcessBlockedReqs) 1684 { 1685 /* Process any blocked writes if the current request didn't caused another growing. */ 1686 vdDiskProcessBlockedIoCtx(pDisk); 1687 } 1688 1689 LogFlowFunc(("returns\n")); 1690 } 1691 1692 /** 910 1693 * Internal: Reads a given amount of data from the image chain of the disk. 911 1694 **/ 912 1695 static int vdDiskReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride, 913 uint64_t uOffset, void *pvBuf, size_t cbRead, size_t *pcbThisRead)1696 uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead) 914 1697 { 915 1698 int rc = VINF_SUCCESS; 916 1699 size_t cbThisRead = cbRead; 917 RTSGSEG SegmentBuf;918 RTSGBUF SgBuf;919 VDIOCTX IoCtx;920 1700 921 1701 AssertPtr(pcbThisRead); 922 1702 923 1703 *pcbThisRead = 0; 924 925 SegmentBuf.pvSeg = pvBuf;926 SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE;927 RTSgBufInit(&SgBuf, &SegmentBuf, 1);928 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL,929 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);930 1704 931 1705 /* … … 934 1708 */ 935 1709 rc = pImage->Backend->pfnRead(pImage->pBackendData, 936 uOffset, cbThisRead, &IoCtx,937 1710 uOffset, cbThisRead, pIoCtx, 1711 &cbThisRead); 938 1712 939 1713 if (rc == VERR_VD_BLOCK_FREE) … … 944 1718 { 945 1719 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 946 uOffset, cbThisRead, &IoCtx,947 1720 
uOffset, cbThisRead, pIoCtx, 1721 &cbThisRead); 948 1722 } 949 1723 } … … 952 1726 *pcbThisRead = cbThisRead; 953 1727 1728 return rc; 1729 } 1730 1731 /** 1732 * internal: read the specified amount of data in whatever blocks the backend 1733 * will give us - async version. 1734 */ 1735 static int vdReadHelperAsync(PVDIOCTX pIoCtx) 1736 { 1737 int rc; 1738 PVBOXHDD pDisk = pIoCtx->pDisk; 1739 size_t cbToRead = pIoCtx->Req.Io.cbTransfer; 1740 uint64_t uOffset = pIoCtx->Req.Io.uOffset; 1741 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur; 1742 PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride; 1743 unsigned cImagesRead = pIoCtx->Req.Io.cImagesRead; 1744 size_t cbThisRead; 1745 1746 /* Loop until all reads started or we have a backend which needs to read metadata. */ 1747 do 1748 { 1749 /* Search for image with allocated block. Do not attempt to read more 1750 * than the previous reads marked as valid. Otherwise this would return 1751 * stale data when different block sizes are used for the images. */ 1752 cbThisRead = cbToRead; 1753 1754 if ( pDisk->pCache 1755 && !pImageParentOverride) 1756 { 1757 rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead, 1758 pIoCtx, &cbThisRead); 1759 if (rc == VERR_VD_BLOCK_FREE) 1760 { 1761 rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead, 1762 pIoCtx, &cbThisRead); 1763 1764 /* If the read was successful, write the data back into the cache. */ 1765 if ( RT_SUCCESS(rc) 1766 && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UDATE_CACHE) 1767 { 1768 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead, 1769 pIoCtx, NULL); 1770 } 1771 } 1772 } 1773 else 1774 { 1775 1776 /* 1777 * Try to read from the given image. 1778 * If the block is not allocated read from override chain if present. 
1779 */ 1780 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1781 uOffset, cbThisRead, pIoCtx, 1782 &cbThisRead); 1783 1784 if ( rc == VERR_VD_BLOCK_FREE 1785 && cImagesRead != 1) 1786 { 1787 unsigned cImagesToProcess = cImagesRead; 1788 1789 pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev; 1790 pIoCtx->Req.Io.pImageParentOverride = NULL; 1791 1792 while (pCurrImage && rc == VERR_VD_BLOCK_FREE) 1793 { 1794 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1795 uOffset, cbThisRead, 1796 pIoCtx, &cbThisRead); 1797 if (cImagesToProcess == 1) 1798 break; 1799 else if (cImagesToProcess > 0) 1800 cImagesToProcess--; 1801 1802 if (rc == VERR_VD_BLOCK_FREE) 1803 pCurrImage = pCurrImage->pPrev; 1804 } 1805 } 1806 } 1807 1808 /* The task state will be updated on success already, don't do it here!. */ 1809 if (rc == VERR_VD_BLOCK_FREE) 1810 { 1811 /* No image in the chain contains the data for the block. */ 1812 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead); 1813 1814 /* Fill the free space with 0 if we are told to do so 1815 * or a previous read returned valid data. */ 1816 if (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS) 1817 vdIoCtxSet(pIoCtx, '\0', cbThisRead); 1818 else 1819 pIoCtx->Req.Io.cbBufClear += cbThisRead; 1820 1821 if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS) 1822 rc = VINF_VD_NEW_ZEROED_BLOCK; 1823 else 1824 rc = VINF_SUCCESS; 1825 } 1826 else if (rc == VERR_VD_IOCTX_HALT) 1827 { 1828 uOffset += cbThisRead; 1829 cbToRead -= cbThisRead; 1830 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; 1831 } 1832 else if ( RT_SUCCESS(rc) 1833 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS) 1834 { 1835 /* First not free block, fill the space before with 0. 
*/ 1836 if ( pIoCtx->Req.Io.cbBufClear 1837 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)) 1838 { 1839 RTSGBUF SgBuf; 1840 RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf); 1841 RTSgBufReset(&SgBuf); 1842 RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear); 1843 pIoCtx->Req.Io.cbBufClear = 0; 1844 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; 1845 } 1846 rc = VINF_SUCCESS; 1847 } 1848 1849 if (RT_FAILURE(rc)) 1850 break; 1851 1852 cbToRead -= cbThisRead; 1853 uOffset += cbThisRead; 1854 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */ 1855 } while (cbToRead != 0 && RT_SUCCESS(rc)); 1856 1857 if ( rc == VERR_VD_NOT_ENOUGH_METADATA 1858 || rc == VERR_VD_IOCTX_HALT) 1859 { 1860 /* Save the current state. */ 1861 pIoCtx->Req.Io.uOffset = uOffset; 1862 pIoCtx->Req.Io.cbTransfer = cbToRead; 1863 pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart; 1864 } 1865 1866 return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)) 1867 ? VERR_VD_BLOCK_FREE 1868 : rc; 1869 } 1870 1871 /** 1872 * internal: parent image read wrapper for compacting. 1873 */ 1874 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf, 1875 size_t cbRead) 1876 { 1877 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser; 1878 1879 /** @todo 1880 * Only used for compaction so far which is not possible to mix with async I/O. 1881 * Needs to be changed if we want to support online compaction of images. 1882 */ 1883 bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true); 1884 AssertMsgReturn(!fLocked, 1885 ("Calling synchronous parent read while another thread holds the disk lock\n"), 1886 VERR_VD_INVALID_STATE); 1887 1888 /* Fake an I/O context. 
*/ 1889 RTSGSEG Segment; 1890 RTSGBUF SgBuf; 1891 VDIOCTX IoCtx; 1892 1893 Segment.pvSeg = pvBuf; 1894 Segment.cbSeg = cbRead; 1895 RTSgBufInit(&SgBuf, &Segment, 1); 1896 vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage, 1897 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1898 int rc = vdReadHelperAsync(&IoCtx); 1899 ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false); 954 1900 return rc; 955 1901 } … … 984 1930 bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead) 985 1931 { 986 int rc = VINF_SUCCESS; 987 size_t cbThisRead; 988 bool fAllFree = true; 989 size_t cbBufClear = 0; 990 991 /* Loop until all read. */ 992 do 993 { 994 /* Search for image with allocated block. Do not attempt to read more 995 * than the previous reads marked as valid. Otherwise this would return 996 * stale data when different block sizes are used for the images. */ 997 cbThisRead = cbRead; 998 999 if ( pDisk->pCache 1000 && !pImageParentOverride) 1001 { 1002 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */ 1003 rc = vdCacheReadHelper(pDisk->pCache, uOffset, pvBuf, 1004 cbThisRead, &cbThisRead); 1005 #endif 1006 if (rc == VERR_VD_BLOCK_FREE) 1007 { 1008 rc = vdDiskReadHelper(pDisk, pImage, NULL, uOffset, pvBuf, cbThisRead, 1009 &cbThisRead); 1010 1011 /* If the read was successful, write the data back into the cache. */ 1012 if ( RT_SUCCESS(rc) 1013 && fUpdateCache) 1014 { 1015 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. 
*/ 1016 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, 1017 cbThisRead, NULL); 1018 #endif 1019 } 1020 } 1021 } 1022 else 1023 { 1024 RTSGSEG SegmentBuf; 1025 RTSGBUF SgBuf; 1026 VDIOCTX IoCtx; 1027 1028 SegmentBuf.pvSeg = pvBuf; 1029 SegmentBuf.cbSeg = cbThisRead; 1030 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 1031 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL, 1032 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1033 1034 /* 1035 * Try to read from the given image. 1036 * If the block is not allocated read from override chain if present. 1037 */ 1038 rc = pImage->Backend->pfnRead(pImage->pBackendData, 1039 uOffset, cbThisRead, &IoCtx, 1040 &cbThisRead); 1041 1042 if ( rc == VERR_VD_BLOCK_FREE 1043 && cImagesRead != 1) 1044 { 1045 unsigned cImagesToProcess = cImagesRead; 1046 1047 for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev; 1048 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE; 1049 pCurrImage = pCurrImage->pPrev) 1050 { 1051 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1052 uOffset, cbThisRead, 1053 &IoCtx, &cbThisRead); 1054 if (cImagesToProcess == 1) 1055 break; 1056 else if (cImagesToProcess > 0) 1057 cImagesToProcess--; 1058 } 1059 } 1060 } 1061 1062 /* No image in the chain contains the data for the block. */ 1063 if (rc == VERR_VD_BLOCK_FREE) 1064 { 1065 /* Fill the free space with 0 if we are told to do so 1066 * or a previous read returned valid data. */ 1067 if (fZeroFreeBlocks || !fAllFree) 1068 memset(pvBuf, '\0', cbThisRead); 1069 else 1070 cbBufClear += cbThisRead; 1071 1072 if (pImage->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS) 1073 rc = VINF_VD_NEW_ZEROED_BLOCK; 1074 else 1075 rc = VINF_SUCCESS; 1076 } 1077 else if (RT_SUCCESS(rc)) 1078 { 1079 /* First not free block, fill the space before with 0. 
*/ 1080 if (!fZeroFreeBlocks) 1081 { 1082 memset((char *)pvBuf - cbBufClear, '\0', cbBufClear); 1083 cbBufClear = 0; 1084 fAllFree = false; 1085 } 1086 } 1087 1088 cbRead -= cbThisRead; 1089 uOffset += cbThisRead; 1090 pvBuf = (char *)pvBuf + cbThisRead; 1091 } while (cbRead != 0 && RT_SUCCESS(rc)); 1092 1093 return (!fZeroFreeBlocks && fAllFree) ? VERR_VD_BLOCK_FREE : rc; 1932 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE; 1933 RTSGSEG Segment; 1934 RTSGBUF SgBuf; 1935 VDIOCTX IoCtx; 1936 1937 if (fZeroFreeBlocks) 1938 fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; 1939 if (fUpdateCache) 1940 fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE; 1941 1942 Segment.pvSeg = pvBuf; 1943 Segment.cbSeg = cbRead; 1944 RTSgBufInit(&SgBuf, &Segment, 1); 1945 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf, 1946 NULL, vdReadHelperAsync, fFlags); 1947 1948 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride; 1949 IoCtx.Req.Io.cImagesRead = cImagesRead; 1950 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete; 1951 IoCtx.Type.Root.pvUser1 = pDisk; 1952 IoCtx.Type.Root.pvUser2 = NULL; 1953 return vdIoCtxProcessSync(&IoCtx); 1094 1954 } 1095 1955 … … 1103 1963 return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead, 1104 1964 true /* fZeroFreeBlocks */, fUpdateCache, 0); 1105 }1106 1107 /**1108 * Creates a new empty discard state.1109 *1110 * @returns Pointer to the new discard state or NULL if out of memory.1111 */1112 static PVDDISCARDSTATE vdDiscardStateCreate(void)1113 {1114 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));1115 1116 if (pDiscard)1117 {1118 RTListInit(&pDiscard->ListLru);1119 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));1120 if (!pDiscard->pTreeBlocks)1121 {1122 RTMemFree(pDiscard);1123 pDiscard = NULL;1124 }1125 }1126 1127 return pDiscard;1128 }1129 1130 /**1131 * Removes the least recently used blocks from the waiting list until1132 * the new value is 
reached.1133 *1134 * @returns VBox status code.1135 * @param pDisk VD disk container.1136 * @param pDiscard The discard state.1137 * @param cbDiscardingNew How many bytes should be waiting on success.1138 * The number of bytes waiting can be less.1139 */1140 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)1141 {1142 int rc = VINF_SUCCESS;1143 1144 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",1145 pDisk, pDiscard, cbDiscardingNew));1146 1147 while (pDiscard->cbDiscarding > cbDiscardingNew)1148 {1149 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);1150 1151 Assert(!RTListIsEmpty(&pDiscard->ListLru));1152 1153 /* Go over the allocation bitmap and mark all discarded sectors as unused. */1154 uint64_t offStart = pBlock->Core.Key;1155 uint32_t idxStart = 0;1156 size_t cbLeft = pBlock->cbDiscard;1157 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);1158 uint32_t cSectors = pBlock->cbDiscard / 512;1159 1160 while (cbLeft > 0)1161 {1162 int32_t idxEnd;1163 size_t cbThis = cbLeft;1164 1165 if (fAllocated)1166 {1167 /* Check for the first unallocated bit. */1168 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);1169 if (idxEnd != -1)1170 {1171 cbThis = (idxEnd - idxStart) * 512;1172 fAllocated = false;1173 }1174 }1175 else1176 {1177 /* Mark as unused and check for the first set bit. 
*/1178 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);1179 if (idxEnd != -1)1180 cbThis = (idxEnd - idxStart) * 512;1181 1182 1183 VDIOCTX IoCtx;1184 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,1185 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);1186 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,1187 &IoCtx, offStart, cbThis, NULL,1188 NULL, &cbThis, NULL,1189 VD_DISCARD_MARK_UNUSED);1190 if (RT_FAILURE(rc))1191 break;1192 1193 fAllocated = true;1194 }1195 1196 idxStart = idxEnd;1197 offStart += cbThis;1198 cbLeft -= cbThis;1199 }1200 1201 if (RT_FAILURE(rc))1202 break;1203 1204 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);1205 Assert(pBlockRemove == pBlock);1206 RTListNodeRemove(&pBlock->NodeLru);1207 1208 pDiscard->cbDiscarding -= pBlock->cbDiscard;1209 RTMemFree(pBlock->pbmAllocated);1210 RTMemFree(pBlock);1211 }1212 1213 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);1214 1215 LogFlowFunc(("returns rc=%Rrc\n", rc));1216 return rc;1217 }1218 1219 /**1220 * Destroys the current discard state, writing any waiting blocks to the image.1221 *1222 * @returns VBox status code.1223 * @param pDisk VD disk container.1224 */1225 static int vdDiscardStateDestroy(PVBOXHDD pDisk)1226 {1227 int rc = VINF_SUCCESS;1228 1229 if (pDisk->pDiscard)1230 {1231 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. 
*/);1232 AssertRC(rc);1233 RTMemFree(pDisk->pDiscard->pTreeBlocks);1234 RTMemFree(pDisk->pDiscard);1235 pDisk->pDiscard = NULL;1236 }1237 1238 return rc;1239 }1240 1241 /**1242 * Marks the given range as allocated in the image.1243 * Required if there are discards in progress and a write to a block which can get discarded1244 * is written to.1245 *1246 * @returns VBox status code.1247 * @param pDisk VD container data.1248 * @param uOffset First byte to mark as allocated.1249 * @param cbRange Number of bytes to mark as allocated.1250 */1251 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange)1252 {1253 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;1254 int rc = VINF_SUCCESS;1255 1256 if (pDiscard)1257 {1258 do1259 {1260 size_t cbThisRange = cbRange;1261 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);1262 1263 if (pBlock)1264 {1265 int32_t idxStart, idxEnd;1266 1267 Assert(!(cbThisRange % 512));1268 Assert(!((uOffset - pBlock->Core.Key) % 512));1269 1270 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);1271 1272 idxStart = (uOffset - pBlock->Core.Key) / 512;1273 idxEnd = idxStart + (cbThisRange / 512);1274 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);1275 }1276 else1277 {1278 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);1279 if (pBlock)1280 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);1281 }1282 1283 Assert(cbRange >= cbThisRange);1284 1285 uOffset += cbThisRange;1286 cbRange -= cbThisRange;1287 } while (cbRange != 0);1288 }1289 1290 return rc;1291 }1292 1293 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1294 uint64_t uOffset, size_t cbTransfer,1295 PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,1296 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1297 uint32_t fFlags)1298 {1299 PVDIOCTX pIoCtx = NULL;1300 1301 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);1302 if 
(RT_LIKELY(pIoCtx))1303 {1304 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1305 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);1306 }1307 1308 return pIoCtx;1309 }1310 1311 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1312 uint64_t uOffset, size_t cbTransfer,1313 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,1314 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,1315 void *pvUser1, void *pvUser2,1316 void *pvAllocation,1317 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1318 uint32_t fFlags)1319 {1320 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1321 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);1322 1323 if (RT_LIKELY(pIoCtx))1324 {1325 pIoCtx->pIoCtxParent = NULL;1326 pIoCtx->Type.Root.pfnComplete = pfnComplete;1327 pIoCtx->Type.Root.pvUser1 = pvUser1;1328 pIoCtx->Type.Root.pvUser2 = pvUser2;1329 }1330 1331 LogFlow(("Allocated root I/O context %#p\n", pIoCtx));1332 return pIoCtx;1333 }1334 1335 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges,1336 unsigned cRanges,1337 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,1338 void *pvUser1, void *pvUser2,1339 void *pvAllocation,1340 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1341 uint32_t fFlags)1342 {1343 PVDIOCTX pIoCtx = NULL;1344 1345 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);1346 if (RT_LIKELY(pIoCtx))1347 {1348 pIoCtx->pIoCtxNext = NULL;1349 pIoCtx->pDisk = pDisk;1350 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD;1351 pIoCtx->cDataTransfersPending = 0;1352 pIoCtx->cMetaTransfersPending = 0;1353 pIoCtx->fComplete = false;1354 pIoCtx->fFlags = fFlags;1355 pIoCtx->pvAllocation = pvAllocation;1356 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;1357 pIoCtx->pfnIoCtxTransferNext = NULL;1358 pIoCtx->rcReq = VINF_SUCCESS;1359 pIoCtx->Req.Discard.paRanges = paRanges;1360 pIoCtx->Req.Discard.cRanges = cRanges;1361 pIoCtx->Req.Discard.idxRange = 0;1362 pIoCtx->Req.Discard.cbDiscardLeft = 0;1363 pIoCtx->Req.Discard.offCur = 0;1364 
pIoCtx->Req.Discard.cbThisDiscard = 0;1365 1366 pIoCtx->pIoCtxParent = NULL;1367 pIoCtx->Type.Root.pfnComplete = pfnComplete;1368 pIoCtx->Type.Root.pvUser1 = pvUser1;1369 pIoCtx->Type.Root.pvUser2 = pvUser2;1370 }1371 1372 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));1373 return pIoCtx;1374 }1375 1376 DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1377 uint64_t uOffset, size_t cbTransfer,1378 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,1379 PVDIOCTX pIoCtxParent, size_t cbTransferParent,1380 size_t cbWriteParent, void *pvAllocation,1381 PFNVDIOCTXTRANSFER pfnIoCtxTransfer)1382 {1383 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1384 pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0);1385 1386 AssertPtr(pIoCtxParent);1387 Assert(!pIoCtxParent->pIoCtxParent);1388 1389 if (RT_LIKELY(pIoCtx))1390 {1391 pIoCtx->pIoCtxParent = pIoCtxParent;1392 pIoCtx->Type.Child.uOffsetSaved = uOffset;1393 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;1394 pIoCtx->Type.Child.cbTransferParent = cbTransferParent;1395 pIoCtx->Type.Child.cbWriteParent = cbWriteParent;1396 }1397 1398 LogFlow(("Allocated child I/O context %#p\n", pIoCtx));1399 return pIoCtx;1400 }1401 1402 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)1403 {1404 PVDIOTASK pIoTask = NULL;1405 1406 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);1407 if (pIoTask)1408 {1409 pIoTask->pIoStorage = pIoStorage;1410 pIoTask->pfnComplete = pfnComplete;1411 pIoTask->pvUser = pvUser;1412 pIoTask->fMeta = false;1413 pIoTask->Type.User.cbTransfer = cbTransfer;1414 pIoTask->Type.User.pIoCtx = pIoCtx;1415 }1416 1417 return pIoTask;1418 }1419 1420 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)1421 {1422 PVDIOTASK pIoTask = NULL;1423 1424 pIoTask = 
(PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);1425 if (pIoTask)1426 {1427 pIoTask->pIoStorage = pIoStorage;1428 pIoTask->pfnComplete = pfnComplete;1429 pIoTask->pvUser = pvUser;1430 pIoTask->fMeta = true;1431 pIoTask->Type.Meta.pMetaXfer = pMetaXfer;1432 }1433 1434 return pIoTask;1435 }1436 1437 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1438 {1439 LogFlow(("Freeing I/O context %#p\n", pIoCtx));1440 if (pIoCtx->pvAllocation)1441 RTMemFree(pIoCtx->pvAllocation);1442 #ifdef DEBUG1443 memset(pIoCtx, 0xff, sizeof(VDIOCTX));1444 #endif1445 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);1446 }1447 1448 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask)1449 {1450 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);1451 }1452 1453 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)1454 {1455 AssertPtr(pIoCtx->pIoCtxParent);1456 1457 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);1458 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved;1459 pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved;1460 }1461 1462 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)1463 {1464 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));1465 1466 if (RT_LIKELY(pMetaXfer))1467 {1468 pMetaXfer->Core.Key = uOffset;1469 pMetaXfer->Core.KeyLast = uOffset + cb - 1;1470 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE;1471 pMetaXfer->cbMeta = cb;1472 pMetaXfer->pIoStorage = pIoStorage;1473 pMetaXfer->cRefs = 0;1474 RTListInit(&pMetaXfer->ListIoCtxWaiting);1475 }1476 return pMetaXfer;1477 }1478 1479 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)1480 {1481 /* Put it on the waiting list. 
*/1482 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);1483 PVDIOCTX pHeadOld;1484 pIoCtx->pIoCtxNext = pNext;1485 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))1486 {1487 pNext = pHeadOld;1488 Assert(pNext != pIoCtx);1489 pIoCtx->pIoCtxNext = pNext;1490 ASMNopPause();1491 }1492 }1493 1494 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1495 {1496 LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx));1497 1498 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));1499 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;1500 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);1501 }1502 1503 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)1504 {1505 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);1506 }1507 1508 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)1509 {1510 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);1511 }1512 1513 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)1514 {1515 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);1516 }1517 1518 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)1519 {1520 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);1521 }1522 1523 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)1524 {1525 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);1526 }1527 1528 /**1529 * Process the I/O context, core method which assumes that the I/O context1530 * acquired the lock.1531 *1532 * @returns VBox status code.1533 * @param pIoCtx I/O context to process.1534 */1535 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)1536 {1537 int rc = VINF_SUCCESS;1538 1539 VD_IS_LOCKED(pIoCtx->pDisk);1540 1541 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));1542 1543 if ( !pIoCtx->cMetaTransfersPending1544 && !pIoCtx->cDataTransfersPending1545 && !pIoCtx->pfnIoCtxTransfer)1546 {1547 
rc = VINF_VD_ASYNC_IO_FINISHED;1548 goto out;1549 }1550 1551 /*1552 * We complete the I/O context in case of an error1553 * if there is no I/O task pending.1554 */1555 if ( RT_FAILURE(pIoCtx->rcReq)1556 && !pIoCtx->cMetaTransfersPending1557 && !pIoCtx->cDataTransfersPending)1558 {1559 rc = VINF_VD_ASYNC_IO_FINISHED;1560 goto out;1561 }1562 1563 /* Don't change anything if there is a metadata transfer pending or we are blocked. */1564 if ( pIoCtx->cMetaTransfersPending1565 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))1566 {1567 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1568 goto out;1569 }1570 1571 if (pIoCtx->pfnIoCtxTransfer)1572 {1573 /* Call the transfer function advancing to the next while there is no error. */1574 while ( pIoCtx->pfnIoCtxTransfer1575 && !pIoCtx->cMetaTransfersPending1576 && RT_SUCCESS(rc))1577 {1578 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));1579 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);1580 1581 /* Advance to the next part of the transfer if the current one succeeded. 
*/1582 if (RT_SUCCESS(rc))1583 {1584 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;1585 pIoCtx->pfnIoCtxTransferNext = NULL;1586 }1587 }1588 }1589 1590 if ( RT_SUCCESS(rc)1591 && !pIoCtx->cMetaTransfersPending1592 && !pIoCtx->cDataTransfersPending)1593 rc = VINF_VD_ASYNC_IO_FINISHED;1594 else if ( RT_SUCCESS(rc)1595 || rc == VERR_VD_NOT_ENOUGH_METADATA1596 || rc == VERR_VD_IOCTX_HALT)1597 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1598 else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))1599 {1600 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);1601 /*1602 * The I/O context completed if we have an error and there is no data1603 * or meta data transfer pending.1604 */1605 if ( !pIoCtx->cMetaTransfersPending1606 && !pIoCtx->cDataTransfersPending)1607 rc = VINF_VD_ASYNC_IO_FINISHED;1608 else1609 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1610 }1611 1612 out:1613 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",1614 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,1615 pIoCtx->fComplete));1616 1617 return rc;1618 }1619 1620 /**1621 * Processes the list of waiting I/O contexts.1622 *1623 * @returns VBox status code.1624 * @param pDisk The disk structure.1625 * @param pIoCtxRc An I/O context handle which waits on the list. When processed1626 * The status code is returned. NULL if there is no I/O context1627 * to return the status code for.1628 */1629 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)1630 {1631 int rc = VINF_SUCCESS;1632 1633 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));1634 1635 VD_IS_LOCKED(pDisk);1636 1637 /* Get the waiting list and process it in FIFO order. */1638 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);1639 1640 /* Reverse it. 
*/1641 PVDIOCTX pCur = pIoCtxHead;1642 pIoCtxHead = NULL;1643 while (pCur)1644 {1645 PVDIOCTX pInsert = pCur;1646 pCur = pCur->pIoCtxNext;1647 pInsert->pIoCtxNext = pIoCtxHead;1648 pIoCtxHead = pInsert;1649 }1650 1651 /* Process now. */1652 pCur = pIoCtxHead;1653 while (pCur)1654 {1655 int rcTmp;1656 PVDIOCTX pTmp = pCur;1657 1658 pCur = pCur->pIoCtxNext;1659 pTmp->pIoCtxNext = NULL;1660 1661 rcTmp = vdIoCtxProcessLocked(pTmp);1662 if (pTmp == pIoCtxRc)1663 {1664 /* The given I/O context was processed, pass the return code to the caller. */1665 rc = rcTmp;1666 }1667 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED1668 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))1669 {1670 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));1671 vdThreadFinishWrite(pDisk);1672 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,1673 pTmp->Type.Root.pvUser2,1674 pTmp->rcReq);1675 vdIoCtxFree(pDisk, pTmp);1676 }1677 }1678 1679 LogFlowFunc(("returns rc=%Rrc\n", rc));1680 return rc;1681 }1682 1683 /**1684 * Processes the list of blocked I/O contexts.1685 *1686 * @returns nothing.1687 * @param pDisk The disk structure.1688 */1689 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)1690 {1691 LogFlowFunc(("pDisk=%#p\n", pDisk));1692 1693 VD_IS_LOCKED(pDisk);1694 1695 /* Get the waiting list and process it in FIFO order. */1696 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);1697 1698 /* Reverse it. */1699 PVDIOCTX pCur = pIoCtxHead;1700 pIoCtxHead = NULL;1701 while (pCur)1702 {1703 PVDIOCTX pInsert = pCur;1704 pCur = pCur->pIoCtxNext;1705 pInsert->pIoCtxNext = pIoCtxHead;1706 pIoCtxHead = pInsert;1707 }1708 1709 /* Process now. 
*/1710 pCur = pIoCtxHead;1711 while (pCur)1712 {1713 int rc;1714 PVDIOCTX pTmp = pCur;1715 1716 pCur = pCur->pIoCtxNext;1717 pTmp->pIoCtxNext = NULL;1718 1719 Assert(!pTmp->pIoCtxParent);1720 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);1721 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;1722 1723 rc = vdIoCtxProcessLocked(pTmp);1724 if ( rc == VINF_VD_ASYNC_IO_FINISHED1725 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))1726 {1727 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));1728 vdThreadFinishWrite(pDisk);1729 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,1730 pTmp->Type.Root.pvUser2,1731 pTmp->rcReq);1732 vdIoCtxFree(pDisk, pTmp);1733 }1734 }1735 1736 LogFlowFunc(("returns\n"));1737 }1738 1739 /**1740 * Processes the I/O context trying to lock the criticial section.1741 * The context is deferred if the critical section is busy.1742 *1743 * @returns VBox status code.1744 * @param pIoCtx The I/O context to process.1745 */1746 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)1747 {1748 int rc = VINF_SUCCESS;1749 PVBOXHDD pDisk = pIoCtx->pDisk;1750 1751 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));1752 1753 /* Put it on the waiting list first. */1754 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);1755 1756 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))1757 {1758 /* Leave it again, the context will be processed just before leaving the lock. 
*/1759 LogFlowFunc(("Successfully acquired the lock\n"));1760 rc = vdDiskUnlock(pDisk, pIoCtx);1761 }1762 else1763 {1764 LogFlowFunc(("Lock is held\n"));1765 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1766 }1767 1768 return rc;1769 }1770 1771 /**1772 * Process the I/O context in a synchronous manner, waiting1773 * for it to complete.1774 *1775 * @returns VBox status code of the completed request.1776 * @param pIoCtx The sync I/O context.1777 */1778 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx)1779 {1780 int rc = VINF_SUCCESS;1781 PVBOXHDD pDisk = pIoCtx->pDisk;1782 1783 LogFlowFunc(("pIoCtx=%p\n", pIoCtx));1784 1785 AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC,1786 ("I/O context is not marked as synchronous\n"));1787 1788 rc = vdIoCtxProcessTryLockDefer(pIoCtx);1789 if (rc == VINF_VD_ASYNC_IO_FINISHED)1790 rc = VINF_SUCCESS;1791 1792 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)1793 {1794 rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT);1795 AssertRC(rc);1796 1797 rc = pDisk->rcSync;1798 }1799 else /* Success or error. */1800 vdIoCtxFree(pDisk, pIoCtx);1801 1802 return rc;1803 }1804 1805 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1806 {1807 return pDisk->pIoCtxLockOwner == pIoCtx;1808 }1809 1810 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1811 {1812 int rc = VINF_SUCCESS;1813 1814 VD_IS_LOCKED(pDisk);1815 1816 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));1817 1818 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))1819 {1820 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. 
*/1821 vdIoCtxDefer(pDisk, pIoCtx);1822 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1823 }1824 1825 LogFlowFunc(("returns -> %Rrc\n", rc));1826 return rc;1827 }1828 1829 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)1830 {1831 LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",1832 pDisk, pIoCtx, fProcessBlockedReqs));1833 1834 VD_IS_LOCKED(pDisk);1835 1836 LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));1837 Assert(pDisk->pIoCtxLockOwner == pIoCtx);1838 ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);1839 1840 if (fProcessBlockedReqs)1841 {1842 /* Process any blocked writes if the current request didn't caused another growing. */1843 vdDiskProcessBlockedIoCtx(pDisk);1844 }1845 1846 LogFlowFunc(("returns\n"));1847 }1848 1849 /**1850 * internal: read the specified amount of data in whatever blocks the backend1851 * will give us - async version.1852 */1853 static int vdReadHelperAsync(PVDIOCTX pIoCtx)1854 {1855 int rc;1856 size_t cbToRead = pIoCtx->Req.Io.cbTransfer;1857 uint64_t uOffset = pIoCtx->Req.Io.uOffset;1858 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;;1859 size_t cbThisRead;1860 1861 /* Loop until all reads started or we have a backend which needs to read metadata. */1862 do1863 {1864 /* Search for image with allocated block. Do not attempt to read more1865 * than the previous reads marked as valid. Otherwise this would return1866 * stale data when different block sizes are used for the images. 
*/1867 cbThisRead = cbToRead;1868 1869 /*1870 * Try to read from the given image.1871 * If the block is not allocated read from override chain if present.1872 */1873 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,1874 uOffset, cbThisRead,1875 pIoCtx, &cbThisRead);1876 1877 if (rc == VERR_VD_BLOCK_FREE)1878 {1879 while ( pCurrImage->pPrev != NULL1880 && rc == VERR_VD_BLOCK_FREE)1881 {1882 pCurrImage = pCurrImage->pPrev;1883 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,1884 uOffset, cbThisRead,1885 pIoCtx, &cbThisRead);1886 }1887 }1888 1889 /* The task state will be updated on success already, don't do it here!. */1890 if (rc == VERR_VD_BLOCK_FREE)1891 {1892 /* No image in the chain contains the data for the block. */1893 vdIoCtxSet(pIoCtx, '\0', cbThisRead);1894 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);1895 rc = VINF_SUCCESS;1896 }1897 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)1898 rc = VINF_SUCCESS;1899 else if (rc == VERR_VD_IOCTX_HALT)1900 {1901 uOffset += cbThisRead;1902 cbToRead -= cbThisRead;1903 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;1904 }1905 1906 if (RT_FAILURE(rc))1907 break;1908 1909 cbToRead -= cbThisRead;1910 uOffset += cbThisRead;1911 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */1912 } while (cbToRead != 0 && RT_SUCCESS(rc));1913 1914 if ( rc == VERR_VD_NOT_ENOUGH_METADATA1915 || rc == VERR_VD_IOCTX_HALT)1916 {1917 /* Save the current state. */1918 pIoCtx->Req.Io.uOffset = uOffset;1919 pIoCtx->Req.Io.cbTransfer = cbToRead;1920 pIoCtx->Req.Io.pImageCur = pCurrImage ? 
pCurrImage : pIoCtx->Req.Io.pImageStart;1921 }1922 1923 return rc;1924 }1925 1926 /**1927 * internal: parent image read wrapper for compacting.1928 */1929 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,1930 size_t cbRead)1931 {1932 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;1933 return vdReadHelper(pParentState->pDisk, pParentState->pImage, uOffset,1934 pvBuf, cbRead, false /* fUpdateCache */);1935 1965 } 1936 1966 … … 1984 2014 1985 2015 /** 1986 * internal: write a complete block (only used for diff images), taking the1987 * remaining data from parent images. This implementation does not optimize1988 * anything (except that it tries to read only that portions from parent1989 * images that are really needed).1990 */1991 static int vdWriteHelperStandard(PVBOXHDD pDisk, PVDIMAGE pImage,1992 PVDIMAGE pImageParentOverride,1993 uint64_t uOffset, size_t cbWrite,1994 size_t cbThisWrite, size_t cbPreRead,1995 size_t cbPostRead, const void *pvBuf,1996 void *pvTmp)1997 {1998 int rc = VINF_SUCCESS;1999 2000 LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",2001 pDisk, pImage, pImageParentOverride, uOffset, cbWrite));2002 2003 /* Read the data that goes before the write to fill the block. */2004 if (cbPreRead)2005 {2006 /*2007 * Updating the cache doesn't make sense here because2008 * this will be done after the complete block was written.2009 */2010 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,2011 uOffset - cbPreRead, pvTmp, cbPreRead,2012 true /* fZeroFreeBlocks*/,2013 false /* fUpdateCache */, 0);2014 if (RT_FAILURE(rc))2015 return rc;2016 }2017 2018 /* Copy the data to the right place in the buffer. */2019 memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);2020 2021 /* Read the data that goes after the write to fill the block. */2022 if (cbPostRead)2023 {2024 /* If we have data to be written, use that instead of reading2025 * data from the image. 
*/2026 size_t cbWriteCopy;2027 if (cbWrite > cbThisWrite)2028 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);2029 else2030 cbWriteCopy = 0;2031 /* Figure out how much we cannot read from the image, because2032 * the last block to write might exceed the nominal size of the2033 * image for technical reasons. */2034 size_t cbFill;2035 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)2036 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;2037 else2038 cbFill = 0;2039 /* The rest must be read from the image. */2040 size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill;2041 2042 /* Now assemble the remaining data. */2043 if (cbWriteCopy)2044 memcpy((char *)pvTmp + cbPreRead + cbThisWrite,2045 (char *)pvBuf + cbThisWrite, cbWriteCopy);2046 if (cbReadImage)2047 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,2048 uOffset + cbThisWrite + cbWriteCopy,2049 (char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy,2050 cbReadImage, true /* fZeroFreeBlocks */,2051 false /* fUpdateCache */, 0);2052 if (RT_FAILURE(rc))2053 return rc;2054 /* Zero out the remainder of this block. Will never be visible, as this2055 * is beyond the limit of the image. */2056 if (cbFill)2057 memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,2058 '\0', cbFill);2059 }2060 2061 /* Write the full block to the virtual disk. 
/**
 * internal: write a complete block (only used for diff images), taking the
 * remaining data from parent images. This implementation optimizes out writes
 * that do not change the data relative to the state as of the parent images.
 * All backends which support differential/growing images support this.
 *
 * @returns VBox status code.
 * @param   pDisk                The disk container.
 * @param   pImage               The image to write to.
 * @param   pImageParentOverride Override for the parent image to read from
 *                               (passed through to vdReadHelperEx, may be NULL).
 * @param   uOffset              Offset of the caller's write (start of cbThisWrite).
 * @param   cbWrite              Total number of bytes the caller still wants written;
 *                               any surplus beyond cbThisWrite can overlap cbPostRead.
 * @param   cbThisWrite          Bytes of caller data belonging to this block.
 * @param   cbPreRead            Bytes to fill in before the caller data to complete
 *                               the block (read starts at uOffset - cbPreRead).
 * @param   cbPostRead           Bytes to fill in after the caller data.
 * @param   pvBuf                The caller's write data.
 * @param   pvTmp                Scratch buffer holding the assembled block of
 *                               cbPreRead + cbThisWrite + cbPostRead bytes.
 * @param   cImagesRead          Number of images to read from (forwarded to
 *                               vdReadHelperEx).
 */
static int vdWriteHelperOptimized(PVBOXHDD pDisk, PVDIMAGE pImage,
                                  PVDIMAGE pImageParentOverride,
                                  uint64_t uOffset, size_t cbWrite,
                                  size_t cbThisWrite, size_t cbPreRead,
                                  size_t cbPostRead, const void *pvBuf,
                                  void *pvTmp, unsigned cImagesRead)
{
    size_t cbFill = 0;
    size_t cbWriteCopy = 0;
    size_t cbReadImage = 0;
    int rc;

    LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",
                 pDisk, pImage, pImageParentOverride, uOffset, cbWrite));

    if (cbPostRead)
    {
        /* Figure out how much we cannot read from the image, because
         * the last block to write might exceed the nominal size of the
         * image for technical reasons. */
        if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
            cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;

        /* If we have data to be written, use that instead of reading
         * data from the image. */
        if (cbWrite > cbThisWrite)
            cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);

        /* The rest must be read from the image. */
        cbReadImage = cbPostRead - cbWriteCopy - cbFill;
    }

    /* Read the entire data of the block so that we can compare whether it will
     * be modified by the write or not. Note: cbFill bytes at the end are
     * beyond the image size and are not read. */
    rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, uOffset - cbPreRead, pvTmp,
                        cbPreRead + cbThisWrite + cbPostRead - cbFill,
                        true /* fZeroFreeBlocks */, false /* fUpdateCache */,
                        cImagesRead);
    if (RT_FAILURE(rc))
        return rc;

    /* Check if the write would modify anything in this block. */
    if (   !memcmp((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite)
        && (!cbWriteCopy || !memcmp((char *)pvTmp + cbPreRead + cbThisWrite,
                                    (char *)pvBuf + cbThisWrite, cbWriteCopy)))
    {
        /* Block is completely unchanged, so no need to write anything. */
        return VINF_SUCCESS;
    }

    /* Copy the data to the right place in the buffer. */
    memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);

    /* Handle the data that goes after the write to fill the block. */
    if (cbPostRead)
    {
        /* Now assemble the remaining data. */
        if (cbWriteCopy)
            memcpy((char *)pvTmp + cbPreRead + cbThisWrite,
                   (char *)pvBuf + cbThisWrite, cbWriteCopy);
        /* Zero out the remainder of this block. Will never be visible, as this
         * is beyond the limit of the image. */
        if (cbFill)
            memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,
                   '\0', cbFill);
    }

    /* Write the full block to the virtual disk synchronously through a
     * stack-allocated I/O context (VDIOCTX_FLAGS_SYNC). */
    RTSGSEG SegmentBuf;
    RTSGBUF SgBuf;
    VDIOCTX IoCtx;

    SegmentBuf.pvSeg = pvTmp;
    SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead;
    RTSgBufInit(&SgBuf, &SegmentBuf, 1);
    vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
                &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
    rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead,
                                   cbPreRead + cbThisWrite + cbPostRead,
                                   &IoCtx, NULL, &cbPreRead, &cbPostRead, 0);
    Assert(rc != VERR_VD_BLOCK_FREE);
    Assert(cbPreRead == 0);
    Assert(cbPostRead == 0);

    return rc;
}
0 : VD_WRITE_NO_ALLOC; 2206 2207 SegmentBuf.pvSeg = (void *)pcvBufCur; 2208 SegmentBuf.cbSeg = cbWrite; 2209 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 2210 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL, 2211 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 2212 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffsetCur, cbThisWrite, 2213 &IoCtx, &cbThisWrite, &cbPreRead, 2214 &cbPostRead, fWrite); 2215 if (rc == VERR_VD_BLOCK_FREE) 2216 { 2217 void *pvTmp = RTMemTmpAlloc(cbPreRead + cbThisWrite + cbPostRead); 2218 AssertBreakStmt(VALID_PTR(pvTmp), rc = VERR_NO_MEMORY); 2219 2220 if (!(pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)) 2221 { 2222 /* Optimized write, suppress writing to a so far unallocated 2223 * block if the data is in fact not changed. */ 2224 rc = vdWriteHelperOptimized(pDisk, pImage, pImageParentOverride, 2225 uOffsetCur, cbWriteCur, 2226 cbThisWrite, cbPreRead, cbPostRead, 2227 pcvBufCur, pvTmp, cImagesRead); 2228 } 2229 else 2230 { 2231 /* Normal write, not optimized in any way. The block will 2232 * be written no matter what. This will usually (unless the 2233 * backend has some further optimization enabled) cause the 2234 * block to be allocated. */ 2235 rc = vdWriteHelperStandard(pDisk, pImage, pImageParentOverride, 2236 uOffsetCur, cbWriteCur, 2237 cbThisWrite, cbPreRead, cbPostRead, 2238 pcvBufCur, pvTmp); 2239 } 2240 RTMemTmpFree(pvTmp); 2241 if (RT_FAILURE(rc)) 2242 break; 2243 } 2244 2245 cbWriteCur -= cbThisWrite; 2246 uOffsetCur += cbThisWrite; 2247 pcvBufCur = (char *)pcvBufCur + cbThisWrite; 2248 } while (cbWriteCur != 0 && RT_SUCCESS(rc)); 2249 2250 #if 0 /** @todo: Soon removed when sync and async version of the write helper are merged. 
/**
 * internal: write a complete block (only used for diff images), taking the
 * remaining data from parent images. This implementation does not optimize
 * anything (except that it tries to read only those portions from parent
 * images that are really needed) - async version.
 *
 * NOTE(review): the entire implementation below is disabled with #if 0 and the
 * function unconditionally returns VERR_NOT_IMPLEMENTED. The disabled code
 * references identifiers (pIoCtxDst, pIoCtxSrc, cbPreRead, uOffset, pDisk,
 * pImage, ...) that are not in scope for this signature; it is kept only as a
 * template for the real async implementation.
 */
static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;

#if 0

    /* Read the data that goes before the write to fill the block. */
    if (cbPreRead)
    {
        rc = vdReadHelperAsync(pIoCtxDst);
        if (RT_FAILURE(rc))
            return rc;
    }

    /* Copy the data to the right place in the buffer. */
    vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbThisWrite);

    /* Read the data that goes after the write to fill the block. */
    if (cbPostRead)
    {
        /* If we have data to be written, use that instead of reading
         * data from the image. */
        size_t cbWriteCopy;
        if (cbWrite > cbThisWrite)
            cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
        else
            cbWriteCopy = 0;
        /* Figure out how much we cannot read from the image, because
         * the last block to write might exceed the nominal size of the
         * image for technical reasons. */
        size_t cbFill;
        if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
            cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
        else
            cbFill = 0;
        /* The rest must be read from the image. */
        size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill;

        /* Now assemble the remaining data. */
        if (cbWriteCopy)
        {
            vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbWriteCopy);
            ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbWriteCopy);
        }

        if (cbReadImage)
            rc = vdReadHelperAsync(pDisk, pImage, pImageParentOverride, pIoCtxDst,
                                   uOffset + cbThisWrite + cbWriteCopy,
                                   cbReadImage);
        if (RT_FAILURE(rc))
            return rc;
        /* Zero out the remainder of this block. Will never be visible, as this
         * is beyond the limit of the image. */
        if (cbFill)
        {
            vdIoCtxSet(pIoCtxDst, '\0', cbFill);
            ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbFill);
        }
    }

    if (   !pIoCtxDst->cbTransferLeft
        && !pIoCtxDst->cMetaTransfersPending
        && ASMAtomicCmpXchgBool(&pIoCtxDst->fComplete, true, false))
    {
        /* Write the full block to the virtual disk. */
        vdIoCtxChildReset(pIoCtxDst);
        rc = pImage->Backend->pfnWrite(pImage->pBackendData,
                                       uOffset - cbPreRead,
                                       cbPreRead + cbThisWrite + cbPostRead,
                                       pIoCtxDst,
                                       NULL, &cbPreRead, &cbPostRead, 0);
        Assert(rc != VERR_VD_BLOCK_FREE);
        Assert(cbPreRead == 0);
        Assert(cbPostRead == 0);
    }
    else
    {
        LogFlow(("cbTransferLeft=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
                 pIoCtxDst->cbTransferLeft, pIoCtxDst->cMetaTransfersPending,
                 pIoCtxDst->fComplete));
        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    }

    return rc;
#endif
    return VERR_NOT_IMPLEMENTED;
}
*/ 2687 2376 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 2688 pIoCtx->pfnIoCtxTransferNext = vdWriteHelper OptimizedCommitAsync;2377 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync; 2689 2378 2690 2379 return rc; … … 2696 2385 2697 2386 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2387 2388 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; 2698 2389 2699 2390 if (pIoCtx->Req.Io.cbTransferLeft) … … 2762 2453 /* Next step */ 2763 2454 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync; 2455 return VINF_SUCCESS; 2456 } 2457 2458 static int vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx) 2459 { 2460 int rc = VINF_SUCCESS; 2461 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead; 2462 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent; 2463 PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent; 2464 2465 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2466 2467 vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite); 2468 if (cbPostRead) 2469 { 2470 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill; 2471 size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy; 2472 size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage; 2473 2474 /* Now assemble the remaining data. */ 2475 if (cbWriteCopy) 2476 { 2477 /* 2478 * The S/G buffer of the parent needs to be cloned because 2479 * it is not allowed to modify the state. 2480 */ 2481 RTSGBUF SgBufParentTmp; 2482 2483 RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf); 2484 RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy); 2485 } 2486 2487 /* Zero out the remainder of this block. Will never be visible, as this 2488 * is beyond the limit of the image. */ 2489 if (cbFill) 2490 { 2491 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage); 2492 vdIoCtxSet(pIoCtx, '\0', cbFill); 2493 } 2494 2495 if (cbReadImage) 2496 { 2497 /* Read remaining data. */ 2498 } 2499 else 2500 { 2501 /* Write the full block to the virtual disk. 
*/ 2502 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 2503 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync; 2504 } 2505 } 2506 else 2507 { 2508 /* Write the full block to the virtual disk. */ 2509 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 2510 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync; 2511 } 2512 2513 return rc; 2514 } 2515 2516 static int vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx) 2517 { 2518 int rc = VINF_SUCCESS; 2519 2520 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2521 2522 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; 2523 2524 if (pIoCtx->Req.Io.cbTransferLeft) 2525 rc = vdReadHelperAsync(pIoCtx); 2526 2527 if ( RT_SUCCESS(rc) 2528 && ( pIoCtx->Req.Io.cbTransferLeft 2529 || pIoCtx->cMetaTransfersPending)) 2530 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 2531 else 2532 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble; 2533 2534 return rc; 2535 } 2536 2537 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx) 2538 { 2539 PVBOXHDD pDisk = pIoCtx->pDisk; 2540 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved; 2541 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent; 2542 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead; 2543 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead; 2544 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent; 2545 size_t cbFill = 0; 2546 size_t cbWriteCopy = 0; 2547 size_t cbReadImage = 0; 2548 2549 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 2550 2551 AssertPtr(pIoCtx->pIoCtxParent); 2552 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent); 2553 2554 /* Calculate the amount of data to read that goes after the write to fill the block. */ 2555 if (cbPostRead) 2556 { 2557 /* If we have data to be written, use that instead of reading 2558 * data from the image. 
*/ 2559 cbWriteCopy; 2560 if (cbWrite > cbThisWrite) 2561 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); 2562 2563 /* Figure out how much we cannot read from the image, because 2564 * the last block to write might exceed the nominal size of the 2565 * image for technical reasons. */ 2566 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) 2567 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; 2568 2569 /* The rest must be read from the image. */ 2570 cbReadImage = cbPostRead - cbWriteCopy - cbFill; 2571 } 2572 2573 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill; 2574 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy; 2575 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage; 2576 2577 /* Next step */ 2578 if (cbPreRead) 2579 { 2580 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync; 2581 2582 /* Read the data that goes before the write to fill the block. */ 2583 pIoCtx->Req.Io.cbTransferLeft = cbPreRead; 2584 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft; 2585 pIoCtx->Req.Io.uOffset -= cbPreRead; 2586 } 2587 else 2588 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble; 2589 2764 2590 return VINF_SUCCESS; 2765 2591 } … … 9637 9463 pfnComplete, pvUser1, pvUser2, 9638 9464 NULL, vdReadHelperAsync, 9639 VDIOCTX_FLAGS_ DEFAULT);9465 VDIOCTX_FLAGS_ZERO_FREE_BLOCKS); 9640 9466 if (!pIoCtx) 9641 9467 {
Note: See TracChangeset for help on using the changeset viewer.