Changeset 45155 in vbox
- Timestamp: Mar 24, 2013 8:08:10 PM (12 years ago)
- svn:sync-xref-src-repo-rev: 84477
- File: 1 edited
trunk/src/VBox/Storage/VD.cpp
r44799 → r45155

        /** S/G buffer */
        RTSGBUF SgBuf;
        /** Number of bytes to clear in the buffer before the current read. */
        size_t cbBufClear;
        /** Number of images to read. */
        unsigned cImagesRead;
        /** Override for the parent image to start reading from. */
        PVDIMAGE pImageParentOverride;
    } Io;
    /** Discard requests. */
…
/** Default flags for an I/O context, i.e. unblocked and async. */
#define VDIOCTX_FLAGS_DEFAULT              (0)
/** Flag whether the context is blocked. */
#define VDIOCTX_FLAGS_BLOCKED              RT_BIT_32(0)
/** Flag whether the I/O context is using synchronous I/O. */
#define VDIOCTX_FLAGS_SYNC                 RT_BIT_32(1)
/** Flag whether the read should update the cache. */
#define VDIOCTX_FLAGS_READ_UDATE_CACHE     RT_BIT_32(2)
/** Flag whether free blocks should be zeroed.
 * If false and no image has data for the specified
 * range VERR_VD_BLOCK_FREE is returned for the I/O context.
 * Note that unallocated blocks are still zeroed
 * if at least one image has valid data for a part
 * of the range.
 */
#define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS     RT_BIT_32(3)
/** Don't free the I/O context when complete because
 * it was allocated elsewhere (stack, ...). */
#define VDIOCTX_FLAGS_DONT_FREE            RT_BIT_32(4)

/** NIL I/O context pointer value. */
…
/** Forward declaration of the async discard helper. */
static int vdDiscardHelperAsync(PVDIOCTX pIoCtx);
static int vdWriteHelperAsync(PVDIOCTX pIoCtx);
static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk);
static int vdDiskUnlock(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc);
static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq);
…
    pIoCtx->Req.Io.pImageStart = pImageStart;
    pIoCtx->Req.Io.pImageCur = pImageStart;
    pIoCtx->Req.Io.cbBufClear = 0;
    pIoCtx->Req.Io.pImageParentOverride = NULL;
    pIoCtx->cDataTransfersPending = 0;
    pIoCtx->cMetaTransfersPending = 0;
…
    pIoCtx->pfnIoCtxTransferNext = NULL;
    pIoCtx->rcReq = VINF_SUCCESS;
    pIoCtx->pIoCtxParent = NULL;

    /* There is no S/G list for a flush request. */
…
 */
static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset,
                             size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead)
{
    int rc = VINF_SUCCESS;
…
/**
 * Creates a new empty discard state.
 *
 * @returns Pointer to the new discard state or NULL if out of memory.
 */
static PVDDISCARDSTATE vdDiscardStateCreate(void)
{
    PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));

    if (pDiscard)
    {
        RTListInit(&pDiscard->ListLru);
        pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
        if (!pDiscard->pTreeBlocks)
        {
            RTMemFree(pDiscard);
            pDiscard = NULL;
        }
    }

    return pDiscard;
}

/**
 * Removes the least recently used blocks from the waiting list until
 * the new value is reached.
 *
 * @returns VBox status code.
 * @param   pDisk             VD disk container.
 * @param   pDiscard          The discard state.
 * @param   cbDiscardingNew   How many bytes should be waiting on success.
 *                            The number of bytes waiting can be less.
965 */ 966 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew) 967 { 968 int rc = VINF_SUCCESS; 969 970 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n", 971 pDisk, pDiscard, cbDiscardingNew)); 972 973 while (pDiscard->cbDiscarding > cbDiscardingNew) 974 { 975 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru); 976 977 Assert(!RTListIsEmpty(&pDiscard->ListLru)); 978 979 /* Go over the allocation bitmap and mark all discarded sectors as unused. */ 980 uint64_t offStart = pBlock->Core.Key; 981 uint32_t idxStart = 0; 982 size_t cbLeft = pBlock->cbDiscard; 983 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart); 984 uint32_t cSectors = pBlock->cbDiscard / 512; 985 986 while (cbLeft > 0) 987 { 988 int32_t idxEnd; 989 size_t cbThis = cbLeft; 990 991 if (fAllocated) 992 { 993 /* Check for the first unallocated bit. */ 994 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart); 995 if (idxEnd != -1) 996 { 997 cbThis = (idxEnd - idxStart) * 512; 998 fAllocated = false; 999 } 1000 } 1001 else 1002 { 1003 /* Mark as unused and check for the first set bit. */ 1004 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart); 1005 if (idxEnd != -1) 1006 cbThis = (idxEnd - idxStart) * 512; 1007 1008 1009 VDIOCTX IoCtx; 1010 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL, 1011 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1012 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, 1013 &IoCtx, offStart, cbThis, NULL, 1014 NULL, &cbThis, NULL, 1015 VD_DISCARD_MARK_UNUSED); 1016 if (RT_FAILURE(rc)) 1017 break; 1018 1019 fAllocated = true; 1020 } 1021 1022 idxStart = idxEnd; 1023 offStart += cbThis; 1024 cbLeft -= cbThis; 1025 } 1026 1027 if (RT_FAILURE(rc)) 1028 break; 1029 1030 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key); 1031 Assert(pBlockRemove == pBlock); 1032 RTListNodeRemove(&pBlock->NodeLru); 1033 1034 pDiscard->cbDiscarding -= pBlock->cbDiscard; 1035 RTMemFree(pBlock->pbmAllocated); 1036 RTMemFree(pBlock); 1037 } 1038 1039 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew); 1040 1041 LogFlowFunc(("returns rc=%Rrc\n", rc)); 1042 return rc; 1043 } 1044 1045 /** 1046 * Destroys the current discard state, writing any waiting blocks to the image. 1047 * 1048 * @returns VBox status code. 1049 * @param pDisk VD disk container. 1050 */ 1051 static int vdDiscardStateDestroy(PVBOXHDD pDisk) 1052 { 1053 int rc = VINF_SUCCESS; 1054 1055 if (pDisk->pDiscard) 1056 { 1057 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */); 1058 AssertRC(rc); 1059 RTMemFree(pDisk->pDiscard->pTreeBlocks); 1060 RTMemFree(pDisk->pDiscard); 1061 pDisk->pDiscard = NULL; 1062 } 1063 1064 return rc; 1065 } 1066 1067 /** 1068 * Marks the given range as allocated in the image. 1069 * Required if there are discards in progress and a write to a block which can get discarded 1070 * is written to. 1071 * 1072 * @returns VBox status code. 1073 * @param pDisk VD container data. 1074 * @param uOffset First byte to mark as allocated. 1075 * @param cbRange Number of bytes to mark as allocated. 
1076 */ 1077 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange) 1078 { 1079 PVDDISCARDSTATE pDiscard = pDisk->pDiscard; 1080 int rc = VINF_SUCCESS; 1081 1082 if (pDiscard) 1083 { 1084 do 1085 { 1086 size_t cbThisRange = cbRange; 1087 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset); 1088 1089 if (pBlock) 1090 { 1091 int32_t idxStart, idxEnd; 1092 1093 Assert(!(cbThisRange % 512)); 1094 Assert(!((uOffset - pBlock->Core.Key) % 512)); 1095 1096 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1); 1097 1098 idxStart = (uOffset - pBlock->Core.Key) / 512; 1099 idxEnd = idxStart + (cbThisRange / 512); 1100 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd); 1101 } 1102 else 1103 { 1104 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true); 1105 if (pBlock) 1106 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset); 1107 } 1108 1109 Assert(cbRange >= cbThisRange); 1110 1111 uOffset += cbThisRange; 1112 cbRange -= cbThisRange; 1113 } while (cbRange != 0); 1114 } 1115 1116 return rc; 1117 } 1118 1119 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1120 uint64_t uOffset, size_t cbTransfer, 1121 PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf, 1122 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1123 uint32_t fFlags) 1124 { 1125 PVDIOCTX pIoCtx = NULL; 1126 1127 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx); 1128 if (RT_LIKELY(pIoCtx)) 1129 { 1130 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1131 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); 1132 } 1133 1134 return pIoCtx; 1135 } 1136 1137 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1138 uint64_t uOffset, size_t cbTransfer, 1139 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf, 1140 PFNVDASYNCTRANSFERCOMPLETE pfnComplete, 1141 void *pvUser1, void *pvUser2, 1142 void *pvAllocation, 1143 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1144 uint32_t fFlags) 1145 { 1146 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1147 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); 1148 1149 if (RT_LIKELY(pIoCtx)) 1150 { 1151 pIoCtx->pIoCtxParent = NULL; 1152 pIoCtx->Type.Root.pfnComplete = pfnComplete; 1153 pIoCtx->Type.Root.pvUser1 = pvUser1; 1154 pIoCtx->Type.Root.pvUser2 = pvUser2; 1155 } 1156 1157 LogFlow(("Allocated root I/O context %#p\n", pIoCtx)); 1158 return pIoCtx; 1159 } 1160 1161 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges, 1162 unsigned cRanges, 1163 PFNVDASYNCTRANSFERCOMPLETE pfnComplete, 1164 void *pvUser1, void *pvUser2, 1165 void *pvAllocation, 1166 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, 1167 uint32_t fFlags) 1168 { 1169 PVDIOCTX pIoCtx = NULL; 1170 1171 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx); 1172 if (RT_LIKELY(pIoCtx)) 1173 { 1174 pIoCtx->pIoCtxNext = NULL; 1175 pIoCtx->pDisk = pDisk; 1176 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD; 1177 pIoCtx->cDataTransfersPending = 0; 1178 pIoCtx->cMetaTransfersPending = 0; 1179 pIoCtx->fComplete = false; 1180 pIoCtx->fFlags = fFlags; 1181 pIoCtx->pvAllocation = pvAllocation; 1182 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer; 1183 pIoCtx->pfnIoCtxTransferNext = NULL; 1184 pIoCtx->rcReq = VINF_SUCCESS; 1185 pIoCtx->Req.Discard.paRanges = paRanges; 1186 pIoCtx->Req.Discard.cRanges = cRanges; 1187 pIoCtx->Req.Discard.idxRange = 0; 1188 pIoCtx->Req.Discard.cbDiscardLeft = 0; 1189 pIoCtx->Req.Discard.offCur = 0; 1190 
pIoCtx->Req.Discard.cbThisDiscard = 0; 1191 1192 pIoCtx->pIoCtxParent = NULL; 1193 pIoCtx->Type.Root.pfnComplete = pfnComplete; 1194 pIoCtx->Type.Root.pvUser1 = pvUser1; 1195 pIoCtx->Type.Root.pvUser2 = pvUser2; 1196 } 1197 1198 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx)); 1199 return pIoCtx; 1200 } 1201 1202 DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, 1203 uint64_t uOffset, size_t cbTransfer, 1204 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf, 1205 PVDIOCTX pIoCtxParent, size_t cbTransferParent, 1206 size_t cbWriteParent, void *pvAllocation, 1207 PFNVDIOCTXTRANSFER pfnIoCtxTransfer) 1208 { 1209 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, 1210 pcSgBuf, pvAllocation, pfnIoCtxTransfer, pIoCtxParent->fFlags & ~VDIOCTX_FLAGS_DONT_FREE); 1211 1212 AssertPtr(pIoCtxParent); 1213 Assert(!pIoCtxParent->pIoCtxParent); 1214 1215 if (RT_LIKELY(pIoCtx)) 1216 { 1217 pIoCtx->pIoCtxParent = pIoCtxParent; 1218 pIoCtx->Type.Child.uOffsetSaved = uOffset; 1219 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer; 1220 pIoCtx->Type.Child.cbTransferParent = cbTransferParent; 1221 pIoCtx->Type.Child.cbWriteParent = cbWriteParent; 1222 } 1223 1224 LogFlow(("Allocated child I/O context %#p\n", pIoCtx)); 1225 return pIoCtx; 1226 } 1227 1228 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer) 1229 { 1230 PVDIOTASK pIoTask = NULL; 1231 1232 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask); 1233 if (pIoTask) 1234 { 1235 pIoTask->pIoStorage = pIoStorage; 1236 pIoTask->pfnComplete = pfnComplete; 1237 pIoTask->pvUser = pvUser; 1238 pIoTask->fMeta = false; 1239 pIoTask->Type.User.cbTransfer = cbTransfer; 1240 pIoTask->Type.User.pIoCtx = pIoCtx; 1241 } 1242 1243 return pIoTask; 1244 } 1245 1246 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer) 1247 { 1248 PVDIOTASK pIoTask = NULL; 1249 1250 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask); 1251 if (pIoTask) 1252 { 1253 pIoTask->pIoStorage = pIoStorage; 1254 pIoTask->pfnComplete = pfnComplete; 1255 pIoTask->pvUser = pvUser; 1256 pIoTask->fMeta = true; 1257 pIoTask->Type.Meta.pMetaXfer = pMetaXfer; 1258 } 1259 1260 return pIoTask; 1261 } 1262 1263 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1264 { 1265 Log(("Freeing I/O context %#p\n", pIoCtx)); 1266 1267 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE)) 1268 { 1269 if (pIoCtx->pvAllocation) 1270 RTMemFree(pIoCtx->pvAllocation); 1271 #ifdef DEBUG 1272 memset(&pIoCtx->pDisk, 0xff, sizeof(void *)); 1273 #endif 1274 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx); 1275 } 1276 } 1277 1278 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask) 1279 { 1280 //#ifdef DEBUG 1281 memset(pIoTask, 0xff, sizeof(VDIOTASK)); 1282 //#endif 1283 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask); 1284 } 1285 1286 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx) 1287 { 1288 AssertPtr(pIoCtx->pIoCtxParent); 1289 1290 RTSgBufReset(&pIoCtx->Req.Io.SgBuf); 1291 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved; 1292 pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved; 1293 } 1294 1295 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb) 1296 { 1297 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb])); 1298 
1299 if (RT_LIKELY(pMetaXfer)) 1300 { 1301 pMetaXfer->Core.Key = uOffset; 1302 pMetaXfer->Core.KeyLast = uOffset + cb - 1; 1303 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE; 1304 pMetaXfer->cbMeta = cb; 1305 pMetaXfer->pIoStorage = pIoStorage; 1306 pMetaXfer->cRefs = 0; 1307 RTListInit(&pMetaXfer->ListIoCtxWaiting); 1308 } 1309 return pMetaXfer; 1310 } 1311 1312 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx) 1313 { 1314 /* Put it on the waiting list. */ 1315 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX); 1316 PVDIOCTX pHeadOld; 1317 pIoCtx->pIoCtxNext = pNext; 1318 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld)) 1319 { 1320 pNext = pHeadOld; 1321 Assert(pNext != pIoCtx); 1322 pIoCtx->pIoCtxNext = pNext; 1323 ASMNopPause(); 1324 } 1325 } 1326 1327 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1328 { 1329 LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx)); 1330 1331 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)); 1332 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; 1333 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx); 1334 } 1335 1336 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData) 1337 { 1338 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData); 1339 } 1340 1341 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData) 1342 { 1343 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData); 1344 } 1345 1346 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData) 1347 { 1348 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); 1349 } 1350 1351 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData) 1352 { 1353 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); 1354 } 1355 1356 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData) 1357 { 1358 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData); 1359 } 1360 1361 /** 1362 * Process the I/O context, core method which assumes that the I/O context 1363 * acquired the lock. 1364 * 1365 * @returns VBox status code. 1366 * @param pIoCtx I/O context to process. 1367 */ 1368 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx) 1369 { 1370 int rc = VINF_SUCCESS; 1371 1372 VD_IS_LOCKED(pIoCtx->pDisk); 1373 1374 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); 1375 1376 if ( !pIoCtx->cMetaTransfersPending 1377 && !pIoCtx->cDataTransfersPending 1378 && !pIoCtx->pfnIoCtxTransfer) 1379 { 1380 rc = VINF_VD_ASYNC_IO_FINISHED; 1381 goto out; 1382 } 1383 1384 /* 1385 * We complete the I/O context in case of an error 1386 * if there is no I/O task pending. 1387 */ 1388 if ( RT_FAILURE(pIoCtx->rcReq) 1389 && !pIoCtx->cMetaTransfersPending 1390 && !pIoCtx->cDataTransfersPending) 1391 { 1392 rc = VINF_VD_ASYNC_IO_FINISHED; 1393 goto out; 1394 } 1395 1396 /* Don't change anything if there is a metadata transfer pending or we are blocked. */ 1397 if ( pIoCtx->cMetaTransfersPending 1398 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)) 1399 { 1400 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1401 goto out; 1402 } 1403 1404 if (pIoCtx->pfnIoCtxTransfer) 1405 { 1406 /* Call the transfer function advancing to the next while there is no error. 
*/ 1407 while ( pIoCtx->pfnIoCtxTransfer 1408 && !pIoCtx->cMetaTransfersPending 1409 && RT_SUCCESS(rc)) 1410 { 1411 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer)); 1412 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx); 1413 1414 /* Advance to the next part of the transfer if the current one succeeded. */ 1415 if (RT_SUCCESS(rc)) 1416 { 1417 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext; 1418 pIoCtx->pfnIoCtxTransferNext = NULL; 1419 } 1420 } 1421 } 1422 1423 if ( RT_SUCCESS(rc) 1424 && !pIoCtx->cMetaTransfersPending 1425 && !pIoCtx->cDataTransfersPending) 1426 rc = VINF_VD_ASYNC_IO_FINISHED; 1427 else if ( RT_SUCCESS(rc) 1428 || rc == VERR_VD_NOT_ENOUGH_METADATA 1429 || rc == VERR_VD_IOCTX_HALT) 1430 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1431 else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)) 1432 { 1433 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS); 1434 /* 1435 * The I/O context completed if we have an error and there is no data 1436 * or meta data transfer pending. 1437 */ 1438 if ( !pIoCtx->cMetaTransfersPending 1439 && !pIoCtx->cDataTransfersPending) 1440 rc = VINF_VD_ASYNC_IO_FINISHED; 1441 else 1442 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1443 } 1444 1445 out: 1446 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n", 1447 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending, 1448 pIoCtx->fComplete)); 1449 1450 return rc; 1451 } 1452 1453 /** 1454 * Processes the list of waiting I/O contexts. 1455 * 1456 * @returns VBox status code. 1457 * @param pDisk The disk structure. 1458 * @param pIoCtxRc An I/O context handle which waits on the list. When processed 1459 * The status code is returned. NULL if there is no I/O context 1460 * to return the status code for. 1461 */ 1462 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc) 1463 { 1464 int rc = VINF_SUCCESS; 1465 1466 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc)); 1467 1468 VD_IS_LOCKED(pDisk); 1469 1470 /* Get the waiting list and process it in FIFO order. */ 1471 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX); 1472 1473 /* Reverse it. */ 1474 PVDIOCTX pCur = pIoCtxHead; 1475 pIoCtxHead = NULL; 1476 while (pCur) 1477 { 1478 PVDIOCTX pInsert = pCur; 1479 pCur = pCur->pIoCtxNext; 1480 pInsert->pIoCtxNext = pIoCtxHead; 1481 pIoCtxHead = pInsert; 1482 } 1483 1484 /* Process now. */ 1485 pCur = pIoCtxHead; 1486 while (pCur) 1487 { 1488 int rcTmp; 1489 PVDIOCTX pTmp = pCur; 1490 1491 pCur = pCur->pIoCtxNext; 1492 pTmp->pIoCtxNext = NULL; 1493 1494 /* 1495 * Need to clear the sync flag here if there is a new I/O context 1496 * with it set and the context is not given in pIoCtxRc. 1497 * This happens most likely on a different thread and that one shouldn't 1498 * process the context synchronously. 1499 * 1500 * The thread who issued the context will wait on the event semaphore 1501 * anyway which is signalled when the completion handler is called. 1502 */ 1503 if ( pTmp->fFlags & VDIOCTX_FLAGS_SYNC 1504 && pTmp != pIoCtxRc) 1505 pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC; 1506 1507 rcTmp = vdIoCtxProcessLocked(pTmp); 1508 if (pTmp == pIoCtxRc) 1509 { 1510 /* The given I/O context was processed, pass the return code to the caller. 
*/ 1511 rc = rcTmp; 1512 } 1513 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED 1514 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false)) 1515 { 1516 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp)); 1517 vdThreadFinishWrite(pDisk); 1518 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1, 1519 pTmp->Type.Root.pvUser2, 1520 pTmp->rcReq); 1521 vdIoCtxFree(pDisk, pTmp); 1522 } 1523 } 1524 1525 LogFlowFunc(("returns rc=%Rrc\n", rc)); 1526 return rc; 1527 } 1528 1529 /** 1530 * Processes the list of blocked I/O contexts. 1531 * 1532 * @returns nothing. 1533 * @param pDisk The disk structure. 1534 */ 1535 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk) 1536 { 1537 LogFlowFunc(("pDisk=%#p\n", pDisk)); 1538 1539 VD_IS_LOCKED(pDisk); 1540 1541 /* Get the waiting list and process it in FIFO order. */ 1542 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX); 1543 1544 /* Reverse it. */ 1545 PVDIOCTX pCur = pIoCtxHead; 1546 pIoCtxHead = NULL; 1547 while (pCur) 1548 { 1549 PVDIOCTX pInsert = pCur; 1550 pCur = pCur->pIoCtxNext; 1551 pInsert->pIoCtxNext = pIoCtxHead; 1552 pIoCtxHead = pInsert; 1553 } 1554 1555 /* Process now. */ 1556 pCur = pIoCtxHead; 1557 while (pCur) 1558 { 1559 int rc; 1560 PVDIOCTX pTmp = pCur; 1561 1562 pCur = pCur->pIoCtxNext; 1563 pTmp->pIoCtxNext = NULL; 1564 1565 Assert(!pTmp->pIoCtxParent); 1566 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED); 1567 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED; 1568 1569 rc = vdIoCtxProcessLocked(pTmp); 1570 if ( rc == VINF_VD_ASYNC_IO_FINISHED 1571 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false)) 1572 { 1573 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp)); 1574 vdThreadFinishWrite(pDisk); 1575 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1, 1576 pTmp->Type.Root.pvUser2, 1577 pTmp->rcReq); 1578 vdIoCtxFree(pDisk, pTmp); 1579 } 1580 } 1581 1582 LogFlowFunc(("returns\n")); 1583 } 1584 1585 /** 1586 * Processes the I/O context trying to lock the criticial section. 1587 * The context is deferred if the critical section is busy. 1588 * 1589 * @returns VBox status code. 1590 * @param pIoCtx The I/O context to process. 1591 */ 1592 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx) 1593 { 1594 int rc = VINF_SUCCESS; 1595 PVBOXHDD pDisk = pIoCtx->pDisk; 1596 1597 Log(("Defer pIoCtx=%#p\n", pIoCtx)); 1598 1599 /* Put it on the waiting list first. */ 1600 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx); 1601 1602 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false)) 1603 { 1604 /* Leave it again, the context will be processed just before leaving the lock. */ 1605 LogFlowFunc(("Successfully acquired the lock\n")); 1606 rc = vdDiskUnlock(pDisk, pIoCtx); 1607 } 1608 else 1609 { 1610 LogFlowFunc(("Lock is held\n")); 1611 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1612 } 1613 1614 return rc; 1615 } 1616 1617 /** 1618 * Process the I/O context in a synchronous manner, waiting 1619 * for it to complete. 1620 * 1621 * @returns VBox status code of the completed request. 1622 * @param pIoCtx The sync I/O context. 
1623 */ 1624 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx) 1625 { 1626 int rc = VINF_SUCCESS; 1627 PVBOXHDD pDisk = pIoCtx->pDisk; 1628 1629 LogFlowFunc(("pIoCtx=%p\n", pIoCtx)); 1630 1631 AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC, 1632 ("I/O context is not marked as synchronous\n")); 1633 1634 rc = vdIoCtxProcessTryLockDefer(pIoCtx); 1635 if (rc == VINF_VD_ASYNC_IO_FINISHED) 1636 rc = VINF_SUCCESS; 1637 1638 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) 1639 { 1640 rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT); 1641 AssertRC(rc); 1642 1643 rc = pDisk->rcSync; 1644 } 1645 else /* Success or error. */ 1646 vdIoCtxFree(pDisk, pIoCtx); 1647 1648 return rc; 1649 } 1650 1651 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1652 { 1653 return pDisk->pIoCtxLockOwner == pIoCtx; 1654 } 1655 1656 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx) 1657 { 1658 int rc = VINF_SUCCESS; 1659 1660 VD_IS_LOCKED(pDisk); 1661 1662 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx)); 1663 1664 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX)) 1665 { 1666 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */ 1667 vdIoCtxDefer(pDisk, pIoCtx); 1668 rc = VERR_VD_ASYNC_IO_IN_PROGRESS; 1669 } 1670 1671 LogFlowFunc(("returns -> %Rrc\n", rc)); 1672 return rc; 1673 } 1674 1675 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs) 1676 { 1677 LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n", 1678 pDisk, pIoCtx, fProcessBlockedReqs)); 1679 1680 VD_IS_LOCKED(pDisk); 1681 1682 LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner)); 1683 Assert(pDisk->pIoCtxLockOwner == pIoCtx); 1684 ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX); 1685 1686 if (fProcessBlockedReqs) 1687 { 1688 /* Process any blocked writes if the current request didn't caused another growing. */ 1689 vdDiskProcessBlockedIoCtx(pDisk); 1690 } 1691 1692 LogFlowFunc(("returns\n")); 1693 } 1694 1695 /** 910 1696 * Internal: Reads a given amount of data from the image chain of the disk. 911 1697 **/ 912 1698 static int vdDiskReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride, 913 uint64_t uOffset, void *pvBuf, size_t cbRead, size_t *pcbThisRead)1699 uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead) 914 1700 { 915 1701 int rc = VINF_SUCCESS; 916 1702 size_t cbThisRead = cbRead; 917 RTSGSEG SegmentBuf;918 RTSGBUF SgBuf;919 VDIOCTX IoCtx;920 1703 921 1704 AssertPtr(pcbThisRead); 922 1705 923 1706 *pcbThisRead = 0; 924 925 SegmentBuf.pvSeg = pvBuf;926 SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE;927 RTSgBufInit(&SgBuf, &SegmentBuf, 1);928 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL,929 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);930 1707 931 1708 /* … … 934 1711 */ 935 1712 rc = pImage->Backend->pfnRead(pImage->pBackendData, 936 uOffset, cbThisRead, &IoCtx,937 1713 uOffset, cbThisRead, pIoCtx, 1714 &cbThisRead); 938 1715 939 1716 if (rc == VERR_VD_BLOCK_FREE) … … 944 1721 { 945 1722 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 946 uOffset, cbThisRead, &IoCtx,947 1723 uOffset, cbThisRead, pIoCtx, 1724 &cbThisRead); 948 1725 } 949 1726 } … … 952 1729 *pcbThisRead = cbThisRead; 953 1730 1731 return rc; 1732 } 1733 1734 /** 1735 * internal: read the specified amount of data in whatever blocks the backend 1736 * will give us - async version. 
1737 */ 1738 static int vdReadHelperAsync(PVDIOCTX pIoCtx) 1739 { 1740 int rc; 1741 PVBOXHDD pDisk = pIoCtx->pDisk; 1742 size_t cbToRead = pIoCtx->Req.Io.cbTransfer; 1743 uint64_t uOffset = pIoCtx->Req.Io.uOffset; 1744 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur; 1745 PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride; 1746 unsigned cImagesRead = pIoCtx->Req.Io.cImagesRead; 1747 size_t cbThisRead; 1748 1749 /* Loop until all reads started or we have a backend which needs to read metadata. */ 1750 do 1751 { 1752 /* Search for image with allocated block. Do not attempt to read more 1753 * than the previous reads marked as valid. Otherwise this would return 1754 * stale data when different block sizes are used for the images. */ 1755 cbThisRead = cbToRead; 1756 1757 if ( pDisk->pCache 1758 && !pImageParentOverride) 1759 { 1760 rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead, 1761 pIoCtx, &cbThisRead); 1762 if (rc == VERR_VD_BLOCK_FREE) 1763 { 1764 rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead, 1765 pIoCtx, &cbThisRead); 1766 1767 /* If the read was successful, write the data back into the cache. */ 1768 if ( RT_SUCCESS(rc) 1769 && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UDATE_CACHE) 1770 { 1771 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead, 1772 pIoCtx, NULL); 1773 } 1774 } 1775 } 1776 else 1777 { 1778 1779 /* 1780 * Try to read from the given image. 1781 * If the block is not allocated read from override chain if present. 1782 */ 1783 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1784 uOffset, cbThisRead, pIoCtx, 1785 &cbThisRead); 1786 1787 if ( rc == VERR_VD_BLOCK_FREE 1788 && cImagesRead != 1) 1789 { 1790 unsigned cImagesToProcess = cImagesRead; 1791 1792 pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev; 1793 pIoCtx->Req.Io.pImageParentOverride = NULL; 1794 1795 while (pCurrImage && rc == VERR_VD_BLOCK_FREE) 1796 { 1797 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1798 uOffset, cbThisRead, 1799 pIoCtx, &cbThisRead); 1800 if (cImagesToProcess == 1) 1801 break; 1802 else if (cImagesToProcess > 0) 1803 cImagesToProcess--; 1804 1805 if (rc == VERR_VD_BLOCK_FREE) 1806 pCurrImage = pCurrImage->pPrev; 1807 } 1808 } 1809 } 1810 1811 /* The task state will be updated on success already, don't do it here!. */ 1812 if (rc == VERR_VD_BLOCK_FREE) 1813 { 1814 /* No image in the chain contains the data for the block. */ 1815 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead); 1816 1817 /* Fill the free space with 0 if we are told to do so 1818 * or a previous read returned valid data. */ 1819 if (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS) 1820 vdIoCtxSet(pIoCtx, '\0', cbThisRead); 1821 else 1822 pIoCtx->Req.Io.cbBufClear += cbThisRead; 1823 1824 if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS) 1825 rc = VINF_VD_NEW_ZEROED_BLOCK; 1826 else 1827 rc = VINF_SUCCESS; 1828 } 1829 else if (rc == VERR_VD_IOCTX_HALT) 1830 { 1831 uOffset += cbThisRead; 1832 cbToRead -= cbThisRead; 1833 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; 1834 } 1835 else if ( RT_SUCCESS(rc) 1836 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS) 1837 { 1838 /* First not free block, fill the space before with 0. 
*/ 1839 if ( pIoCtx->Req.Io.cbBufClear 1840 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)) 1841 { 1842 RTSGBUF SgBuf; 1843 RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf); 1844 RTSgBufReset(&SgBuf); 1845 RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear); 1846 pIoCtx->Req.Io.cbBufClear = 0; 1847 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; 1848 } 1849 rc = VINF_SUCCESS; 1850 } 1851 1852 if (RT_FAILURE(rc)) 1853 break; 1854 1855 cbToRead -= cbThisRead; 1856 uOffset += cbThisRead; 1857 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */ 1858 } while (cbToRead != 0 && RT_SUCCESS(rc)); 1859 1860 if ( rc == VERR_VD_NOT_ENOUGH_METADATA 1861 || rc == VERR_VD_IOCTX_HALT) 1862 { 1863 /* Save the current state. */ 1864 pIoCtx->Req.Io.uOffset = uOffset; 1865 pIoCtx->Req.Io.cbTransfer = cbToRead; 1866 pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart; 1867 } 1868 1869 return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)) 1870 ? VERR_VD_BLOCK_FREE 1871 : rc; 1872 } 1873 1874 /** 1875 * internal: parent image read wrapper for compacting. 1876 */ 1877 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf, 1878 size_t cbRead) 1879 { 1880 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser; 1881 1882 /** @todo 1883 * Only used for compaction so far which is not possible to mix with async I/O. 1884 * Needs to be changed if we want to support online compaction of images. 1885 */ 1886 bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true); 1887 AssertMsgReturn(!fLocked, 1888 ("Calling synchronous parent read while another thread holds the disk lock\n"), 1889 VERR_VD_INVALID_STATE); 1890 1891 /* Fake an I/O context. */ 1892 RTSGSEG Segment; 1893 RTSGBUF SgBuf; 1894 VDIOCTX IoCtx; 1895 1896 Segment.pvSeg = pvBuf; 1897 Segment.cbSeg = cbRead; 1898 RTSgBufInit(&SgBuf, &Segment, 1); 1899 vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage, 1900 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1901 int rc = vdReadHelperAsync(&IoCtx); 1902 ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false); 954 1903 return rc; 955 1904 } … … 984 1933 bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead) 985 1934 { 986 int rc = VINF_SUCCESS; 987 size_t cbThisRead; 988 bool fAllFree = true; 989 size_t cbBufClear = 0; 990 991 /* Loop until all read. */ 992 do 993 { 994 /* Search for image with allocated block. Do not attempt to read more 995 * than the previous reads marked as valid. Otherwise this would return 996 * stale data when different block sizes are used for the images. */ 997 cbThisRead = cbRead; 998 999 if ( pDisk->pCache 1000 && !pImageParentOverride) 1001 { 1002 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. */ 1003 rc = vdCacheReadHelper(pDisk->pCache, uOffset, pvBuf, 1004 cbThisRead, &cbThisRead); 1005 #endif 1006 if (rc == VERR_VD_BLOCK_FREE) 1007 { 1008 rc = vdDiskReadHelper(pDisk, pImage, NULL, uOffset, pvBuf, cbThisRead, 1009 &cbThisRead); 1010 1011 /* If the read was successful, write the data back into the cache. */ 1012 if ( RT_SUCCESS(rc) 1013 && fUpdateCache) 1014 { 1015 #if 0 /** @todo: Will go soon when the sync and async read helper versions are merged. 
*/ 1016 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, 1017 cbThisRead, NULL); 1018 #endif 1019 } 1020 } 1021 } 1022 else 1023 { 1024 RTSGSEG SegmentBuf; 1025 RTSGBUF SgBuf; 1026 VDIOCTX IoCtx; 1027 1028 SegmentBuf.pvSeg = pvBuf; 1029 SegmentBuf.cbSeg = cbThisRead; 1030 RTSgBufInit(&SgBuf, &SegmentBuf, 1); 1031 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL, 1032 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); 1033 1034 /* 1035 * Try to read from the given image. 1036 * If the block is not allocated read from override chain if present. 1037 */ 1038 rc = pImage->Backend->pfnRead(pImage->pBackendData, 1039 uOffset, cbThisRead, &IoCtx, 1040 &cbThisRead); 1041 1042 if ( rc == VERR_VD_BLOCK_FREE 1043 && cImagesRead != 1) 1044 { 1045 unsigned cImagesToProcess = cImagesRead; 1046 1047 for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev; 1048 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE; 1049 pCurrImage = pCurrImage->pPrev) 1050 { 1051 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, 1052 uOffset, cbThisRead, 1053 &IoCtx, &cbThisRead); 1054 if (cImagesToProcess == 1) 1055 break; 1056 else if (cImagesToProcess > 0) 1057 cImagesToProcess--; 1058 } 1059 } 1060 } 1061 1062 /* No image in the chain contains the data for the block. */ 1063 if (rc == VERR_VD_BLOCK_FREE) 1064 { 1065 /* Fill the free space with 0 if we are told to do so 1066 * or a previous read returned valid data. */ 1067 if (fZeroFreeBlocks || !fAllFree) 1068 memset(pvBuf, '\0', cbThisRead); 1069 else 1070 cbBufClear += cbThisRead; 1071 1072 if (pImage->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS) 1073 rc = VINF_VD_NEW_ZEROED_BLOCK; 1074 else 1075 rc = VINF_SUCCESS; 1076 } 1077 else if (RT_SUCCESS(rc)) 1078 { 1079 /* First not free block, fill the space before with 0. */ 1080 if (!fZeroFreeBlocks) 1081 { 1082 memset((char *)pvBuf - cbBufClear, '\0', cbBufClear); 1083 cbBufClear = 0; 1084 fAllFree = false; 1085 } 1086 } 1087 1088 cbRead -= cbThisRead; 1089 uOffset += cbThisRead; 1090 pvBuf = (char *)pvBuf + cbThisRead; 1091 } while (cbRead != 0 && RT_SUCCESS(rc)); 1092 1093 return (!fZeroFreeBlocks && fAllFree) ? 
VERR_VD_BLOCK_FREE : rc; 1935 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE; 1936 RTSGSEG Segment; 1937 RTSGBUF SgBuf; 1938 VDIOCTX IoCtx; 1939 1940 if (fZeroFreeBlocks) 1941 fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; 1942 if (fUpdateCache) 1943 fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE; 1944 1945 Segment.pvSeg = pvBuf; 1946 Segment.cbSeg = cbRead; 1947 RTSgBufInit(&SgBuf, &Segment, 1); 1948 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf, 1949 NULL, vdReadHelperAsync, fFlags); 1950 1951 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride; 1952 IoCtx.Req.Io.cImagesRead = cImagesRead; 1953 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete; 1954 IoCtx.Type.Root.pvUser1 = pDisk; 1955 IoCtx.Type.Root.pvUser2 = NULL; 1956 return vdIoCtxProcessSync(&IoCtx); 1094 1957 } 1095 1958 … … 1103 1966 return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead, 1104 1967 true /* fZeroFreeBlocks */, fUpdateCache, 0); 1105 }1106 1107 /**1108 * Creates a new empty discard state.1109 *1110 * @returns Pointer to the new discard state or NULL if out of memory.1111 */1112 static PVDDISCARDSTATE vdDiscardStateCreate(void)1113 {1114 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));1115 1116 if (pDiscard)1117 {1118 RTListInit(&pDiscard->ListLru);1119 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));1120 if (!pDiscard->pTreeBlocks)1121 {1122 RTMemFree(pDiscard);1123 pDiscard = NULL;1124 }1125 }1126 1127 return pDiscard;1128 }1129 1130 /**1131 * Removes the least recently used blocks from the waiting list until1132 * the new value is reached.1133 *1134 * @returns VBox status code.1135 * @param pDisk VD disk container.1136 * @param pDiscard The discard state.1137 * @param cbDiscardingNew How many bytes should be waiting on success.1138 * The number of bytes waiting can be less.1139 */1140 static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)1141 {1142 int rc = VINF_SUCCESS;1143 1144 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",1145 pDisk, pDiscard, cbDiscardingNew));1146 1147 while (pDiscard->cbDiscarding > cbDiscardingNew)1148 {1149 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);1150 1151 Assert(!RTListIsEmpty(&pDiscard->ListLru));1152 1153 /* Go over the allocation bitmap and mark all discarded sectors as unused. */1154 uint64_t offStart = pBlock->Core.Key;1155 uint32_t idxStart = 0;1156 size_t cbLeft = pBlock->cbDiscard;1157 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);1158 uint32_t cSectors = pBlock->cbDiscard / 512;1159 1160 while (cbLeft > 0)1161 {1162 int32_t idxEnd;1163 size_t cbThis = cbLeft;1164 1165 if (fAllocated)1166 {1167 /* Check for the first unallocated bit. */1168 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);1169 if (idxEnd != -1)1170 {1171 cbThis = (idxEnd - idxStart) * 512;1172 fAllocated = false;1173 }1174 }1175 else1176 {1177 /* Mark as unused and check for the first set bit. 
*/1178 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);1179 if (idxEnd != -1)1180 cbThis = (idxEnd - idxStart) * 512;1181 1182 1183 VDIOCTX IoCtx;1184 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,1185 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);1186 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,1187 &IoCtx, offStart, cbThis, NULL,1188 NULL, &cbThis, NULL,1189 VD_DISCARD_MARK_UNUSED);1190 if (RT_FAILURE(rc))1191 break;1192 1193 fAllocated = true;1194 }1195 1196 idxStart = idxEnd;1197 offStart += cbThis;1198 cbLeft -= cbThis;1199 }1200 1201 if (RT_FAILURE(rc))1202 break;1203 1204 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);1205 Assert(pBlockRemove == pBlock);1206 RTListNodeRemove(&pBlock->NodeLru);1207 1208 pDiscard->cbDiscarding -= pBlock->cbDiscard;1209 RTMemFree(pBlock->pbmAllocated);1210 RTMemFree(pBlock);1211 }1212 1213 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);1214 1215 LogFlowFunc(("returns rc=%Rrc\n", rc));1216 return rc;1217 }1218 1219 /**1220 * Destroys the current discard state, writing any waiting blocks to the image.1221 *1222 * @returns VBox status code.1223 * @param pDisk VD disk container.1224 */1225 static int vdDiscardStateDestroy(PVBOXHDD pDisk)1226 {1227 int rc = VINF_SUCCESS;1228 1229 if (pDisk->pDiscard)1230 {1231 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);1232 AssertRC(rc);1233 RTMemFree(pDisk->pDiscard->pTreeBlocks);1234 RTMemFree(pDisk->pDiscard);1235 pDisk->pDiscard = NULL;1236 }1237 1238 return rc;1239 }1240 1241 /**1242 * Marks the given range as allocated in the image.1243 * Required if there are discards in progress and a write to a block which can get discarded1244 * is written to.1245 *1246 * @returns VBox status code.1247 * @param pDisk VD container data.1248 * @param uOffset First byte to mark as allocated.1249 * @param cbRange Number of bytes to mark as allocated.1250 */1251 static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRange)1252 {1253 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;1254 int rc = VINF_SUCCESS;1255 1256 if (pDiscard)1257 {1258 do1259 {1260 size_t cbThisRange = cbRange;1261 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);1262 1263 if (pBlock)1264 {1265 int32_t idxStart, idxEnd;1266 1267 Assert(!(cbThisRange % 512));1268 Assert(!((uOffset - pBlock->Core.Key) % 512));1269 1270 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);1271 1272 idxStart = (uOffset - pBlock->Core.Key) / 512;1273 idxEnd = idxStart + (cbThisRange / 512);1274 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);1275 }1276 else1277 {1278 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);1279 if (pBlock)1280 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);1281 }1282 1283 Assert(cbRange >= cbThisRange);1284 1285 uOffset += cbThisRange;1286 cbRange -= cbThisRange;1287 } while (cbRange != 0);1288 }1289 1290 return rc;1291 }1292 1293 DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1294 uint64_t uOffset, size_t cbTransfer,1295 PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,1296 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1297 uint32_t fFlags)1298 {1299 PVDIOCTX pIoCtx = NULL;1300 1301 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);1302 if (RT_LIKELY(pIoCtx))1303 {1304 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, 
pImageStart,1305 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);1306 }1307 1308 return pIoCtx;1309 }1310 1311 DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1312 uint64_t uOffset, size_t cbTransfer,1313 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,1314 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,1315 void *pvUser1, void *pvUser2,1316 void *pvAllocation,1317 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1318 uint32_t fFlags)1319 {1320 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1321 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);1322 1323 if (RT_LIKELY(pIoCtx))1324 {1325 pIoCtx->pIoCtxParent = NULL;1326 pIoCtx->Type.Root.pfnComplete = pfnComplete;1327 pIoCtx->Type.Root.pvUser1 = pvUser1;1328 pIoCtx->Type.Root.pvUser2 = pvUser2;1329 }1330 1331 LogFlow(("Allocated root I/O context %#p\n", pIoCtx));1332 return pIoCtx;1333 }1334 1335 DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges,1336 unsigned cRanges,1337 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,1338 void *pvUser1, void *pvUser2,1339 void *pvAllocation,1340 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,1341 uint32_t fFlags)1342 {1343 PVDIOCTX pIoCtx = NULL;1344 1345 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);1346 if (RT_LIKELY(pIoCtx))1347 {1348 pIoCtx->pIoCtxNext = NULL;1349 pIoCtx->pDisk = pDisk;1350 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD;1351 pIoCtx->cDataTransfersPending = 0;1352 pIoCtx->cMetaTransfersPending = 0;1353 pIoCtx->fComplete = false;1354 pIoCtx->fFlags = fFlags;1355 pIoCtx->pvAllocation = pvAllocation;1356 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;1357 pIoCtx->pfnIoCtxTransferNext = NULL;1358 pIoCtx->rcReq = VINF_SUCCESS;1359 pIoCtx->Req.Discard.paRanges = paRanges;1360 pIoCtx->Req.Discard.cRanges = cRanges;1361 pIoCtx->Req.Discard.idxRange = 0;1362 pIoCtx->Req.Discard.cbDiscardLeft = 0;1363 pIoCtx->Req.Discard.offCur = 0;1364 pIoCtx->Req.Discard.cbThisDiscard = 0;1365 1366 pIoCtx->pIoCtxParent = NULL;1367 pIoCtx->Type.Root.pfnComplete = pfnComplete;1368 pIoCtx->Type.Root.pvUser1 = pvUser1;1369 pIoCtx->Type.Root.pvUser2 = pvUser2;1370 }1371 1372 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));1373 return pIoCtx;1374 }1375 1376 DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir,1377 uint64_t uOffset, size_t cbTransfer,1378 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,1379 PVDIOCTX pIoCtxParent, size_t cbTransferParent,1380 size_t cbWriteParent, void *pvAllocation,1381 PFNVDIOCTXTRANSFER pfnIoCtxTransfer)1382 {1383 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,1384 pcSgBuf, pvAllocation, pfnIoCtxTransfer, 0);1385 1386 AssertPtr(pIoCtxParent);1387 Assert(!pIoCtxParent->pIoCtxParent);1388 1389 if (RT_LIKELY(pIoCtx))1390 {1391 pIoCtx->pIoCtxParent = pIoCtxParent;1392 pIoCtx->Type.Child.uOffsetSaved = uOffset;1393 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;1394 pIoCtx->Type.Child.cbTransferParent = cbTransferParent;1395 pIoCtx->Type.Child.cbWriteParent = cbWriteParent;1396 }1397 1398 LogFlow(("Allocated child I/O context %#p\n", pIoCtx));1399 return pIoCtx;1400 }1401 1402 DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)1403 {1404 PVDIOTASK pIoTask = NULL;1405 1406 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);1407 if (pIoTask)1408 {1409 pIoTask->pIoStorage = pIoStorage;1410 pIoTask->pfnComplete = pfnComplete;1411 pIoTask->pvUser = pvUser;1412 
pIoTask->fMeta = false;1413 pIoTask->Type.User.cbTransfer = cbTransfer;1414 pIoTask->Type.User.pIoCtx = pIoCtx;1415 }1416 1417 return pIoTask;1418 }1419 1420 DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)1421 {1422 PVDIOTASK pIoTask = NULL;1423 1424 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);1425 if (pIoTask)1426 {1427 pIoTask->pIoStorage = pIoStorage;1428 pIoTask->pfnComplete = pfnComplete;1429 pIoTask->pvUser = pvUser;1430 pIoTask->fMeta = true;1431 pIoTask->Type.Meta.pMetaXfer = pMetaXfer;1432 }1433 1434 return pIoTask;1435 }1436 1437 DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1438 {1439 LogFlow(("Freeing I/O context %#p\n", pIoCtx));1440 if (pIoCtx->pvAllocation)1441 RTMemFree(pIoCtx->pvAllocation);1442 #ifdef DEBUG1443 memset(pIoCtx, 0xff, sizeof(VDIOCTX));1444 #endif1445 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);1446 }1447 1448 DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask)1449 {1450 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);1451 }1452 1453 DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)1454 {1455 AssertPtr(pIoCtx->pIoCtxParent);1456 1457 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);1458 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved;1459 pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved;1460 }1461 1462 DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)1463 {1464 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));1465 1466 if (RT_LIKELY(pMetaXfer))1467 {1468 pMetaXfer->Core.Key = uOffset;1469 pMetaXfer->Core.KeyLast = uOffset + cb - 1;1470 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE;1471 pMetaXfer->cbMeta = cb;1472 pMetaXfer->pIoStorage = pIoStorage;1473 pMetaXfer->cRefs = 0;1474 RTListInit(&pMetaXfer->ListIoCtxWaiting);1475 }1476 return pMetaXfer;1477 }1478 1479 DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)1480 {1481 /* Put it on the waiting list. 
*/1482 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);1483 PVDIOCTX pHeadOld;1484 pIoCtx->pIoCtxNext = pNext;1485 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))1486 {1487 pNext = pHeadOld;1488 Assert(pNext != pIoCtx);1489 pIoCtx->pIoCtxNext = pNext;1490 ASMNopPause();1491 }1492 }1493 1494 DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1495 {1496 LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx));1497 1498 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));1499 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;1500 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);1501 }1502 1503 static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)1504 {1505 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);1506 }1507 1508 static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)1509 {1510 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);1511 }1512 1513 static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)1514 {1515 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);1516 }1517 1518 static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)1519 {1520 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);1521 }1522 1523 static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)1524 {1525 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);1526 }1527 1528 /**1529 * Process the I/O context, core method which assumes that the I/O context1530 * acquired the lock.1531 *1532 * @returns VBox status code.1533 * @param pIoCtx I/O context to process.1534 */1535 static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)1536 {1537 int rc = VINF_SUCCESS;1538 1539 VD_IS_LOCKED(pIoCtx->pDisk);1540 1541 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));1542 1543 if ( !pIoCtx->cMetaTransfersPending1544 && !pIoCtx->cDataTransfersPending1545 && !pIoCtx->pfnIoCtxTransfer)1546 {1547 rc = VINF_VD_ASYNC_IO_FINISHED;1548 goto out;1549 }1550 1551 /*1552 * We complete the I/O context in case of an error1553 * if there is no I/O task pending.1554 */1555 if ( RT_FAILURE(pIoCtx->rcReq)1556 && !pIoCtx->cMetaTransfersPending1557 && !pIoCtx->cDataTransfersPending)1558 {1559 rc = VINF_VD_ASYNC_IO_FINISHED;1560 goto out;1561 }1562 1563 /* Don't change anything if there is a metadata transfer pending or we are blocked. */1564 if ( pIoCtx->cMetaTransfersPending1565 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))1566 {1567 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1568 goto out;1569 }1570 1571 if (pIoCtx->pfnIoCtxTransfer)1572 {1573 /* Call the transfer function advancing to the next while there is no error. */1574 while ( pIoCtx->pfnIoCtxTransfer1575 && !pIoCtx->cMetaTransfersPending1576 && RT_SUCCESS(rc))1577 {1578 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));1579 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);1580 1581 /* Advance to the next part of the transfer if the current one succeeded. 
*/1582 if (RT_SUCCESS(rc))1583 {1584 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;1585 pIoCtx->pfnIoCtxTransferNext = NULL;1586 }1587 }1588 }1589 1590 if ( RT_SUCCESS(rc)1591 && !pIoCtx->cMetaTransfersPending1592 && !pIoCtx->cDataTransfersPending)1593 rc = VINF_VD_ASYNC_IO_FINISHED;1594 else if ( RT_SUCCESS(rc)1595 || rc == VERR_VD_NOT_ENOUGH_METADATA1596 || rc == VERR_VD_IOCTX_HALT)1597 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1598 else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))1599 {1600 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);1601 /*1602 * The I/O context completed if we have an error and there is no data1603 * or meta data transfer pending.1604 */1605 if ( !pIoCtx->cMetaTransfersPending1606 && !pIoCtx->cDataTransfersPending)1607 rc = VINF_VD_ASYNC_IO_FINISHED;1608 else1609 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1610 }1611 1612 out:1613 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",1614 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,1615 pIoCtx->fComplete));1616 1617 return rc;1618 }1619 1620 /**1621 * Processes the list of waiting I/O contexts.1622 *1623 * @returns VBox status code.1624 * @param pDisk The disk structure.1625 * @param pIoCtxRc An I/O context handle which waits on the list. When processed1626 * The status code is returned. NULL if there is no I/O context1627 * to return the status code for.1628 */1629 static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc)1630 {1631 int rc = VINF_SUCCESS;1632 1633 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));1634 1635 VD_IS_LOCKED(pDisk);1636 1637 /* Get the waiting list and process it in FIFO order. */1638 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);1639 1640 /* Reverse it. */1641 PVDIOCTX pCur = pIoCtxHead;1642 pIoCtxHead = NULL;1643 while (pCur)1644 {1645 PVDIOCTX pInsert = pCur;1646 pCur = pCur->pIoCtxNext;1647 pInsert->pIoCtxNext = pIoCtxHead;1648 pIoCtxHead = pInsert;1649 }1650 1651 /* Process now. */1652 pCur = pIoCtxHead;1653 while (pCur)1654 {1655 int rcTmp;1656 PVDIOCTX pTmp = pCur;1657 1658 pCur = pCur->pIoCtxNext;1659 pTmp->pIoCtxNext = NULL;1660 1661 rcTmp = vdIoCtxProcessLocked(pTmp);1662 if (pTmp == pIoCtxRc)1663 {1664 /* The given I/O context was processed, pass the return code to the caller. */1665 rc = rcTmp;1666 }1667 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED1668 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))1669 {1670 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));1671 vdThreadFinishWrite(pDisk);1672 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,1673 pTmp->Type.Root.pvUser2,1674 pTmp->rcReq);1675 vdIoCtxFree(pDisk, pTmp);1676 }1677 }1678 1679 LogFlowFunc(("returns rc=%Rrc\n", rc));1680 return rc;1681 }1682 1683 /**1684 * Processes the list of blocked I/O contexts.1685 *1686 * @returns nothing.1687 * @param pDisk The disk structure.1688 */1689 static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk)1690 {1691 LogFlowFunc(("pDisk=%#p\n", pDisk));1692 1693 VD_IS_LOCKED(pDisk);1694 1695 /* Get the waiting list and process it in FIFO order. */1696 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);1697 1698 /* Reverse it. */1699 PVDIOCTX pCur = pIoCtxHead;1700 pIoCtxHead = NULL;1701 while (pCur)1702 {1703 PVDIOCTX pInsert = pCur;1704 pCur = pCur->pIoCtxNext;1705 pInsert->pIoCtxNext = pIoCtxHead;1706 pIoCtxHead = pInsert;1707 }1708 1709 /* Process now. 
*/1710 pCur = pIoCtxHead;1711 while (pCur)1712 {1713 int rc;1714 PVDIOCTX pTmp = pCur;1715 1716 pCur = pCur->pIoCtxNext;1717 pTmp->pIoCtxNext = NULL;1718 1719 Assert(!pTmp->pIoCtxParent);1720 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);1721 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;1722 1723 rc = vdIoCtxProcessLocked(pTmp);1724 if ( rc == VINF_VD_ASYNC_IO_FINISHED1725 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))1726 {1727 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));1728 vdThreadFinishWrite(pDisk);1729 pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1,1730 pTmp->Type.Root.pvUser2,1731 pTmp->rcReq);1732 vdIoCtxFree(pDisk, pTmp);1733 }1734 }1735 1736 LogFlowFunc(("returns\n"));1737 }1738 1739 /**1740 * Processes the I/O context trying to lock the criticial section.1741 * The context is deferred if the critical section is busy.1742 *1743 * @returns VBox status code.1744 * @param pIoCtx The I/O context to process.1745 */1746 static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)1747 {1748 int rc = VINF_SUCCESS;1749 PVBOXHDD pDisk = pIoCtx->pDisk;1750 1751 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));1752 1753 /* Put it on the waiting list first. */1754 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);1755 1756 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))1757 {1758 /* Leave it again, the context will be processed just before leaving the lock. */1759 LogFlowFunc(("Successfully acquired the lock\n"));1760 rc = vdDiskUnlock(pDisk, pIoCtx);1761 }1762 else1763 {1764 LogFlowFunc(("Lock is held\n"));1765 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;1766 }1767 1768 return rc;1769 }1770 1771 /**1772 * Process the I/O context in a synchronous manner, waiting1773 * for it to complete.1774 *1775 * @returns VBox status code of the completed request.1776 * @param pIoCtx The sync I/O context.1777 */1778 static int vdIoCtxProcessSync(PVDIOCTX pIoCtx)1779 {1780 int rc = VINF_SUCCESS;1781 PVBOXHDD pDisk = pIoCtx->pDisk;1782 1783 LogFlowFunc(("pIoCtx=%p\n", pIoCtx));1784 1785 AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC,1786 ("I/O context is not marked as synchronous\n"));1787 1788 rc = vdIoCtxProcessTryLockDefer(pIoCtx);1789 if (rc == VINF_VD_ASYNC_IO_FINISHED)1790 rc = VINF_SUCCESS;1791 1792 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)1793 {1794 rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT);1795 AssertRC(rc);1796 1797 rc = pDisk->rcSync;1798 }1799 else /* Success or error. */1800 vdIoCtxFree(pDisk, pIoCtx);1801 1802 return rc;1803 }1804 1805 DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1806 {1807 return pDisk->pIoCtxLockOwner == pIoCtx;1808 }1809 1810 static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx)1811 {1812 int rc = VINF_SUCCESS;1813 1814 VD_IS_LOCKED(pDisk);1815 1816 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));1817 1818 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))1819 {1820 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. 
1821 vdIoCtxDefer(pDisk, pIoCtx);
1822 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1823 }
1824
1825 LogFlowFunc(("returns -> %Rrc\n", rc));
1826 return rc;
1827 }
1828
1829 static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
1830 {
1831 LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
1832 pDisk, pIoCtx, fProcessBlockedReqs));
1833
1834 VD_IS_LOCKED(pDisk);
1835
1836 LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
1837 Assert(pDisk->pIoCtxLockOwner == pIoCtx);
1838 ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);
1839
1840 if (fProcessBlockedReqs)
1841 {
1842 /* Process any blocked writes if the current request didn't caused another growing. */
1843 vdDiskProcessBlockedIoCtx(pDisk);
1844 }
1845
1846 LogFlowFunc(("returns\n"));
1847 }
1848
1849 /**
1850 * internal: read the specified amount of data in whatever blocks the backend
1851 * will give us - async version.
1852 */
1853 static int vdReadHelperAsync(PVDIOCTX pIoCtx)
1854 {
1855 int rc;
1856 size_t cbToRead = pIoCtx->Req.Io.cbTransfer;
1857 uint64_t uOffset = pIoCtx->Req.Io.uOffset;
1858 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;;
1859 size_t cbThisRead;
1860
1861 /* Loop until all reads started or we have a backend which needs to read metadata. */
1862 do
1863 {
1864 /* Search for image with allocated block. Do not attempt to read more
1865 * than the previous reads marked as valid. Otherwise this would return
1866 * stale data when different block sizes are used for the images. */
1867 cbThisRead = cbToRead;
1868
1869 /*
1870 * Try to read from the given image.
1871 * If the block is not allocated read from override chain if present.
1872 */
1873 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1874 uOffset, cbThisRead,
1875 pIoCtx, &cbThisRead);
1876
1877 if (rc == VERR_VD_BLOCK_FREE)
1878 {
1879 while ( pCurrImage->pPrev != NULL
1880 && rc == VERR_VD_BLOCK_FREE)
1881 {
1882 pCurrImage = pCurrImage->pPrev;
1883 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1884 uOffset, cbThisRead,
1885 pIoCtx, &cbThisRead);
1886 }
1887 }
1888
1889 /* The task state will be updated on success already, don't do it here!. */
1890 if (rc == VERR_VD_BLOCK_FREE)
1891 {
1892 /* No image in the chain contains the data for the block. */
1893 vdIoCtxSet(pIoCtx, '\0', cbThisRead);
1894 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead);
1895 rc = VINF_SUCCESS;
1896 }
1897 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
1898 rc = VINF_SUCCESS;
1899 else if (rc == VERR_VD_IOCTX_HALT)
1900 {
1901 uOffset += cbThisRead;
1902 cbToRead -= cbThisRead;
1903 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
1904 }
1905
1906 if (RT_FAILURE(rc))
1907 break;
1908
1909 cbToRead -= cbThisRead;
1910 uOffset += cbThisRead;
1911 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
1912 } while (cbToRead != 0 && RT_SUCCESS(rc));
1913
1914 if ( rc == VERR_VD_NOT_ENOUGH_METADATA
1915 || rc == VERR_VD_IOCTX_HALT)
1916 {
1917 /* Save the current state. */
1918 pIoCtx->Req.Io.uOffset = uOffset;
1919 pIoCtx->Req.Io.cbTransfer = cbToRead;
1920 pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
1921 }
1922
1923 return rc;
1924 }
1925
1926 /**
1927 * internal: parent image read wrapper for compacting.
1928 */
1929 static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,
1930 size_t cbRead)
1931 {
1932 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;
1933 return vdReadHelper(pParentState->pDisk, pParentState->pImage, uOffset,
1934 pvBuf, cbRead, false /* fUpdateCache */);
1935 1968 }
1936 1969
… …
1984 2017
1985 2018 /**
1986 * internal: write a complete block (only used for diff images), taking the
1987 * remaining data from parent images. This implementation does not optimize
1988 * anything (except that it tries to read only that portions from parent
1989 * images that are really needed).
1990 */
1991 static int vdWriteHelperStandard(PVBOXHDD pDisk, PVDIMAGE pImage,
1992 PVDIMAGE pImageParentOverride,
1993 uint64_t uOffset, size_t cbWrite,
1994 size_t cbThisWrite, size_t cbPreRead,
1995 size_t cbPostRead, const void *pvBuf,
1996 void *pvTmp)
1997 {
1998 int rc = VINF_SUCCESS;
1999
2000 LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",
2001 pDisk, pImage, pImageParentOverride, uOffset, cbWrite));
2002
2003 /* Read the data that goes before the write to fill the block. */
2004 if (cbPreRead)
2005 {
2006 /*
2007 * Updating the cache doesn't make sense here because
2008 * this will be done after the complete block was written.
2009 */
2010 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,
2011 uOffset - cbPreRead, pvTmp, cbPreRead,
2012 true /* fZeroFreeBlocks*/,
2013 false /* fUpdateCache */, 0);
2014 if (RT_FAILURE(rc))
2015 return rc;
2016 }
2017
2018 /* Copy the data to the right place in the buffer. */
2019 memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);
2020
2021 /* Read the data that goes after the write to fill the block. */
2022 if (cbPostRead)
2023 {
2024 /* If we have data to be written, use that instead of reading
2025 * data from the image. */
2026 size_t cbWriteCopy;
2027 if (cbWrite > cbThisWrite)
2028 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
2029 else
2030 cbWriteCopy = 0;
2031 /* Figure out how much we cannot read from the image, because
2032 * the last block to write might exceed the nominal size of the
2033 * image for technical reasons. */
2034 size_t cbFill;
2035 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
2036 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
2037 else
2038 cbFill = 0;
2039 /* The rest must be read from the image. */
2040 size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill;
2041
2042 /* Now assemble the remaining data. */
2043 if (cbWriteCopy)
2044 memcpy((char *)pvTmp + cbPreRead + cbThisWrite,
2045 (char *)pvBuf + cbThisWrite, cbWriteCopy);
2046 if (cbReadImage)
2047 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride,
2048 uOffset + cbThisWrite + cbWriteCopy,
2049 (char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy,
2050 cbReadImage, true /* fZeroFreeBlocks */,
2051 false /* fUpdateCache */, 0);
2052 if (RT_FAILURE(rc))
2053 return rc;
2054 /* Zero out the remainder of this block. Will never be visible, as this
2055 * is beyond the limit of the image. */
2056 if (cbFill)
2057 memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,
2058 '\0', cbFill);
2059 }
2060
2061 /* Write the full block to the virtual disk. */
2062 RTSGSEG SegmentBuf;
2063 RTSGBUF SgBuf;
2064 VDIOCTX IoCtx;
2065
2066 SegmentBuf.pvSeg = pvTmp;
2067 SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead;
2068 RTSgBufInit(&SgBuf, &SegmentBuf, 1);
2069 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
2070 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
2071 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead,
2072 cbPreRead + cbThisWrite + cbPostRead,
2073 &IoCtx, NULL, &cbPreRead, &cbPostRead, 0);
2074 Assert(rc != VERR_VD_BLOCK_FREE);
2075 Assert(cbPreRead == 0);
2076 Assert(cbPostRead == 0);
2077
2078 return rc;
2079 }
2080
2081 /**
2082 * internal: write a complete block (only used for diff images), taking the
2083 * remaining data from parent images. This implementation optimizes out writes
2084 * that do not change the data relative to the state as of the parent images.
2085 * All backends which support differential/growing images support this.
2086 */
2087 static int vdWriteHelperOptimized(PVBOXHDD pDisk, PVDIMAGE pImage,
2088 PVDIMAGE pImageParentOverride,
2089 uint64_t uOffset, size_t cbWrite,
2090 size_t cbThisWrite, size_t cbPreRead,
2091 size_t cbPostRead, const void *pvBuf,
2092 void *pvTmp, unsigned cImagesRead)
2093 {
2094 size_t cbFill = 0;
2095 size_t cbWriteCopy = 0;
2096 size_t cbReadImage = 0;
2097 int rc;
2098
2099 LogFlowFunc(("pDisk=%p pImage=%p pImageParentOverride=%p uOffset=%llu cbWrite=%zu\n",
2100 pDisk, pImage, pImageParentOverride, uOffset, cbWrite));
2101
2102 if (cbPostRead)
2103 {
2104 /* Figure out how much we cannot read from the image, because
2105 * the last block to write might exceed the nominal size of the
2106 * image for technical reasons. */
2107 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
2108 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
2109
2110 /* If we have data to be written, use that instead of reading
2111 * data from the image. */
2112 if (cbWrite > cbThisWrite)
2113 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
2114
2115 /* The rest must be read from the image. */
2116 cbReadImage = cbPostRead - cbWriteCopy - cbFill;
2117 }
2118
2119 /* Read the entire data of the block so that we can compare whether it will
2120 * be modified by the write or not. */
2121 rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, uOffset - cbPreRead, pvTmp,
2122 cbPreRead + cbThisWrite + cbPostRead - cbFill,
2123 true /* fZeroFreeBlocks */, false /* fUpdateCache */,
2124 cImagesRead);
2125 if (RT_FAILURE(rc))
2126 return rc;
2127
2128 /* Check if the write would modify anything in this block. */
2129 if ( !memcmp((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite)
2130 && (!cbWriteCopy || !memcmp((char *)pvTmp + cbPreRead + cbThisWrite,
2131 (char *)pvBuf + cbThisWrite, cbWriteCopy)))
2132 {
2133 /* Block is completely unchanged, so no need to write anything. */
2134 return VINF_SUCCESS;
2135 }
2136
2137 /* Copy the data to the right place in the buffer. */
2138 memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite);
2139
2140 /* Handle the data that goes after the write to fill the block. */
2141 if (cbPostRead)
2142 {
2143 /* Now assemble the remaining data. */
2144 if (cbWriteCopy)
2145 memcpy((char *)pvTmp + cbPreRead + cbThisWrite,
2146 (char *)pvBuf + cbThisWrite, cbWriteCopy);
2147 /* Zero out the remainder of this block. Will never be visible, as this
2148 * is beyond the limit of the image. */
2149 if (cbFill)
2150 memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage,
2151 '\0', cbFill);
2152 }
2153
2154 /* Write the full block to the virtual disk. */
2155 RTSGSEG SegmentBuf;
2156 RTSGBUF SgBuf;
2157 VDIOCTX IoCtx;
2158
2159 SegmentBuf.pvSeg = pvTmp;
2160 SegmentBuf.cbSeg = cbPreRead + cbThisWrite + cbPostRead;
2161 RTSgBufInit(&SgBuf, &SegmentBuf, 1);
2162 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
2163 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
2164 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset - cbPreRead,
2165 cbPreRead + cbThisWrite + cbPostRead,
2166 &IoCtx, NULL, &cbPreRead, &cbPostRead, 0);
2167 Assert(rc != VERR_VD_BLOCK_FREE);
2168 Assert(cbPreRead == 0);
2169 Assert(cbPostRead == 0);
2170
2171 return rc;
2172 }
2173
2174 /**
2175 2019 * internal: write buffer to the image, taking care of block boundaries and
2176 2020 * write optimizations.
… …
2181 2025 bool fUpdateCache, unsigned cImagesRead)
2182 2026 {
2183 int rc;
2184 unsigned fWrite;
2185 size_t cbThisWrite;
2186 size_t cbPreRead, cbPostRead;
2187 uint64_t uOffsetCur = uOffset;
2188 size_t cbWriteCur = cbWrite;
2189 const void *pcvBufCur = pvBuf;
2190 RTSGSEG SegmentBuf;
2027 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
2028 RTSGSEG Segment;
2191 2029 RTSGBUF SgBuf;
2192 2030 VDIOCTX IoCtx;
2193 2031
2194 /* Loop until all written. */
2195 do
2196 {
2197 /* Try to write the possibly partial block to the last opened image.
2198 * This works when the block is already allocated in this image or
2199 * if it is a full-block write (and allocation isn't suppressed below).
2200 * For image formats which don't support zero blocks, it's beneficial
2201 * to avoid unnecessarily allocating unchanged blocks. This prevents
2202 * unwanted expanding of images. VMDK is an example. */
2203 cbThisWrite = cbWriteCur;
2204 fWrite = (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
2205 ? 0 : VD_WRITE_NO_ALLOC;
2206
2207 SegmentBuf.pvSeg = (void *)pcvBufCur;
2208 SegmentBuf.cbSeg = cbWrite;
2209 RTSgBufInit(&SgBuf, &SegmentBuf, 1);
2210 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, 0, 0, NULL,
2211 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
2212 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffsetCur, cbThisWrite,
2213 &IoCtx, &cbThisWrite, &cbPreRead,
2214 &cbPostRead, fWrite);
2215 if (rc == VERR_VD_BLOCK_FREE)
2216 {
2217 void *pvTmp = RTMemTmpAlloc(cbPreRead + cbThisWrite + cbPostRead);
2218 AssertBreakStmt(VALID_PTR(pvTmp), rc = VERR_NO_MEMORY);
2219
2220 if (!(pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME))
2221 {
2222 /* Optimized write, suppress writing to a so far unallocated
2223 * block if the data is in fact not changed. */
2224 rc = vdWriteHelperOptimized(pDisk, pImage, pImageParentOverride,
2225 uOffsetCur, cbWriteCur,
2226 cbThisWrite, cbPreRead, cbPostRead,
2227 pcvBufCur, pvTmp, cImagesRead);
2228 }
2229 else
2230 {
2231 /* Normal write, not optimized in any way. The block will
2232 * be written no matter what. This will usually (unless the
2233 * backend has some further optimization enabled) cause the
2234 * block to be allocated. */
2235 rc = vdWriteHelperStandard(pDisk, pImage, pImageParentOverride,
2236 uOffsetCur, cbWriteCur,
2237 cbThisWrite, cbPreRead, cbPostRead,
2238 pcvBufCur, pvTmp);
2239 }
2240 RTMemTmpFree(pvTmp);
2241 if (RT_FAILURE(rc))
2242 break;
2243 }
2244
2245 cbWriteCur -= cbThisWrite;
2246 uOffsetCur += cbThisWrite;
2247 pcvBufCur = (char *)pcvBufCur + cbThisWrite;
2248 } while (cbWriteCur != 0 && RT_SUCCESS(rc));
2249
2250 #if 0 /** @todo: Soon removed when sync and async version of the write helper are merged. */
2251 /* Update the cache on success */
2252 if ( RT_SUCCESS(rc)
2253 && pDisk->pCache
2254 && fUpdateCache)
2255 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, cbWrite, NULL);
2256
2257 if (RT_SUCCESS(rc))
2258 rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite);
2259 #endif
2260
2261 return rc;
2032 if (fUpdateCache)
2033 fFlags |= VDIOCTX_FLAGS_READ_UDATE_CACHE;
2034
2035 Segment.pvSeg = (void *)pvBuf;
2036 Segment.cbSeg = cbWrite;
2037 RTSgBufInit(&SgBuf, &Segment, 1);
2038 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf,
2039 NULL, vdWriteHelperAsync, fFlags);
2040
2041 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
2042 IoCtx.Req.Io.cImagesRead = cImagesRead;
2043 IoCtx.pIoCtxParent = NULL;
2044 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
2045 IoCtx.Type.Root.pvUser1 = pDisk;
2046 IoCtx.Type.Root.pvUser2 = NULL;
2047 return vdIoCtxProcessSync(&IoCtx);
2262 2048 }
2263 2049
… …
2491 2277 }
2492 2278
2493 /**
2494 * internal: write a complete block (only used for diff images), taking the
2495 * remaining data from parent images. This implementation does not optimize
2496 * anything (except that it tries to read only that portions from parent
2497 * images that are really needed) - async version.
2498 */
2499 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
2500 {
2501 int rc = VINF_SUCCESS;
2502
2503 #if 0
2504
2505 /* Read the data that goes before the write to fill the block. */
2506 if (cbPreRead)
2507 {
2508 rc = vdReadHelperAsync(pIoCtxDst);
2509 if (RT_FAILURE(rc))
2510 return rc;
2511 }
2512
2513 /* Copy the data to the right place in the buffer. */
2514 vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbThisWrite);
2515
2516 /* Read the data that goes after the write to fill the block. */
2517 if (cbPostRead)
2518 {
2519 /* If we have data to be written, use that instead of reading
2520 * data from the image. */
2521 size_t cbWriteCopy;
2522 if (cbWrite > cbThisWrite)
2523 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
2524 else
2525 cbWriteCopy = 0;
2526 /* Figure out how much we cannot read from the image, because
2527 * the last block to write might exceed the nominal size of the
2528 * image for technical reasons. */
2529 size_t cbFill;
2530 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
2531 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
2532 else
2533 cbFill = 0;
2534 /* The rest must be read from the image. */
2535 size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill;
2536
2537 /* Now assemble the remaining data. */
2538 if (cbWriteCopy)
2539 {
2540 vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbWriteCopy);
2541 ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbWriteCopy);
2542 }
2543
2544 if (cbReadImage)
2545 rc = vdReadHelperAsync(pDisk, pImage, pImageParentOverride, pIoCtxDst,
2546 uOffset + cbThisWrite + cbWriteCopy,
2547 cbReadImage);
2548 if (RT_FAILURE(rc))
2549 return rc;
2550 /* Zero out the remainder of this block. Will never be visible, as this
2551 * is beyond the limit of the image. */
2552 if (cbFill)
2553 {
2554 vdIoCtxSet(pIoCtxDst, '\0', cbFill);
2555 ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbFill);
2556 }
2557 }
2558
2559 if ( !pIoCtxDst->cbTransferLeft
2560 && !pIoCtxDst->cMetaTransfersPending
2561 && ASMAtomicCmpXchgBool(&pIoCtxDst->fComplete, true, false))
2562 {
2563 /* Write the full block to the virtual disk. */
2564 vdIoCtxChildReset(pIoCtxDst);
2565 rc = pImage->Backend->pfnWrite(pImage->pBackendData,
2566 uOffset - cbPreRead,
2567 cbPreRead + cbThisWrite + cbPostRead,
2568 pIoCtxDst,
2569 NULL, &cbPreRead, &cbPostRead, 0);
2570 Assert(rc != VERR_VD_BLOCK_FREE);
2571 Assert(cbPreRead == 0);
2572 Assert(cbPostRead == 0);
2573 }
2574 else
2575 {
2576 LogFlow(("cbTransferLeft=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
2577 pIoCtxDst->cbTransferLeft, pIoCtxDst->cMetaTransfersPending,
2578 pIoCtxDst->fComplete));
2579 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2580 }
2581
2582 return rc;
2583 #endif
2584 return VERR_NOT_IMPLEMENTED;
2585 }
2586
2587 static int vdWriteHelperOptimizedCommitAsync(PVDIOCTX pIoCtx)
2279 static int vdWriteHelperCommitAsync(PVDIOCTX pIoCtx)
2588 2280 {
2589 2281 int rc = VINF_SUCCESS;
… …
2595 2287 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2596 2288 rc = pImage->Backend->pfnWrite(pImage->pBackendData,
2597
2598
2599
2289 pIoCtx->Req.Io.uOffset - cbPreRead,
2290 cbPreRead + cbThisWrite + cbPostRead,
2291 pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);
2600 2292 Assert(rc != VERR_VD_BLOCK_FREE);
2601 2293 Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0);
… …
2686 2378 /* Write the full block to the virtual disk. */
2687 2379 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2688 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedCommitAsync;
2380 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2689 2381
2690 2382 return rc;
… …
2696 2388
2697 2389 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2390
2391 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2698 2392
2699 2393 if (pIoCtx->Req.Io.cbTransferLeft)
… …
2762 2456 /* Next step */
2763 2457 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync;
2458 return VINF_SUCCESS;
2459 }
2460
2461 static int vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx)
2462 {
2463 int rc = VINF_SUCCESS;
2464 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2465 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2466 PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;
2467
2468 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2469
2470 vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);
2471 if (cbPostRead)
2472 {
2473 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
2474 size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
2475 size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;
2476
2477 /* Now assemble the remaining data. */
2478 if (cbWriteCopy)
2479 {
2480 /*
2481 * The S/G buffer of the parent needs to be cloned because
2482 * it is not allowed to modify the state.
2483 */
2484 RTSGBUF SgBufParentTmp;
2485
2486 RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
2487 RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
2488 }
2489
2490 /* Zero out the remainder of this block. Will never be visible, as this
2491 * is beyond the limit of the image. */
2492 if (cbFill)
2493 {
2494 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage);
2495 vdIoCtxSet(pIoCtx, '\0', cbFill);
2496 }
2497
2498 if (cbReadImage)
2499 {
2500 /* Read remaining data. */
2501 }
2502 else
2503 {
2504 /* Write the full block to the virtual disk. */
2505 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2506 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2507 }
2508 }
2509 else
2510 {
2511 /* Write the full block to the virtual disk. */
2512 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2513 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2514 }
2515
2516 return rc;
2517 }
2518
2519 static int vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx)
2520 {
2521 int rc = VINF_SUCCESS;
2522
2523 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2524
2525 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2526
2527 if (pIoCtx->Req.Io.cbTransferLeft)
2528 rc = vdReadHelperAsync(pIoCtx);
2529
2530 if ( RT_SUCCESS(rc)
2531 && ( pIoCtx->Req.Io.cbTransferLeft
2532 || pIoCtx->cMetaTransfersPending))
2533 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2534 else
2535 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
2536
2537 return rc;
2538 }
2539
2540 static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
2541 {
2542 PVBOXHDD pDisk = pIoCtx->pDisk;
2543 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved;
2544 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2545 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2546 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2547 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent;
2548 size_t cbFill = 0;
2549 size_t cbWriteCopy = 0;
2550 size_t cbReadImage = 0;
2551
2552 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2553
2554 AssertPtr(pIoCtx->pIoCtxParent);
2555 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);
2556
2557 /* Calculate the amount of data to read that goes after the write to fill the block. */
2558 if (cbPostRead)
2559 {
2560 /* If we have data to be written, use that instead of reading
2561 * data from the image. */
2562 cbWriteCopy;
2563 if (cbWrite > cbThisWrite)
2564 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
2565
2566 /* Figure out how much we cannot read from the image, because
2567 * the last block to write might exceed the nominal size of the
2568 * image for technical reasons. */
2569 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
2570 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
2571
2572 /* The rest must be read from the image. */
2573 cbReadImage = cbPostRead - cbWriteCopy - cbFill;
2574 }
2575
2576 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill;
2577 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
2578 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;
2579
2580 /* Next step */
2581 if (cbPreRead)
2582 {
2583 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync;
2584
2585 /* Read the data that goes before the write to fill the block. */
2586 pIoCtx->Req.Io.cbTransferLeft = cbPreRead;
2587 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
2588 pIoCtx->Req.Io.uOffset -= cbPreRead;
2589 }
2590 else
2591 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
2592
2764 2593 return VINF_SUCCESS;
}
… …
3898 3727 PVDIOTASK pHead = ASMAtomicXchgPtrT(&pDisk->pIoTasksPendingHead, NULL, PVDIOTASK);
3899 3728
3729 Log(("I/O task list cleared\n"));
3730
3900 3731 /* Reverse order. */
3901 3732 PVDIOTASK pCur = pHead;
… …
4027 3858 PVBOXHDD pDisk = pIoStorage->pVDIo->pDisk;
4028 3859
3860 Log(("Deferring I/O task pIoTask=%p\n", pIoTask));
3861
4029 3862 /* Put it on the waiting list. */
4030 3863 PVDIOTASK pNext = ASMAtomicUoReadPtrT(&pDisk->pIoTasksPendingHead, PVDIOTASK);
… …
4226 4059
4227 4060 void *pvTask;
4061 Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx));
4228 4062 rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser,
4229 4063 pIoStorage->pStorage, uOffset,
… …
4319 4153
4320 4154 void *pvTask;
4155 Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx));
4321 4156 rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser,
4322 4157 pIoStorage->pStorage,
… …
9637 9472 pfnComplete, pvUser1, pvUser2,
9638 9473 NULL, vdReadHelperAsync,
9639 VDIOCTX_FLAGS_DEFAULT);
9474 VDIOCTX_FLAGS_ZERO_FREE_BLOCKS);
9640 9475 if (!pIoCtx)
9641 9476 {
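The rewritten vdWriteHelper() in this changeset shows the core pattern: a synchronous request now reuses the asynchronous state machine by marking a stack-allocated I/O context with VDIOCTX_FLAGS_SYNC and VDIOCTX_FLAGS_DONT_FREE and driving it through vdIoCtxProcessSync(). A read-side wrapper is not visible in these hunks; the sketch below is only an illustration of how such a wrapper might look, inferred from the write helper. The function name vdReadHelperSyncSketch, the exact flag combination and the use of vdReadHelperAsync as the transfer callback are assumptions, not code from r45155.

/* Illustrative sketch only (not part of r45155): a synchronous read routed
 * through the shared async state machine, mirroring the new vdWriteHelper().
 * Flag set and transfer callback are assumptions inferred from the diff. */
static int vdReadHelperSyncSketch(PVBOXHDD pDisk, PVDIMAGE pImage,
                                  uint64_t uOffset, void *pvBuf, size_t cbRead)
{
    RTSGSEG Segment;
    RTSGBUF SgBuf;
    VDIOCTX IoCtx;

    Segment.pvSeg = pvBuf;
    Segment.cbSeg = cbRead;
    RTSgBufInit(&SgBuf, &Segment, 1);

    /* The context lives on the stack, so it must not be freed on completion. */
    vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf,
                NULL, vdReadHelperAsync,
                VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE | VDIOCTX_FLAGS_ZERO_FREE_BLOCKS);

    IoCtx.pIoCtxParent          = NULL;
    IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
    IoCtx.Type.Root.pvUser1     = pDisk;
    IoCtx.Type.Root.pvUser2     = NULL;

    /* Blocks on pDisk->hEventSemSyncIo until the async chain has finished. */
    return vdIoCtxProcessSync(&IoCtx);
}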
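vdDiskProcessWaitingIoCtx() and vdDiskProcessBlockedIoCtx() above both use the same lock-free hand-off: producers push contexts onto a list head with an atomic exchange (LIFO), and the consumer detaches the whole list in one swap and reverses it so requests complete in FIFO order. The following is a minimal, self-contained illustration of that reversal step; the NODE type and plain pointers are generic stand-ins for PVDIOCTX and the RT atomic helpers, not VD.cpp code.

#include <stdio.h>

/* Stand-in for PVDIOCTX: only the link and a sequence number matter here. */
typedef struct NODE
{
    struct NODE *pNext;
    int          iSeq;
} NODE;

/* Reverse a singly linked list that was built up LIFO-style so the oldest
 * entry comes first -- the same trick vdDiskProcessWaitingIoCtx() applies
 * after atomically detaching pDisk->pIoCtxHead. */
static NODE *reverseList(NODE *pHead)
{
    NODE *pReversed = NULL;
    while (pHead)
    {
        NODE *pInsert = pHead;
        pHead = pHead->pNext;        /* advance before relinking */
        pInsert->pNext = pReversed;  /* push onto the reversed list */
        pReversed = pInsert;
    }
    return pReversed;
}

int main(void)
{
    /* Simulate three contexts pushed in order 1, 2, 3 (list head ends up at 3). */
    NODE aNodes[3] = { { NULL, 1 }, { &aNodes[0], 2 }, { &aNodes[1], 3 } };
    for (NODE *p = reverseList(&aNodes[2]); p; p = p->pNext)
        printf("processing context %d\n", p->iSeq); /* prints 1, 2, 3 */
    return 0;
}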