VirtualBox

Changeset 106832 in vbox for trunk/src/VBox


Ignore:
Timestamp:
Nov 5, 2024 11:35:04 AM (3 months ago)
Author:
vboxsync
Message:

Main/{Machine,Medium,Snapshot}: Address broken reference counting of
medium back references when snapshots are involved which could cause
back references to get removed while the medium was still in use. Also
resolved some locking issues for various failure scenarios when creating
or deleting snapshots.

Location:
trunk/src/VBox/Main/src-server
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Main/src-server/MachineImpl.cpp

    r106061 r106832  
    1058210582    AssertComRCReturn(autoCaller.hrc(), autoCaller.hrc());
    1058310583
    10584     AutoMultiWriteLock2 alock(this->lockHandle(),
    10585                               &mParent->i_getMediaTreeLockHandle() COMMA_LOCKVAL_SRC_POS);
     10584    /* We can't use AutoMultiWriteLock2 here as some error code paths acquire
     10585     * the media tree lock which means we need to be able to drop the media
     10586     * tree lock individually in those cases. */
     10587    AutoWriteLock aMachineLock(this->lockHandle() COMMA_LOCKVAL_SRC_POS);
     10588    AutoWriteLock aTreeLock(&mParent->i_getMediaTreeLockHandle() COMMA_LOCKVAL_SRC_POS);
    1058610589
    1058710590    /* must be in a protective state because we release the lock below */
     
    1062110624
    1062210625                    MediumLockList *pMediumLockList(new MediumLockList());
    10623                     alock.release();
     10626                    aTreeLock.release();
     10627                    aMachineLock.release();
    1062410628                    hrc = pMedium->i_createMediumLockList(true /* fFailIfInaccessible */,
    1062510629                                                          NULL /* pToLockWrite */,
     
    1062710631                                                          NULL,
    1062810632                                                          *pMediumLockList);
    10629                     alock.acquire();
     10633                    aMachineLock.acquire();
     10634                    aTreeLock.acquire();
    1063010635                    if (FAILED(hrc))
    1063110636                    {
     
    1064010645
    1064110646            /* Now lock all media. If this fails, nothing is locked. */
    10642             alock.release();
     10647            aTreeLock.release();
     10648            aMachineLock.release();
    1064310649            hrc = lockedMediaMap->Lock();
    10644             alock.acquire();
     10650            aMachineLock.acquire();
     10651            aTreeLock.acquire();
    1064510652            if (FAILED(hrc))
    1064610653                throw setError(hrc, tr("Locking of attached media failed"));
     
    1071110718                             uuidRegistryParent,
    1071210719                             DeviceType_HardDisk);
    10713             if (FAILED(hrc)) throw hrc;
     10720            if (FAILED(hrc))
     10721            {
     10722                /* Throwing an exception here causes the 'diff' object to go out of scope
     10723                 * which triggers its destructor (ComObjPtr<Medium>::~ComObjPtr) which will
     10724                 * ultimately call Medium::uninit() which acquires the media tree lock
     10725                 * (VirtualBox::i_getMediaTreeLockHandle()) so drop the media tree lock here. */
     10726                aTreeLock.release();
     10727                throw hrc;
     10728            }
    1071410729
    1071510730            /** @todo r=bird: How is the locking and diff image cleaned up if we fail before
     
    1072410739            if (aOnline)
    1072510740            {
    10726                 alock.release();
     10741                aTreeLock.release();
     10742                aMachineLock.release();
    1072710743                /* The currently attached medium will be read-only, change
    1072810744                 * the lock type to read. */
    1072910745                hrc = pMediumLockList->Update(pMedium, false);
    10730                 alock.acquire();
     10746                aMachineLock.acquire();
     10747                aTreeLock.acquire();
    1073110748                AssertComRCThrowRC(hrc);
    1073210749            }
    1073310750
    1073410751            /* release the locks before the potentially lengthy operation */
    10735             alock.release();
     10752            aTreeLock.release();
     10753            aMachineLock.release();
    1073610754            hrc = pMedium->i_createDiffStorage(diff,
    1073710755                                               pMedium->i_getPreferredDiffVariant(),
     
    1074010758                                               true /* aWait */,
    1074110759                                               false /* aNotify */);
    10742             alock.acquire();
    10743             if (FAILED(hrc)) throw hrc;
     10760            aMachineLock.acquire();
     10761            aTreeLock.acquire();
     10762            if (FAILED(hrc))
     10763            {
     10764                /* As above, 'diff' will go out of scope via the 'throw' here resulting in
     10765                 * Medium::uninit() being called which acquires the media tree lock. */
     10766                aTreeLock.release();
     10767                throw hrc;
     10768            }
    1074410769
    1074510770            /* actual lock list update is done in Machine::i_commitMedia */
     
    1076410789                                   pAtt->i_getHotPluggable(),
    1076510790                                   pAtt->i_getBandwidthGroup());
    10766             if (FAILED(hrc)) throw hrc;
     10791            if (FAILED(hrc)) {
     10792                /* As above, 'diff' will go out of scope via the 'throw' here resulting in
     10793                 * Medium::uninit() being called which acquires the media tree lock. */
     10794                aTreeLock.release();
     10795                throw hrc;
     10796            }
    1076710797
    1076810798            hrc = lockedMediaMap->ReplaceKey(pAtt, attachment);
     
    1264612676#endif /* VBOX_WITH_USB */
    1264712677
    12648     // we need to lock this object in uninit() because the lock is shared
    12649     // with mPeer (as well as data we modify below). mParent lock is needed
    12650     // by several calls to it.
    12651     AutoMultiWriteLock2 multilock(mParent, this COMMA_LOCKVAL_SRC_POS);
     12678    /* We need to lock this object in uninit() because the lock is shared
     12679     * with mPeer (as well as data we modify below). mParent lock is needed
     12680     * by several calls to it.
     12681     * We can't use AutoMultiWriteLock2 here as some error code paths
     12682     * acquire the machine lock so we need to be able to drop the machine
     12683     * lock individually in those cases. */
     12684    AutoWriteLock aVBoxLock(mParent COMMA_LOCKVAL_SRC_POS);
     12685    AutoWriteLock aMachineLock(this COMMA_LOCKVAL_SRC_POS);
    1265212686
    1265312687#ifdef VBOX_WITH_RESOURCE_USAGE_API
     
    1269012724    {
    1269112725        Log1WarningThisFunc(("Discarding unsaved settings changes!\n"));
    12692         i_rollback(false /* aNotify */);
     12726        aMachineLock.release();
     12727        discardSettings();
     12728        mParent->i_unmarkRegistryModified(i_getId());
     12729        aMachineLock.acquire();
    1269312730    }
    1269412731
     
    1271812755            ComPtr<IInternalSessionControl> pControl = *it;
    1271912756            mData->mSession.mRemoteControls.erase(it);
    12720             multilock.release();
     12757            aMachineLock.release();
     12758            aVBoxLock.release();
    1272112759            LogFlowThisFunc(("  Calling remoteControl->Uninitialize()...\n"));
    1272212760            HRESULT hrc = pControl->Uninitialize();
     
    1272412762            if (FAILED(hrc))
    1272512763                Log1WarningThisFunc(("Forgot to close the remote session?\n"));
    12726             multilock.acquire();
     12764            aVBoxLock.acquire();
     12765            aMachineLock.acquire();
    1272712766        }
    1272812767        mData->mSession.mRemoteControls.clear();
     
    1275012789                if (SUCCEEDED(hrc))
    1275112790                {
    12752                     multilock.release();
     12791                    aMachineLock.release();
     12792                    aVBoxLock.release();
    1275312793                    Utf8Str strName(name);
    1275412794                    LogRel(("VM '%s' stops using NAT network '%s'\n",
    1275512795                            mUserData->s.strName.c_str(), strName.c_str()));
    1275612796                    mParent->i_natNetworkRefDec(strName);
    12757                     multilock.acquire();
     12797                    aVBoxLock.acquire();
     12798                    aMachineLock.acquire();
    1275812799                }
    1275912800            }
     
    1283412875    mData.free();
    1283512876
    12836     /* release the exclusive lock before setting the below two to NULL */
    12837     multilock.release();
     12877    /* release the exclusive locks before setting the below two to NULL */
     12878    aMachineLock.release();
     12879    aVBoxLock.release();
    1283812880
    1283912881    unconst(mParent) = NULL;
  • trunk/src/VBox/Main/src-server/MediumImpl.cpp

    r106077 r106832  
    46174617    AssertReturn(aMachineId.isValid(), E_FAIL);
    46184618
    4619     LogFlowThisFunc(("ENTER, aMachineId: {%RTuuid}, aSnapshotId: {%RTuuid}\n", aMachineId.raw(), aSnapshotId.raw()));
    4620 
    46214619    AutoCaller autoCaller(this);
    46224620    AssertComRCReturnRC(autoCaller.hrc());
    46234621
    46244622    AutoWriteLock alock(this COMMA_LOCKVAL_SRC_POS);
     4623
     4624    LogFlowThisFunc(("ENTER: medium: '%s' aMachineId: {%RTuuid}, aSnapshotId: {%RTuuid}\n",
     4625        i_getLocationFull().c_str(), aMachineId.raw(), aSnapshotId.raw()));
    46254626
    46264627    switch (m->state)
     
    47444745    AutoWriteLock alock(this COMMA_LOCKVAL_SRC_POS);
    47454746
     4747    LogFlowThisFunc(("ENTER: medium: '%s' aMachineId: {%RTuuid}, aSnapshotId: {%RTuuid}\n",
     4748        i_getLocationFull().c_str(), aMachineId.raw(), aSnapshotId.raw()));
     4749
    47464750    BackRefList::iterator it =
    47474751        std::find_if(m->backRefs.begin(), m->backRefs.end(),
     
    47494753    AssertReturn(it != m->backRefs.end(), E_FAIL);
    47504754
     4755    bool fDvd = false;
     4756    {
     4757        AutoReadLock arlock(this COMMA_LOCKVAL_SRC_POS);
     4758        /*
     4759         *  Check the medium is DVD and readonly. It's for the case if DVD
     4760         *  will be able to be writable sometime in the future.
     4761         */
     4762        fDvd = m->type == MediumType_Readonly && m->devType == DeviceType_DVD;
     4763    }
     4764
    47514765    if (aSnapshotId.isZero())
    47524766    {
     4767        /* Attached removable media which are part of a snapshot can be added to a
     4768         * back reference more than once (as described above in i_addBackReference())
     4769         * so we need to balance out the reference counting for DVDs here. */
     4770        if (fDvd && it->iRefCnt > 1)
     4771            it->iRefCnt--;
    47534772        it->iRefCnt--;
    47544773        if (it->iRefCnt > 0)
    47554774            return S_OK;
    47564775
    4757         /* remove the current state attachment */
     4776        /* Mark the medium as no longer in use by the current machine although it
     4777         * may still be included in one or more snapshots (it->llSnapshotIds). */
    47584778        it->fInCurState = false;
    47594779    }
     
    47734793
    47744794        it->llSnapshotIds.erase(jt);
     4795
     4796        /* The BackRef() constructor in i_addBackReference() creates back references
     4797         * for attached hard disk mediums associated with a snapshot with the reference
     4798         * count set to '1' and fInCurState=false before adding the snapshot to
     4799         * it->llSnapshotIds.  When removing that snapshot reference here we need to
     4800         * decrement the reference count since there won't be a separate
     4801         * i_removeBackReference() call with aSnapshotId.isZero() since this hard disk
     4802         * medium is only associated with this snapshot.  Removable media like DVDs
     4803         * associated with a snapshot on the other hand can be attached twice, once for
     4804         * the snapshot and once for the machine (aSnapshotId.isZero()=true) and their
     4805         * reference counting is balanced out in the machine case above. */
     4806        if (!fDvd && it->fInCurState == false && it->iRefCnt == 1 && it->llSnapshotIds.size() == 0)
     4807            it->iRefCnt--;
    47754808    }
    47764809
    47774810    /* if the backref becomes empty, remove it */
    4778     if (it->fInCurState == false && it->llSnapshotIds.size() == 0)
     4811    if (it->fInCurState == false && it->iRefCnt == 0 && it->llSnapshotIds.size() == 0)
    47794812        m->backRefs.erase(it);
    47804813
     
    48684901    {
    48694902        const BackRef &ref = *it2;
    4870         LogFlowThisFunc(("  Backref from machine {%RTuuid} (fInCurState: %d, iRefCnt: %d)\n", ref.machineId.raw(), ref.fInCurState, ref.iRefCnt));
     4903        LogFlowThisFunc(("  Backref from machine={%RTuuid} (fInCurState=%d, iRefCnt=%d)\n", ref.machineId.raw(), ref.fInCurState, ref.iRefCnt));
    48714904
    48724905        for (std::list<SnapshotRef>::const_iterator jt2 = it2->llSnapshotIds.begin();
     
    48754908        {
    48764909            const Guid &id = jt2->snapshotId;
    4877             LogFlowThisFunc(("  Backref from snapshot {%RTuuid} (iRefCnt = %d)\n", id.raw(), jt2->iRefCnt));
     4910            LogFlowThisFunc(("  Backref from snapshot={%RTuuid} (iRefCnt=%d)\n", id.raw(), jt2->iRefCnt));
    48784911        }
    48794912    }
  • trunk/src/VBox/Main/src-server/SnapshotImpl.cpp

    r106061 r106832  
    18591859        mMediumAttachments.backup();
    18601860
     1861        /* We need to set fBeganTakingSnapshot=true here to handle not only the
     1862         * scenario of i_createImplicitDiffs() succeeding but also the failure case
     1863         * since differencing hard disks can still be created which then need to be
     1864         * cleaned up and deleted via i_finishTakingSnapshot() below. */
     1865        fBeganTakingSnapshot = true;
     1866
    18611867        alock.release();
    18621868        /* create new differencing hard disks and attach them to this machine */
     
    18641870                                    1,            // operation weight; must be the same as in Machine::TakeSnapshot()
    18651871                                    task.m_fTakingSnapshotOnline);
     1872        /* If i_createImplicitDiffs() fails the 'catch' block below calls
     1873         * Snapshot::uninit() which requires the machine to be write locked
     1874         * so reacquire alock here before testing for failure. */
     1875        alock.acquire();
    18661876        if (FAILED(hrc))
    18671877            throw hrc;
    1868         alock.acquire();
    18691878
    18701879        // MUST NOT save the settings or the media registry here, because
    18711880        // this causes trouble with rolling back settings if the user cancels
    18721881        // taking the snapshot after the diff images have been created.
    1873 
    1874         fBeganTakingSnapshot = true;
    18751882
    18761883        // STEP 3: save the VM state (if online)
     
    26082615        ErrorInfoKeeper eik;
    26092616
     2617        /* If any MediumAttachments have been modified then Machine::i_rollback() will
     2618         * go through Machine::i_rollbackMedia() -> Machine::i_deleteImplicitDiffs() ->
     2619         * Medium::i_deleteStorage() -> Medium::i_markRegistriesModified() ->
     2620         * VirtualBox::i_markRegistryModified() -> VirtualBox::i_findMachine() which
     2621         * attempts to acquire the Machine object lock for 'VirtualBox::allMachines' but
     2622         * the locking order when accessing the MachinesOList type (aka ObjectsList<Machine>)
     2623         * requires that the machines lock list (LOCKCLASS_LISTOFMACHINES) be
     2624         * acquired first and then the Machine object lock (LOCKCLASS_MACHINEOBJECT).
     2625         */
     2626        AutoReadLock alockMachines(mParent->i_getMachinesListLockHandle() COMMA_LOCKVAL_SRC_POS);
     2627
    26102628        /* undo all changes on failure */
    26112629        i_rollback(false /* aNotify */);
    2612 
    26132630    }
    26142631
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette