VirtualBox

Changeset 61980 in vbox for trunk/src


Ignore:
Timestamp:
Jul 1, 2016 2:44:32 PM (8 years ago)
Author:
vboxsync
Message:

common/utils.py: Optimized unpackFile().

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/ValidationKit/common/utils.py

    r61957 r61980  
    3939import subprocess;
    4040import sys;
    41 import tarfile;
    4241import time;
    4342import traceback;
    4443import unittest;
    45 import zipfile
    4644
    4745if sys.platform == 'win32':
     
    5654# Python 3 hacks:
    5755if sys.version_info[0] >= 3:
    58     long = int;     # pylint: disable=W0622,C0103
     56    xrange = range; # pylint: disable=redefined-builtin,invalid-name
     57    long = int;     # pylint: disable=redefined-builtin,invalid-name
    5958
    6059
     
    14391438
    14401439
    1441 def unpackFile(sArchive, sDstDir, fnLog, fnError = None):
     1440def unpackZipFile(sArchive, sDstDir, fnLog, fnError = None, fnFilter = None):
     1441    # type: (string, string, (string) -> None, (string) -> None, (string) -> bool) -> list[string]
     1442    """
     1443    Worker for unpackFile that deals with ZIP files, same function signature.
     1444    """
     1445    import zipfile
     1446    if fnError is None:
     1447        fnError = fnLog;
     1448
     1449    fnLog('Unzipping "%s" to "%s"...' % (sArchive, sDstDir));
     1450
     1451    # Open it.
     1452    try: oZipFile = zipfile.ZipFile(sArchive, 'r')
     1453    except Exception as oXcpt:
     1454        fnError('Error opening "%s" for unpacking into "%s": %s' % (sArchive, sDstDir, oXcpt,));
     1455        return None;
     1456
     1457    # Extract all members.
     1458    asMembers = [];
     1459    try:
     1460        for sMember in oZipFile.namelist():
     1461            if fnFilter is None  or  fnFilter(sMember) is not False:
     1462                if sMember.endswith('/'):
     1463                    os.makedirs(os.path.join(sDstDir, sMember.replace('/', os.path.sep)), 0x1fd); # octal: 0775 (python 3/2)
     1464                else:
     1465                    oZipFile.extract(sMember, sDstDir);
     1466                asMembers.append(os.path.join(sDstDir, sMember.replace('/', os.path.sep)));
     1467    except Exception as oXcpt:
     1468        fnError('Error unpacking "%s" into "%s": %s' % (sArchive, sDstDir, oXcpt));
     1469        asMembers = None;
     1470
     1471    # close it.
     1472    try: oZipFile.close();
     1473    except Exception as oXcpt:
     1474        fnError('Error closing "%s" after unpacking into "%s": %s' % (sArchive, sDstDir, oXcpt));
     1475        asMembers = None;
     1476
     1477    return asMembers;
     1478
     1479
     1480## Good buffer for file operations.
     1481g_cbGoodBufferSize = 256*1024;
     1482
     1483## The original shutil.copyfileobj.
     1484g_fnOriginalShCopyFileObj = None;
     1485
     1486def __myshutilcopyfileobj(fsrc, fdst, length = g_cbGoodBufferSize):
     1487    """ shutil.copyfileobj with different length default value (16384 is slow with python 2.7 on windows). """
     1488    return g_fnOriginalShCopyFileObj(fsrc, fdst, length);
     1489
     1490## Set if we've replaced tarfile.copyfileobj with __mytarfilecopyfileobj already.
     1491g_fTarCopyFileObjOverriddend = False;
     1492
     1493def __mytarfilecopyfileobj(src, dst, length = None, exception = OSError):
     1494    """ tarfile.copyfileobj with different buffer size (16384 is slow on windows). """
     1495    if length is None:
     1496        __myshutilcopyfileobj(src, dst, g_cbGoodBufferSize);
     1497    elif length > 0:
     1498        cFull, cbRemainder = divmod(length, g_cbGoodBufferSize);
     1499        for _ in xrange(cFull):
     1500            abBuffer = src.read(g_cbGoodBufferSize);
     1501            dst.write(abBuffer);
     1502            if len(abBuffer) != g_cbGoodBufferSize:
     1503                raise exception('unexpected end of source file');
     1504        if cbRemainder > 0:
     1505            abBuffer = src.read(cbRemainder);
     1506            dst.write(abBuffer);
     1507            if len(abBuffer) != cbRemainder:
     1508                raise exception('unexpected end of source file');
     1509
     1510
     1511def unpackTarFile(sArchive, sDstDir, fnLog, fnError = None, fnFilter = None):
     1512    # type: (string, string, (string) -> None, (string) -> None, (string) -> bool) -> list[string]
     1513    """
     1514    Worker for unpackFile that deals with tarballs, same function signature.
     1515    """
     1516    import shutil;
     1517    import tarfile;
     1518    if fnError is None:
     1519        fnError = fnLog;
     1520
     1521    fnLog('Untarring "%s" to "%s"...' % (sArchive, sDstDir));
     1522
     1523    #
     1524    # Default buffer sizes of 16384 bytes is causing too many syscalls on Windows.
     1525    # 60%+ speedup for python 2.7 and 50%+ speedup for python 3.5, both on windows with PDBs.
     1526    # 20%+ speedup for python 2.7 and 15%+ speedup for python 3.5, both on windows skipping PDBs.
     1527    #
     1528    if True is True:
     1529        global g_fnOriginalShCopyFileObj;
     1530        if g_fnOriginalShCopyFileObj is None:
     1531            g_fnOriginalShCopyFileObj = shutil.copyfileobj;
     1532            shutil.copyfileobj = __myshutilcopyfileobj;
     1533        global g_fTarCopyFileObjOverriddend;
     1534        if g_fTarCopyFileObjOverriddend is False:
     1535            g_fTarCopyFileObjOverriddend = True;
     1536            tarfile.copyfileobj = __mytarfilecopyfileobj;
     1537
     1538    #
     1539    # Open it.
     1540    #
     1541    # Note! We not using 'r:*' because we cannot allow seeking compressed files!
     1542    #       That's how we got a 13 min unpack time for VBoxAll on windows (hardlinked pdb).
     1543    #
     1544    try: oTarFile = tarfile.open(sArchive, 'r|*', bufsize = g_cbGoodBufferSize);
     1545    except Exception as oXcpt:
     1546        fnError('Error opening "%s" for unpacking into "%s": %s' % (sArchive, sDstDir, oXcpt,));
     1547        return None;
     1548
     1549    # Extract all members.
     1550    asMembers = [];
     1551    try:
     1552        for oTarInfo in oTarFile:
     1553            try:
     1554                if fnFilter is None  or  fnFilter(oTarInfo.name) is not False:
     1555                    if oTarInfo.islnk():
     1556                        # Links are trouble, especially on Windows.  We must avoid the falling that will end up seeking
     1557                        # in the compressed tar stream.  So, fall back on shutil.copy2 instead.
     1558                        sLinkFile     = os.path.join(sDstDir, oTarInfo.name.rstrip('/').replace('/', os.path.sep));
     1559                        sLinkTarget   = os.path.join(sDstDir, oTarInfo.linkname.rstrip('/').replace('/', os.path.sep));
     1560                        sParentDir    = os.path.dirname(sLinkFile);
     1561                        try:    os.unlink(sLinkFile);
     1562                        except: pass;
     1563                        if sParentDir is not ''  and  not os.path.exists(sParentDir):
     1564                            os.makedirs(sParentDir);
     1565                        try:    os.link(sLinkTarget, sLinkFile);
     1566                        except: shutil.copy2(sLinkTarget, sLinkFile);
     1567                    else:
     1568                        if oTarInfo.isdir():
     1569                            # Just make sure the user (we) got full access to dirs.  Don't bother getting it 100% right.
     1570                            oTarInfo.mode |= 0x1c0; # (octal: 0700)
     1571                        oTarFile.extract(oTarInfo, sDstDir);
     1572                    asMembers.append(os.path.join(sDstDir, oTarInfo.name.replace('/', os.path.sep)));
     1573            except Exception as oXcpt:
     1574                fnError('Error unpacking "%s" member "%s" into "%s": %s' % (sArchive, oTarInfo.name, sDstDir, oXcpt));
     1575                for sAttr in [ 'name', 'linkname', 'type', 'mode', 'size', 'mtime', 'uid', 'uname', 'gid', 'gname' ]:
     1576                    fnError('Info: %8s=%s' % (sAttr, getattr(oTarInfo, sAttr),));
     1577                for sFn in [ 'isdir', 'isfile', 'islnk', 'issym' ]:
     1578                    fnError('Info: %8s=%s' % (sFn, getattr(oTarInfo, sFn)(),));
     1579                asMembers = None;
     1580                break;
     1581    except Exception as oXcpt:
     1582        fnError('Error unpacking "%s" into "%s": %s' % (sArchive, sDstDir, oXcpt));
     1583        asMembers = None;
     1584
     1585    #
     1586    # Finally, close it.
     1587    #
     1588    try: oTarFile.close();
     1589    except Exception as oXcpt:
     1590        fnError('Error closing "%s" after unpacking into "%s": %s' % (sArchive, sDstDir, oXcpt));
     1591        asMembers = None;
     1592
     1593    return asMembers;
     1594
     1595
     1596def unpackFile(sArchive, sDstDir, fnLog, fnError = None, fnFilter = None):
     1597    # type: (string, string, (string) -> None, (string) -> None, (string) -> bool) -> list[string]
    14421598    """
    14431599    Unpacks the given file if it has a know archive extension, otherwise do
    14441600    nothing.
     1601
     1602    fnLog & fnError both take a string parameter.
     1603
     1604    fnFilter takes a member name (string) and returns True if it's included
     1605    and False if excluded.
    14451606
    14461607    Returns list of the extracted files (full path) on success.
     
    14481609    Returns None on failure.  Raises no exceptions.
    14491610    """
    1450     if fnError is None:
    1451         fnError = fnLog;
    1452 
    1453     asMembers = [];
    1454 
    14551611    sBaseNameLower = os.path.basename(sArchive).lower();
     1612
     1613    #
     1614    # Zip file?
     1615    #
    14561616    if sBaseNameLower.endswith('.zip'):
    1457         fnLog('Unzipping "%s" to "%s"...' % (sArchive, sDstDir));
    1458         try:
    1459             oZipFile = zipfile.ZipFile(sArchive, 'r')
    1460             asMembers = oZipFile.namelist();
    1461             for sMember in asMembers:
    1462                 if sMember.endswith('/'):
    1463                     os.makedirs(os.path.join(sDstDir, sMember.replace('/', os.path.sep)), 0775);
    1464                 else:
    1465                     oZipFile.extract(sMember, sDstDir);
    1466             oZipFile.close();
    1467         except Exception, oXcpt:
    1468             fnError('Error unpacking "%s" into "%s": %s' % (sArchive, sDstDir, oXcpt));
    1469             return None;
    1470 
    1471     elif sBaseNameLower.endswith('.tar') \
     1617        return unpackZipFile(sArchive, sDstDir, fnLog, fnError, fnFilter);
     1618
     1619    #
     1620    # Tarball?
     1621    #
     1622    if   sBaseNameLower.endswith('.tar') \
    14721623      or sBaseNameLower.endswith('.tar.gz') \
    14731624      or sBaseNameLower.endswith('.tgz') \
    14741625      or sBaseNameLower.endswith('.tar.bz2'):
    1475         fnLog('Untarring "%s" to "%s"...' % (sArchive, sDstDir));
    1476         try:
    1477             oTarFile = tarfile.open(sArchive, 'r:*');
    1478             asMembers = [oTarInfo.name for oTarInfo in oTarFile.getmembers()];
    1479             oTarFile.extractall(sDstDir);
    1480             oTarFile.close();
    1481         except Exception, oXcpt:
    1482             fnError('Error unpacking "%s" into "%s": %s' % (sArchive, sDstDir, oXcpt));
    1483             return None;
    1484 
    1485     else:
    1486         fnLog('Not unpacking "%s".' % (sArchive,));
    1487         return [];
    1488 
    1489     #
    1490     # Change asMembers to local slashes and prefix with path.
    1491     #
    1492     asMembersRet = [];
    1493     for sMember in asMembers:
    1494         asMembersRet.append(os.path.join(sDstDir, sMember.replace('/', os.path.sep)));
    1495 
    1496     return asMembersRet;
     1626        return unpackTarFile(sArchive, sDstDir, fnLog, fnError, fnFilter);
     1627
     1628    #
     1629    # Cannot classify it from the name, so just return that to the caller.
     1630    #
     1631    fnLog('Not unpacking "%s".' % (sArchive,));
     1632    return [];
    14971633
    14981634
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette