VirtualBox

Ignore:
Timestamp:
Oct 13, 2022 1:12:12 PM (2 years ago)
Author:
vboxsync
Message:

Validation Kit/batch/quota.py: Simplified the script by making it less generic and addressed the @todos. Tested locally on a partial DB dump plus testset data. See ArchiveDelFilesBatchJob.__init__ for the configuration.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/ValidationKit/testmanager/batch/quota.py

    r96822 r97129  
    4444
    4545# Standard python imports
    46 from datetime import datetime, timedelta
    4746import sys
    4847import os
    49 from optparse import OptionParser, OptionGroup;  # pylint: disable=deprecated-module
     48from optparse import OptionParser;  # pylint: disable=deprecated-module
    5049import shutil
    5150import tempfile;
    52 import time;
    5351import zipfile;
    5452
     
    5856
    5957# Test Manager imports
    60 from common                     import utils;
    6158from testmanager                import config;
    62 
    63 ##
    64 ## @todo r=bird: Since this is CLEARLY COPIED from filearchiver.py, why doesn't the log start with a svn copy?
    65 ##
    66 ## This will be rewritten as a single purpose job. I don't want to have any 'command' arguments, there
    67 ## should be as few arguments as possible since this is a cronjob and it gets most of its info from the
    68 ## config.py file rather than the command line.
    69 ##
    70 ## I don't think processDir() will work at all in the form it is.  That one is assuming the flat structure
    71 ## of the directory containing the most recent tests that should be zipped up and put on the storage server.
    72 ## What this script needs to process is the nested (by year/month/day/(hour / x * x)) layout and handle those
    73 ## files.  It could be a good simplification to get the selection of TestSets to run on from the database.
    74 ##
    75 ## On reflection, we can either have a window of files that get rescanned (e.g. starting two weeks ago and going
    76 ## back two more) every time the batch job runs, OR we could add a flag in the database indicating whether
    77 ## we've processed a TestSet (maybe a quota pass number).  The latter would be much more efficient but
    78 ## require a rather large database change (adding a column to a table with close to 20 million rows).
    79 ##
     59from testmanager.core.db        import TMDatabaseConnection;
     60from testmanager.core.testset   import TestSetData, TestSetLogic;
     61
    8062
    8163class ArchiveDelFilesBatchJob(object): # pylint: disable=too-few-public-methods
     
    8466    """
    8567
    86     def __init__(self, sCmd, oOptions):
     68    def __init__(self, oOptions):
    8769        """
    8870        Parse command line
    8971        """
    90         self.fVerbose       = oOptions.fVerbose;
    91         self.sCmd           = sCmd;
    92         self.sSrcDir        = oOptions.sSrcDir;
    93         if not self.sSrcDir :
    94             self.sSrcDir    = config.g_ksFileAreaRootDir;    ## @todo r=bird: This CANNOT be right.
    95         self.sDstDir        = config.g_ksZipFileAreaRootDir; ## @todo r=bird: This isn't used.
    96         self.sTempDir       = oOptions.sTempDir;
    97         if not self.sTempDir:
    98             self.sTempDir   = tempfile.gettempdir();
    99         #self.oTestSetLogic = TestSetLogic(TMDatabaseConnection(self.dprint if self.fVerbose else None));
    100         #self.oTestSetLogic = TestSetLogic(TMDatabaseConnection(None));
    101         self.fDryRun        = oOptions.fDryRun;
    102         self.asFileExt      = [];
    103         self.asFileExt      = oOptions.asFileExt and oOptions.asFileExt.split(',');
    104         self.cOlderThanDays = oOptions.cOlderThanDays;
    105         self.cbBiggerThan   = oOptions.uBiggerThanKb * 1024; # Kilobyte (kB) to bytes.
    106         self.fForce         = oOptions.fForce;
     72        self.fDryRun            = oOptions.fDryRun;
     73        self.fVerbose           = oOptions.fVerbose;
     74        self.sTempDir           = tempfile.gettempdir();
     75
     76        self.dprint('Connecting to DB ...');
     77        self.oTestSetLogic      = TestSetLogic(TMDatabaseConnection(self.dprint if self.fVerbose else None));
     78
     79        ## Fetches (and handles) all testsets up to this age (in hours).
     80        self.uHoursAgeToHandle  = 24;
     81        ## Always remove files with these extensions.
     82        self.asRemoveFileExt    = [ 'webm' ];
     83        ## Always remove files which are bigger than this limit.
     84        #  Set to 0 to disable.
     85        self.cbRemoveBiggerThan = 128 * 1024 * 1024;
    10786
    10887    def dprint(self, sText):
     
    175154        return fRc;
    176155
    177     def _processTestSetZip(self, idTestSet, sFile, sCurDir):
    178         """
    179         Worker for processDir.
    180         Same return codes as processDir.
    181         """
    182 
     156    def _processTestSetZip(self, idTestSet, sSrcZipFileAbs):
     157        """
     158        Worker for processOneTestSet, which processes the testset's ZIP file.
     159
     160        Returns success indicator.
     161        """
    183162        _ = idTestSet
    184163
    185         sSrcZipFileAbs = os.path.join(sCurDir, sFile);
    186         print('Processing ZIP archive "%s" ...' % (sSrcZipFileAbs));
    187 
    188164        with tempfile.NamedTemporaryFile(dir=self.sTempDir, delete=False) as tmpfile:
    189             sDstZipFileAbs = tmpfile.name
    190         self.dprint('Using temporary ZIP archive "%s"' % (sDstZipFileAbs));
     165            sDstZipFileAbs = tmpfile.name;
    191166
    192167        fRc = True;
     
    194169        try:
    195170            oSrcZipFile = zipfile.ZipFile(sSrcZipFileAbs, 'r');                             # pylint: disable=consider-using-with
     171            self.dprint('Processing ZIP archive "%s" ...' % (sSrcZipFileAbs));
    196172            try:
    197173                if not self.fDryRun:
    198174                    oDstZipFile = zipfile.ZipFile(sDstZipFileAbs, 'w');                     # pylint: disable=consider-using-with
     175                    self.dprint('Using temporary ZIP archive "%s"' % (sDstZipFileAbs));
    199176                try:
     177                    #
     178                    # First pass: Gather information if we need to do some re-packing.
     179                    #
     180                    fDoRepack = False;
     181                    aoFilesToRepack = [];
    200182                    for oCurFile in oSrcZipFile.infolist():
    201 
    202183                        self.dprint('Handling File "%s" ...' % (oCurFile.filename))
    203184                        sFileExt = os.path.splitext(oCurFile.filename)[1];
    204185
    205                         fDoRepack = True; # Re-pack all unless told otherwise.
    206 
    207186                        if  sFileExt \
    208                         and sFileExt[1:] in self.asFileExt:
     187                        and sFileExt[1:] in self.asRemoveFileExt:
    209188                            self.dprint('\tMatches excluded extensions')
    210                             fDoRepack = False;
    211 
    212                         if  self.cbBiggerThan \
    213                         and oCurFile.file_size > self.cbBiggerThan:
    214                             self.dprint('\tIs bigger than %d bytes (%d bytes)' % (self.cbBiggerThan, oCurFile.file_size))
    215                             fDoRepack = False;
    216 
    217                         if fDoRepack \
    218                            and self.cOlderThanDays:
    219                             tsMaxAge  = datetime.now() - timedelta(days = self.cOlderThanDays);
    220                             tsFile    = datetime(year   = oCurFile.date_time[0],
    221                                                  month  = oCurFile.date_time[1],
    222                                                  day    = oCurFile.date_time[2],
    223                                                  hour   = oCurFile.date_time[3],
    224                                                  minute = oCurFile.date_time[4],
    225                                                  second = oCurFile.date_time[5]);
    226                             if tsFile < tsMaxAge:
    227                                 self.dprint('\tIs older than %d days (%s)' % (self.cOlderThanDays, tsFile))
    228                                 fDoRepack = False;
    229 
    230                         if fDoRepack:
    231                             self.dprint('Re-packing file "%s"' % (oCurFile.filename,))
    232                             if not self.fDryRun:
    233                                 oBuf = oSrcZipFile.read(oCurFile);
    234                                 oDstZipFile.writestr(oCurFile, oBuf);
     189                            fDoRepack = True;
     190                        elif     self.cbRemoveBiggerThan \
     191                             and oCurFile.file_size > self.cbRemoveBiggerThan:
     192                            self.dprint('\tIs bigger than %d bytes (%d bytes)' % (self.cbRemoveBiggerThan, oCurFile.file_size))
     193                            fDoRepack = True;
    235194                        else:
    236                             print('Deleting file "%s"' % (oCurFile.filename,))
     195                            aoFilesToRepack.append(oCurFile);
     196
     197                    if not fDoRepack:
     198                        oSrcZipFile.close();
     199                        self.dprint('No re-packing necessary, skipping ZIP archive');
     200                        return True;
     201
     202                    #
     203                    # Second pass: Re-pack all needed files into our temporary ZIP archive.
     204                    #
     205                    for oCurFile in aoFilesToRepack:
     206                        self.dprint('Re-packing file "%s"' % (oCurFile.filename,))
     207                        if not self.fDryRun:
     208                            oBuf = oSrcZipFile.read(oCurFile);
     209                            oDstZipFile.writestr(oCurFile, oBuf);
     210
    237211                    if not self.fDryRun:
    238212                        oDstZipFile.close();
     213
    239214                except Exception as oXcpt4:
    240                     print(oXcpt4);
    241                     return (None, 'Error handling file "%s" of archive "%s": %s'
    242                                 % (oCurFile.filename, sSrcZipFileAbs, oXcpt4,), None);
     215                    print('Error handling file "%s" of archive "%s": %s' % (oCurFile.filename, sSrcZipFileAbs, oXcpt4,));
     216                    return False;
    243217
    244218                oSrcZipFile.close();
     
    249223
    250224            except Exception as oXcpt3:
    251                 return (None, 'Error creating temporary ZIP archive "%s": %s' % (sDstZipFileAbs, oXcpt3,), None);
     225                print('Error creating temporary ZIP archive "%s": %s' % (sDstZipFileAbs, oXcpt3,));
     226                return False;
     227
    252228        except Exception as oXcpt1:
    253229            # Construct a meaningful error message.
    254             try:
    255                 if os.path.exists(sSrcZipFileAbs):
    256                     return (None, 'Error opening "%s": %s' % (sSrcZipFileAbs, oXcpt1), None);
    257                 if not os.path.exists(sFile):
    258                     return (None, 'File "%s" not found. [%s]' % (sSrcZipFileAbs, sFile), None);
    259                 return (None, 'Error opening "%s" inside "%s": %s' % (sSrcZipFileAbs, sFile, oXcpt1), None);
    260             except Exception as oXcpt2:
    261                 return (None, 'WTF? %s; %s' % (oXcpt1, oXcpt2,), None);
     230            if os.path.exists(sSrcZipFileAbs):
     231                print('Error: Opening file "%s" failed: %s' % (sSrcZipFileAbs, oXcpt1));
     232            else:
     233                print('Error: File "%s" not found.' % (sSrcZipFileAbs,));
     234            return False;
    262235
    263236        return fRc;
    264237
    265238
    266     def processDir(self, sCurDir):
    267         """
    268         Process the given directory (relative to sSrcDir and sDstDir).
     239    def processOneTestSet(self, idTestSet, sBasename):
     240        """
     241        Processes one single testset.
     242
    269243        Returns success indicator.
    270244        """
    271245
    272         if not self.asFileExt:
    273             print('Must specify at least one file extension to delete.');
    274             return False;
    275 
    276         if self.fVerbose:
    277             self.dprint('Processing directory: %s' % (sCurDir,));
    278 
    279         #
    280         # Sift through the directory content, collecting subdirectories and
    281         # sort relevant files by test set.
    282         # Generally there will either be subdirs or there will be files.
    283         #
    284         asSubDirs = [];
    285         dTestSets = {};
    286         sCurPath = os.path.abspath(os.path.join(self.sSrcDir, sCurDir));
    287         for sFile in os.listdir(sCurPath):
    288             if os.path.isdir(os.path.join(sCurPath, sFile)):
    289                 if sFile not in [ '.', '..' ]:
    290                     asSubDirs.append(sFile);
    291             elif sFile.startswith('TestSet-') \
    292             and  sFile.endswith('zip'):
    293                 # Parse the file name. ASSUMES 'TestSet-%d-filename' format.
    294                 iSlash1 = sFile.find('-');
    295                 iSlash2 = sFile.find('-', iSlash1 + 1);
    296                 if iSlash2 <= iSlash1:
    297                     self.warning('Bad filename (1): "%s"' % (sFile,));
    298                     continue;
    299 
    300                 try:    idTestSet = int(sFile[(iSlash1 + 1):iSlash2]);
    301                 except:
    302                     self.warning('Bad filename (2): "%s"' % (sFile,));
    303                     if self.fVerbose:
    304                         self.dprint('\n'.join(utils.getXcptInfo(4)));
    305                     continue;
    306 
    307                 if idTestSet <= 0:
    308                     self.warning('Bad filename (3): "%s"' % (sFile,));
    309                     continue;
    310 
    311                 if iSlash2 + 2 >= len(sFile):
    312                     self.warning('Bad filename (4): "%s"' % (sFile,));
    313                     continue;
    314                 sName = sFile;
    315 
    316                 # Add it.
    317                 if idTestSet not in dTestSets:
    318                     dTestSets[idTestSet] = [];
    319                 asTestSet = dTestSets[idTestSet];
    320                 asTestSet.append(sName);
    321 
    322         #
    323         # Test sets.
    324         #
    325246        fRc = True;
    326         for idTestSet, oTestSet in dTestSets.items():
    327             try:
    328                 if self._processTestSetZip(idTestSet, oTestSet[0], sCurDir) is not True:
    329                     fRc = False;
    330             except:
    331                 self.warning('TestSet %d: Exception in _processTestSetZip:\n%s' % (idTestSet, '\n'.join(utils.getXcptInfo()),));
    332                 fRc = False;
    333 
    334         #
    335         # Sub dirs.
    336         #
    337         self.dprint('Processing sub-directories');
    338         for sSubDir in asSubDirs:
    339             if self.processDir(os.path.join(sCurDir, sSubDir)) is not True:
    340                 fRc = False;
    341 
    342         #
    343         # Try to remove the directory iff it's not '.' and it's been unmodified
    344         # for the last 24h (race protection).
    345         #
    346         if sCurDir != '.':
    347             try:
    348                 fpModTime = float(os.path.getmtime(sCurPath));
    349                 if fpModTime + (24*3600) <= time.time():
    350                     if utils.noxcptRmDir(sCurPath) is True:
    351                         self.dprint('Removed "%s".' % (sCurPath,));
    352             except:
    353                 pass;
     247        self.dprint('Processing testset %d' % (idTestSet,));
     248
     249        # Construct absolute ZIP file path.
     250        # ZIP is hardcoded in config, so do the same here.
     251        sSrcZipFileAbs = os.path.join(config.g_ksZipFileAreaRootDir, sBasename + '.zip');
     252
     253        if self._processTestSetZip(idTestSet, sSrcZipFileAbs) is not True:
     254            fRc = False;
     255
     256        return fRc;
     257
     258    def processTestSets(self):
     259        """
     260        Processes all testsets according to the set configuration.
     261
     262        Returns success indicator.
     263        """
     264
     265        aoTestSets = self.oTestSetLogic.fetchByAge(cHoursBack = self.uHoursAgeToHandle);
     266        cTestSets = len(aoTestSets);
     267        print('Found %d entries in DB' % cTestSets);
     268        if not cTestSets:
     269            return True; # Nothing to do (yet).
     270
     271        fRc = True;
     272        for oTestSet in aoTestSets:
     273            fRc = self.processOneTestSet(oTestSet.idTestSet, oTestSet.sBaseFilename) and fRc;
     274            # Keep going.
    354275
    355276        return fRc;
     
    361282        # Parse options.
    362283        #
    363 
    364         if len(sys.argv) < 2:
    365             print('Must specify a main command!\n');
    366             return 1;
    367 
    368         sCommand = sys.argv[1];
    369 
    370         asCmds = [ 'delete-files' ];
    371         if sCommand not in asCmds:
    372             print('Unknown main command! Must be one of: %s\n' % ', '.join(asCmds));
    373             return 1;
    374284
    375285        oParser = OptionParser();
     
    382292        oParser.add_option('-d', '--dry-run', dest = 'fDryRun',  action = 'store_true',  default = False,
    383293                           help = 'Dry run, do not make any changes.');
    384         oParser.add_option('--source-dir', type = 'string', dest = 'sSrcDir',
    385                            help = 'Specifies the source directory to process.');
    386         oParser.add_option('--temp-dir', type = 'string', dest = 'sTempDir',
    387                            help = 'Specifies the temp directory to use.');
    388         oParser.add_option('--force', dest = 'fForce', action = 'store_true', default = False,
    389                            help = 'Forces the operation.');
    390 
    391         if sCommand == 'delete-files':
    392             oGroup = OptionGroup(oParser, "File deletion options", "Deletes files from testset archives.");
    393             oGroup.add_option('--file-ext', type = 'string', dest = 'asFileExt',
    394                               help = 'Selects files with the given extensions.');
    395             oGroup.add_option('--older-than-days', type = 'int', dest = 'cOlderThanDays', default = 0,
    396                               help = 'Selects all files which are older than NUM days.');
    397             oGroup.add_option('--bigger-than-kb', type = 'int', dest = 'uBiggerThanKb', default = 0,
    398                               help = 'Selects all files which are bigger than (kB).\nA kilobyte is 1024 bytes.');
    399             oParser.add_option_group(oGroup);
    400 
    401         (oOptions, asArgs) = oParser.parse_args(sys.argv[2:]);
     294
     295        (oOptions, asArgs) = oParser.parse_args(sys.argv[1:]);
    402296        if asArgs != []:
    403297            oParser.print_help();
     
    414308        fRc = False;
    415309
    416         if sCommand == 'delete-files':
    417             print('Job: Deleting files from archive');
    418             oBatchJob = ArchiveDelFilesBatchJob(sCommand, oOptions);
    419             fRc = oBatchJob.processDir(oBatchJob.sSrcDir);
     310        oBatchJob = ArchiveDelFilesBatchJob(oOptions);
     311        fRc = oBatchJob.processTestSets();
    420312
    421313        if oOptions.fVerbose:
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette