VirtualBox

Changeset 65855 in vbox for trunk/src/VBox


Ignore:
Timestamp:
Feb 23, 2017 12:23:08 PM (8 years ago)
Author:
vboxsync
Message:

testdriver/reporter.py: Redid locking to account for garbage collection causing trouble when it calls del methods with log statements. Since those statements can be very useful, the reporter now disables GC while it's owning the lock, but allow recursion in case GC runs before we disables it or immediately after reenabling it.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/ValidationKit/testdriver/reporter.py

    r65839 r65855  
    3535import datetime
    3636import errno
     37import gc
    3738import os
    3839import os.path
     
    4849g_oReporter = None;                     # type: ReporterBase
    4950g_sReporterName = None;
    50 g_oLock = threading.Lock();
    51 
    52 # Lock wrapper:
    53 class LockWrapper(object):
    54     """ Temporary lock wrapper to debug deadlocks on the macs. """
    55 
    56     kcMsTimeout = 15000;
     51
     52
     53class ReporterLock(object):
     54    """
     55    Work around problem with garbage collection triggering __del__ method with
     56    logging while inside the logger lock and causing a deadlock.
     57    """
    5758
    5859    def __init__(self, sName):
    59         self.sName  = sName;
    60         self.oLock  = threading.Lock();
    61         self.oOwner = None;
    62         self.oPrevOwner = None;
    63         self.cAcquired = 0;
    64         self.oReportingError = None;
    65 
    66     def acquire(self, blocking = True):
     60        self.sName          = sName;
     61        self.oLock          = threading.RLock();
     62        self.oOwner         = None;
     63        self.cRecursion     = 0;
     64        self.fRestoreGC     = False;
     65
     66    def acquire(self):
    6767        """ Acquire the lock. """
    6868        oSelf = threading.current_thread();
    6969
    70         # Special case for avoiding recursive error reports via the error method.
    71         if self.oReportingError == oSelf:
    72             return True;
    73 
    74         # Check ownership.
    75         if oSelf == self.oOwner:
    76             self.error('Recursive lock %s entry on thread %s' % (self.sName, oSelf,));
    77 
    78         # Try acquire it (non-blocking).
    79         if not self.oLock.acquire(False):
    80             if not blocking:
    81                 return False;
    82             msStart = utils.timestampMilli();
    83             while True:
    84                 if self.oLock.acquire(False):
    85                     break;
    86                 cMsElapsed = utils.timestampMilli() - msStart;
    87                 if cMsElapsed > 1:
    88                     if cMsElapsed > self.kcMsTimeout:
    89                         self.error('Thread %s has waited more than %u seconds on lock %s to be released by %s'
    90                                    % (oSelf, self.kcMsTimeout / 1000, self.sName, self.oOwner,), fAllThreads = True);
    91                     time.sleep(0.001);
    92 
    93         # Got it. Update stats.
    94         self.oOwner = oSelf;
    95         self.cAcquired += 1;
     70        # Take the lock.
     71        if not self.oLock.acquire():
     72            return False;
     73
     74        self.oOwner      = oSelf;
     75        self.cRecursion += 1;
     76
     77        # Disable GC to avoid __del__ w/ log statement randomly reenter the logger.
     78        self.fRestoreGC  = gc.isenabled();
     79        if self.fRestoreGC:
     80            gc.disable();
     81
    9682        return True;
    9783
     
    9985        """ Release the lock. """
    10086        oSelf = threading.current_thread();
    101         # Skip if we're in the error method (see acquire).
    102         if self.oReportingError == oSelf:
    103             pass;
    104         # Check ownership.
    105         elif oSelf != self.oOwner:
    106             self.error('Thread %s is trying to release %s without owning it' % (oSelf, self.sName,));
    107         # Housekeeping first, then relase for real.
    108         else:
    109             self.oPrevOwner = oSelf;
     87
     88        # Check the ownership.
     89        if oSelf != self.oOwner:
     90            raise threading.ThreadError();
     91
     92        # Drop one recursion.
     93        self.cRecursion -= 1;
     94        if self.cRecursion <= 0:
     95
     96            # Final recursion. Clear owner and re-enable GC.
    11097            self.oOwner = None;
    111             self.oLock.release();
    112 
    113     def error(self, sMessage, fAllThreads = False):
    114         """ Reports a problem. Raises exception."""
    115         #
    116         # Get the ID of the current thread and the current and previous lock owners.
    117         #
    118         oSelf = threading.current_thread();
    119         try:    idSelf = threading.current_thread().ident;
    120         except: idSelf = -1;
    121 
    122         try:    idOwner = self.oOwner.ident if self.oOwner is not None else -2;
    123         except: idOwner = -1;
    124         try:    sOwnerNm = self.oOwner.name if self.oOwner is not None else '<none>';
    125         except: sOwnerNm = -1;
    126 
    127         try:    idPrevOwner = self.oPrevOwner.ident if self.oPrevOwner is not None else -2;
    128         except: idPrevOwner = -1;
    129         try:    sPrevOwnerNm = self.oPrevOwner.name if self.oPrevOwner is not None else '<none>';
    130         except: sPrevOwnerNm = -1;
    131 
    132         # First to stdout.
    133         print '!Lock trouble! %s idSelf=%s (%s)' % ( self.sName, idSelf, oSelf,);
    134         print '!Lock trouble! %s' % (sMessage,);
    135         print '!Lock trouble! %s idOwner=%s sOwnerNm=%s' % (self.sName, idOwner, sOwnerNm,);
    136         print '!Lock trouble! %s idPrevOwner=%s sPrevOwnerNm=%s' % (self.sName, idPrevOwner, sPrevOwnerNm,);
    137         print '!Lock trouble! %s cAcquired=%d' % (self.sName, self.cAcquired,);
    138         sys.stdout.flush()
    139 
    140         # Append to special log file (unbuffered).
    141         try:
    142             oFile = open('/tmp/reporter-lock-validation.log', 'a', 0)
    143         except:
    144             oFile = sys.stderr;
    145         oFile.write('lock=%s tid=%s (%s) %s\n' % (self.sName, idSelf, oSelf, sMessage,));
    146         oFile.write('lock=%s idOwner=%s sOwnerNm=%s idPrevOwner=%s sPrevOwnerNm=%s cAcquired=%s\n'
    147                     % (self.sName, idOwner, sOwnerNm, idPrevOwner, sPrevOwnerNm, self.cAcquired));
    148 
    149         #
    150         # Get thread stack(s).
    151         #
    152         asStacks = [];
    153         try:
    154             for idThread, oStack in sys._current_frames().items(): # >=2.5, a bit ugly - pylint: disable=W0212
    155                 if not fAllThreads and idThread != idSelf and idSelf != -1:
    156                     continue;
    157                 try:
    158                     if asStacks:
    159                         asStacks.append('');
    160                     asStacks.append('Thread %s (%#x)' % (idThread, idThread));
    161                     try:
    162                         asInfo = traceback.format_stack(oStack, 20);
    163                     except Exception as oXcpt:
    164                         asStacks.append('  Stack formatting exception #2: %s' % (oXcpt,));
    165                     else:
    166                         for sInfo in asInfo:
    167                             asStacks.extend(sInfo.splitlines());
    168                 except Exception as oXcpt:
    169                     asStacks.append('  Stack formatting exception #1: %s' % (oXcpt,));
    170         except Exception as oXcpt:
    171             asStacks.append('  Stack formatting exception #0: %s' % (oXcpt,));
    172 
    173         #
    174         # Write the stacks to stdout then to the log file.
    175         #
    176         for sLine in asStacks:
    177             print '!Lock trouble! %s' % (sLine,);
    178         sys.stderr.flush()
    179 
    180         for sLine in asStacks:
    181             oFile.write('%s\n' % (sLine,));
    182 
    183         if oFile != sys.stderr:
    184             try:    oFile.close();
    185             except: pass;
    186 
    187         #
    188         # Now for the risky part, try log it using the logger.
    189         #
    190         try:
    191             self.oReportingError = oSelf;
    192             g_oReporter.log(1, '!Lock %s trouble! %s' % (self.sName, sMessage,));
    193             for sLine in asStacks:
    194                 g_oReporter.log(1, '!Lock %s trouble! %s' % (self.sName, sLine,));
    195         except:
    196             pass;
    197         finally:
    198             self.oReportingError = None;
    199         raise Exception(sMessage);
    200 
    201 if utils.getHostOs() in ['darwin', 'solaris', 'linux']:
    202     g_oLock = LockWrapper('reporter');
     98            if self.fRestoreGC:
     99                self.fRestoreGC = False;
     100                gc.enable();
     101
     102        self.oLock.release();
     103
     104## Reporter lock.
     105g_oLock = ReporterLock('reporter');
    203106
    204107
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette