VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 61476

Last change on this file since 61476 was 61460, checked in by vboxsync, 9 years ago

investigateVBoxUnitTest: bugfix for nested failures (we only care about unit test level).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 40.1 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 61460 2016-06-04 00:19:16Z vboxsync $
4# pylint: disable=C0301
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15__copyright__ = \
16"""
17Copyright (C) 2012-2016 Oracle Corporation
18
19This file is part of VirtualBox Open Source Edition (OSE), as
20available from http://www.virtualbox.org. This file is free software;
21you can redistribute it and/or modify it under the terms of the GNU
22General Public License (GPL) as published by the Free Software
23Foundation, in version 2 as it comes in the "COPYING" file of the
24VirtualBox OSE distribution. VirtualBox OSE is distributed in the
25hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
26
27The contents of this file may alternatively be used under the terms
28of the Common Development and Distribution License Version 1.0
29(CDDL) only, as it comes in the "COPYING.CDDL" file of the
30VirtualBox OSE distribution, in which case the provisions of the
31CDDL are applicable instead of those of the GPL.
32
33You may elect to license modified versions of this file under the
34terms and conditions of either the GPL or the CDDL or both.
35"""
36__version__ = "$Revision: 61460 $"
37
38
39# Standard python imports
40import sys;
41import os;
42from optparse import OptionParser;
43
44# Add Test Manager's modules path
45g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
46sys.path.append(g_ksTestManagerDir);
47
48# Test Manager imports
49from testmanager.core.db import TMDatabaseConnection;
50from testmanager.core.build import BuildDataEx;
51from testmanager.core.failurereason import FailureReasonLogic;
52from testmanager.core.testbox import TestBoxLogic, TestBoxData;
53from testmanager.core.testcase import TestCaseDataEx;
54from testmanager.core.testgroup import TestGroupData;
55from testmanager.core.testset import TestSetLogic, TestSetData;
56from testmanager.core.testresults import TestResultLogic, TestResultFileData;
57from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
58from testmanager.core.useraccount import UserAccountLogic;
59
60
61class VirtualTestSheriffCaseFile(object):
62 """
63 A failure investigation case file.
64
65 """
66
67
68 ## Max log file we'll read into memory. (256 MB)
69 kcbMaxLogRead = 0x10000000;
70
71 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
72 self.oSheriff = oSheriff;
73 self.oTestSet = oTestSet; # TestSetData
74 self.oTree = oTree; # TestResultDataEx
75 self.oBuild = oBuild; # BuildDataEx
76 self.oTestBox = oTestBox; # TestBoxData
77 self.oTestGroup = oTestGroup; # TestGroupData
78 self.oTestCase = oTestCase; # TestCaseDataEx
79 self.sMainLog = ''; # The main log file. Empty string if not accessible.
80
81 # Generate a case file name.
82 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
83 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
84 % ( self.oTestSet.idTestSet,
85 self.oTestCase.sName,
86 self.oTestBox.sName,
87 self.oTestBox.sOs,
88 self.oTestBox.sOsVersion,
89 self.oTestBox.sCpuArch,
90 self.oTestBox.sCpuName,
91 self.oTestBox.sCpuVendor,
92 self.oBuild.oCat.sProduct,
93 self.oBuild.oCat.sBranch,
94 self.oBuild.oCat.sType,
95 self.oBuild.iRevision, );
96
97 # Investigation notes.
98 self.tReason = None; # None or one of the ktReason_XXX constants.
99 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
100 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
101
102 #
103 # Reason.
104 #
105
106 def noteReason(self, tReason):
107 """ Notes down a possible reason. """
108 self.oSheriff.dprint('noteReason: %s -> %s' % (self.tReason, tReason,));
109 self.tReason = tReason;
110 return True;
111
112 def noteReasonForId(self, tReason, idTestResult, sComment = None):
113 """ Notes down a possible reason for a specific test result. """
114 self.oSheriff.dprint('noteReasonForId: %u: %s -> %s%s'
115 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
116 (' (%s)' % (sComment,)) if sComment is not None else ''));
117 self.dReasonForResultId[idTestResult] = tReason;
118 if sComment is not None:
119 self.dCommentForResultId[idTestResult] = sComment;
120 return True;
121
122
123 #
124 # Test classification.
125 #
126
127 def isVBoxTest(self):
128 """ Test classification: VirtualBox (using the build) """
129 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
130
131 def isVBoxUnitTest(self):
132 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
133 return self.isVBoxTest() \
134 and self.oTestCase.sName.lower() == 'unit tests';
135
136 def isVBoxInstallTest(self):
137 """ Test case classification: VirtualBox Guest installation test. """
138 return self.isVBoxTest() \
139 and self.oTestCase.sName.lower().startswith('install:');
140
141 def isVBoxSmokeTest(self):
142 """ Test case classification: Smoke test. """
143 return self.isVBoxTest() \
144 and self.oTestCase.sName.lower().startswith('smoketest');
145
146
147 #
148 # Utility methods.
149 #
150
151 def getMainLog(self):
152 """
153 Tries to reads the main log file since this will be the first source of information.
154 """
155 if len(self.sMainLog) > 0:
156 return self.sMainLog;
157 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
158 if oFile is not None:
159 try:
160 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
161 except Exception as oXcpt:
162 self.oSheriff.vprint('Error reading main log file: %s' % (oXcpt,))
163 self.sMainLog = '';
164 else:
165 self.oSheriff.vprint('Error opening main log file: %s' % (oSizeOrError,));
166 return self.sMainLog;
167
168 def getLogFile(self, oFile):
169 """
170 Tries to reads the given file as a utf-8 log file.
171 oFile is a TestFileDataEx instance.
172 Returns empty string if problems opening or reading the file.
173 """
174 sContent = '';
175 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
176 if oFile is not None:
177 try:
178 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
179 except Exception as oXcpt:
180 self.oSheriff.vprint('Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
181 else:
182 self.oSheriff.vprint('Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
183 return sContent;
184
185
186 def isSingleTestFailure(self):
187 """
188 Figure out if this is a single test failing or if it's one of the
189 more complicated ones.
190 """
191 if self.oTree.cErrors == 1:
192 return True;
193 if self.oTree.deepCountErrorContributers() <= 1:
194 return True;
195 return False;
196
197
198
199class VirtualTestSheriff(object): # pylint: disable=R0903
200 """
201 Add build info into Test Manager database.
202 """
203
204 ## The user account for the virtual sheriff.
205 ksLoginName = 'vsheriff';
206
207 def __init__(self):
208 """
209 Parse command line.
210 """
211 self.oDb = None;
212 self.tsNow = None;
213 self.oTestResultLogic = None;
214 self.oTestSetLogic = None;
215 self.oFailureReasonLogic = None; # FailureReasonLogic;
216 self.oTestResultFailureLogic = None; # TestResultFailureLogic
217 self.oLogin = None;
218 self.uidSelf = -1;
219 self.oLogFile = None;
220 self.asBsodReasons = [];
221 self.asUnitTestReasons = [];
222
223 oParser = OptionParser();
224 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
225 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
226 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
227 help = 'Work period specified in hours. Defauls is 2 hours.');
228 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
229 help = 'Whether to commit the findings to the database. Default is a dry run.');
230 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
231 help = 'Quiet execution');
232 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
233 help = 'Where to log messages.');
234 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
235 help = 'Enables debug mode.');
236
237 (self.oConfig, _) = oParser.parse_args();
238
239 if self.oConfig.sLogFile is not None and len(self.oConfig.sLogFile) > 0:
240 self.oLogFile = open(self.oConfig.sLogFile, "a");
241 self.oLogFile.write('VirtualTestSheriff: $Revision: 61460 $ \n');
242
243
244 def eprint(self, sText):
245 """
246 Prints error messages.
247 Returns 1 (for exit code usage.)
248 """
249 print 'error: %s' % (sText,);
250 if self.oLogFile is not None:
251 self.oLogFile.write('error: %s\n' % (sText,));
252 return 1;
253
254 def dprint(self, sText):
255 """
256 Prints debug info.
257 """
258 if self.oConfig.fDebug:
259 if not self.oConfig.fQuiet:
260 print 'debug: %s' % (sText, );
261 if self.oLogFile is not None:
262 self.oLogFile.write('debug: %s\n' % (sText,));
263 return 0;
264
265 def vprint(self, sText):
266 """
267 Prints verbose info.
268 """
269 if not self.oConfig.fQuiet:
270 print 'info: %s' % (sText,);
271 if self.oLogFile is not None:
272 self.oLogFile.write('info: %s\n' % (sText,));
273 return 0;
274
275
276 def selfCheck(self):
277 """ Does some self checks, looking up things we expect to be in the database and such. """
278 rcExit = 0;
279 for sAttr in dir(self.__class__):
280 if sAttr.startswith('ktReason_'):
281 tReason = getattr(self.__class__, sAttr);
282 oFailureReason = self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
283 if oFailureReason is None:
284 rcExit = self.eprint('Failured to find failure reason "%s" in category "%s" in the database!'
285 % (tReason[1], tReason[0],));
286
287 # Check the user account as well.
288 if self.oLogin is None:
289 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
290 if oLogin is None:
291 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
292 return rcExit;
293
294
295
296 def badTestBoxManagement(self):
297 """
298 Looks for bad test boxes and first tries once to reboot them then disables them.
299 """
300 rcExit = 0;
301
302 #
303 # We skip this entirely if we're running in the past and not in harmless debug mode.
304 #
305 if self.oConfig.cStartHoursAgo != 0 \
306 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
307 return rcExit;
308 tsNow = self.tsNow if self.oConfig.fDebug else None;
309 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
310 oTestBoxLogic = TestBoxLogic(self.oDb);
311
312 #
313 # Get list of bad test boxes for given period and check them out individually.
314 #
315 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow);
316 for idTestBox in aidBadTestBoxes:
317 # Skip if the testbox is already disabled or has a pending reboot command.
318 try:
319 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
320 except Exception as oXcpt:
321 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
322 continue;
323 if not oTestBox.fEnabled:
324 self.dprint('badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
325 % ( idTestBox, oTestBox.sName, ));
326 continue;
327 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
328 self.dprint('badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
329 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
330 continue;
331
332 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
333 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
334 cOkay = 0;
335 cBad = 0;
336 iFirstOkay = len(aoSets);
337 for iSet, oSet in enumerate(aoSets):
338 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
339 cBad += 1;
340 else:
341 ## @todo maybe check the elapsed time here, it could still be a bad run.
342 cOkay += 1;
343 if iFirstOkay > iSet:
344 iFirstOkay = iSet;
345 if iSet > 10:
346 break;
347
348 # We react if there are two or more bad-testbox statuses at the head of the
349 # history and at least three in the last 10 results.
350 if iFirstOkay >= 2 and cBad > 2:
351 if oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
352 self.vprint('Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
353 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
354 if self.oConfig.fRealRun is True:
355 try:
356 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
357 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
358 % (iFirstOkay, cBad, cOkay),);
359 except Exception as oXcpt:
360 rcExit = self.eprint('Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
361 else:
362 self.vprint('Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
363 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
364 if self.oConfig.fRealRun is True:
365 try:
366 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
367 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
368 % (iFirstOkay, cBad, cOkay),);
369 except Exception as oXcpt:
370 rcExit = self.eprint('Error rebooting testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
371 else:
372 self.dprint('badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
373 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
374 return rcExit;
375
376
377 ## @name Failure reasons we know.
378 ## @{
379 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
380 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
381 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
382 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
383 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
384 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
385 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
386 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
387 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
388 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
389 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
390 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
391 ## @}
392
393 ## BSOD category.
394 ksBsodCategory = 'BSOD';
395 ## Special reason indicating that the flesh and blood sheriff has work to do.
396 ksBsodAddNew = 'Add new BSOD';
397
398 ## Unit test category.
399 ksUnitTestCategory = 'Unit';
400 ## Special reason indicating that the flesh and blood sheriff has work to do.
401 ksUnitTestAddNew = 'Add new';
402
403 ## Used for indica that we shouldn't report anything for this test result ID and
404 ## consider promoting the previous error to test set level if it's the only one.
405 ktHarmless = ( 'Probably', 'Caused by previous error' );
406
407
408 def caseClosed(self, oCaseFile):
409 """
410 Reports the findings in the case and closes it.
411 """
412 #
413 # Log it and create a dReasonForReasultId we can use below.
414 #
415 dCommentForResultId = oCaseFile.dCommentForResultId;
416 if len(oCaseFile.dReasonForResultId) > 0:
417 # Must weed out ktHarmless.
418 dReasonForResultId = {};
419 for idKey, tReason in oCaseFile.dReasonForResultId.items():
420 if tReason is not self.ktHarmless:
421 dReasonForResultId[idKey] = tReason;
422 if len(dReasonForResultId) == 0:
423 self.vprint('TODO: Closing %s without a real reason, only %s.' % (oCaseFile.sName, oCaseFile.dReasonForResultId));
424 return False;
425
426 # Try promote to single reason.
427 atValues = dReasonForResultId.values();
428 fSingleReason = True;
429 if len(dReasonForResultId) == 1 and dReasonForResultId.keys()[0] != oCaseFile.oTestSet.idTestResult:
430 self.dprint('Promoting single reason to whole set: %s' % (atValues[0],));
431 elif len(dReasonForResultId) > 1 and len(atValues) == atValues.count(atValues[0]):
432 self.dprint('Merged %d reasons to a single one: %s' % (len(atValues), atValues[0]));
433 else:
434 fSingleReason = False;
435 if fSingleReason:
436 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: atValues[0], };
437 if len(dCommentForResultId) > 0:
438 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: dCommentForResultId.values()[0], };
439 elif oCaseFile.tReason is not None:
440 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
441 else:
442 self.vprint('Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
443 return False;
444
445 self.vprint('Closing %s with following reason%s: %s'
446 % ( oCaseFile.sName, 's' if dReasonForResultId > 0 else '', dReasonForResultId, ));
447
448 #
449 # Add the test failure reason record(s).
450 #
451 for idTestResult, tReason in dReasonForResultId.items():
452 oFailureReason = self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
453 if oFailureReason is not None:
454 sComment = 'Set by $Revision: 61460 $' # Handy for reverting later.
455 if idTestResult in dCommentForResultId:
456 sComment += ': ' + dCommentForResultId[idTestResult];
457
458 oAdd = TestResultFailureData();
459 oAdd.initFromValues(idTestResult = idTestResult,
460 idFailureReason = oFailureReason.idFailureReason,
461 uidAuthor = self.uidSelf,
462 idTestSet = oCaseFile.oTestSet.idTestSet,
463 sComment = sComment,);
464 if self.oConfig.fRealRun:
465 try:
466 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
467 except Exception as oXcpt:
468 self.eprint('caseClosed: Exception "%s" while adding reason %s for %s'
469 % (oXcpt, oAdd, oCaseFile.sLongName,));
470 else:
471 self.eprint('caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
472 return True;
473
474 #
475 # Tools for assiting log parsing.
476 #
477
478 @staticmethod
479 def matchFollowedByLines(sStr, off, asFollowingLines):
480 """ Worker for isThisFollowedByTheseLines. """
481
482 # Advance off to the end of the line.
483 off = sStr.find('\n', off);
484 if off < 0:
485 return False;
486 off += 1;
487
488 # Match each string with the subsequent lines.
489 for iLine, sLine in enumerate(asFollowingLines):
490 offEnd = sStr.find('\n', off);
491 if offEnd < 0:
492 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
493 if len(sLine) > 0 and sStr.find(sLine, off, offEnd) < 0:
494 return False;
495
496 # next line.
497 off = offEnd + 1;
498
499 return True;
500
501 @staticmethod
502 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
503 """
504 Looks for a line contining sFirst which is then followed by lines
505 with the strings in asFollowingLines. (No newline chars anywhere!)
506 Returns True / False.
507 """
508 off = sStr.find(sFirst, 0);
509 while off >= 0:
510 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
511 return True;
512 off = sStr.find(sFirst, off + 1);
513 return False;
514
515 @staticmethod
516 def findAndReturnResetOfLine(sHaystack, sNeedle):
517 """
518 Looks for sNeedle in sHaystack.
519 Returns The text following the needle up to the end of the line.
520 Returns None if not found.
521 """
522 off = sHaystack.find(sNeedle);
523 if off < 0:
524 return None;
525 off += len(sNeedle)
526 offEol = sHaystack.find('\n', off);
527 if offEol < 0:
528 offEol = len(sHaystack);
529 return sHaystack[off:offEol]
530
531 @staticmethod
532 def findInAnyAndReturnResetOfLine(asHaystacks, sNeedle):
533 """
534 Looks for sNeedle in zeroe or more haystacks (asHaystack).
535 Returns The text following the first needed found up to the end of the line.
536 Returns None if not found.
537 """
538 for sHaystack in asHaystacks:
539 sRet = VirtualTestSheriff.findAndReturnResetOfLine(sHaystack, sNeedle);
540 if sRet is not None:
541 return sRet;
542 return None;
543
544
545 #
546 # The investigative units.
547 #
548
549 def investigateBadTestBox(self, oCaseFile):
550 """
551 Checks out bad-testbox statuses.
552 """
553 _ = oCaseFile;
554 return False;
555
556
557 def investigateVBoxUnitTest(self, oCaseFile):
558 """
559 Checks out a VBox unittest problem.
560 """
561
562 #
563 # Process simple test case failures first, using their name as reason.
564 # We do the reason management just like for BSODs.
565 #
566 cRelevantOnes = 0;
567 aoFailedResults = oCaseFile.oTree.getListOfFailures();
568 for oFailedResult in aoFailedResults:
569 if oFailedResult is oCaseFile.oTree:
570 self.vprint('TODO: toplevel failure');
571 cRelevantOnes += 1
572 elif oFailedResult.sName == 'Installing VirtualBox':
573 self.vprint('TODO: Installation failure');
574 cRelevantOnes += 1
575 elif oFailedResult.sName == 'Uninstalling VirtualBox':
576 self.vprint('TODO: Uninstallation failure');
577 cRelevantOnes += 1
578 elif oFailedResult.oParent is not None:
579 # Get the 2nd level node because that's where we'll find the unit test name.
580 while oFailedResult.oParent.oParent is not None:
581 oFailedResult = oFailedResult.oParent;
582
583 # Only report a failure once.
584 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
585 sKey = oFailedResult.sName;
586 if sKey.startswith('testcase/'):
587 sKey = sKey[9:];
588 if sKey in self.asUnitTestReasons:
589 tReason = ( self.ksUnitTestCategory, sKey );
590 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
591 else:
592 self.dprint('Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
593 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
594 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
595 cRelevantOnes += 1
596 else:
597 self.vprint('Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
598
599 #
600 # If we've caught all the relevant ones by now, report the result.
601 #
602 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
603 return self.caseClosed(oCaseFile);
604 return False;
605
606
607 ## Thing we search a main or VM log for to figure out why something went bust.
608 katSimpleMainAndVmLogReasons = [
609 # ( Whether to stop on hit, needle, reason tuple ),
610 ( False, 'GuruMeditation', ktReason_Guru_Generic ),
611 ( False, 'Guru Meditation', ktReason_Guru_Generic ),
612 ( True, 'VERR_IEM_INSTR_NOT_IMPLEMENTED', ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED ),
613 ( True, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED', ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED ),
614 ( True, 'VERR_TRPM_DONT_PANIC', ktReason_Guru_VERR_TRPM_DONT_PANIC ),
615 ( True, 'VERR_PGM_PHYS_PAGE_RESERVED', ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED ),
616 ( True, 'VINF_EM_TRIPLE_FAULT', ktReason_Guru_VINF_EM_TRIPLE_FAULT ),
617 ( False, 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))',
618 ktReason_XPCOM_NS_ERROR_CALL_FAILED ),
619 ];
620
621 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
622 """
623 Investigates a failed VM run.
624 """
625
626 def investigateLogSet():
627 """
628 Investigates the current set of VM related logs.
629 """
630 self.dprint('investigateLogSet: %u chars result log, %u chars VM log, %u chars kernel log'
631 % ( len(sResultLog) if sResultLog is not None else 0,
632 len(sVMLog) if sVMLog is not None else 0,
633 len(sKrnlLog) if sKrnlLog is not None else 0), );
634 #self.dprint('main.log<<<\n%s\n<<<\n' % (sResultLog,));
635 #self.dprint('vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
636 #self.dprint('krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
637
638 # TODO: more
639
640 #
641 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
642 #
643 sDetails = self.findInAnyAndReturnResetOfLine([ sVMLog, sResultLog ],
644 'GIM: HyperV: Guest indicates a fatal condition! P0=');
645 if sDetails is not None:
646 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
647 sKey = sDetails.split(' ', 1)[0];
648 try: sKey = '0x%08X' % (int(sKey, 16),);
649 except: pass;
650 if sKey in self.asBsodReasons:
651 tReason = ( self.ksBsodCategory, sKey );
652 elif sKey.lower() in self.asBsodReasons: # just in case.
653 tReason = ( self.ksBsodCategory, sKey.lower() );
654 else:
655 self.dprint('BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
656 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
657 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
658
659 #
660 # Look for linux panic.
661 #
662 if sKrnlLog is not None:
663 pass; ## @todo
664
665 #
666 # Loop thru the simple stuff.
667 #
668 fFoundSomething = False;
669 for fStopOnHit, sNeedle, tReason in self.katSimpleMainAndVmLogReasons:
670 if sResultLog.find(sNeedle) > 0 or sVMLog.find(sNeedle) > 0:
671 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
672 if fStopOnHit:
673 return True;
674 fFoundSomething = True;
675
676 #
677 # Check for repeated reboots...
678 #
679 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
680 if cResets > 10:
681 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
682 sComment = 'Counted %s reboots' % (cResets,));
683
684 return fFoundSomething;
685
686 #
687 # Check if we got any VM or/and kernel logs. Treat them as sets in
688 # case we run multiple VMs here.
689 #
690 sVMLog = None;
691 sKrnlLog = None;
692 for oFile in oFailedResult.aoFiles:
693 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
694 if sVMLog is not None:
695 if investigateLogSet() is True:
696 return True;
697 sKrnlLog = None;
698 sVMLog = oCaseFile.getLogFile(oFile);
699 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
700 sKrnlLog = oCaseFile.getLogFile(oFile);
701 if sVMLog is not None and investigateLogSet() is True:
702 return True;
703
704 return None;
705
706
707 def isResultFromVMRun(self, oFailedResult, sResultLog):
708 """
709 Checks if this result and corresponding log snippet looks like a VM run.
710 """
711
712 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
713 if sResultLog.find(' startVm') > 0:
714 return True;
715
716 # Any other indicators? No?
717 _ = oFailedResult;
718 return False;
719
720
721 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
722 """
723 Checks out a VBox VM test.
724
725 This is generic investigation of a test running one or more VMs, like
726 for example a smoke test or a guest installation test.
727
728 The fSingleVM parameter is a hint, which probably won't come in useful.
729 """
730 _ = fSingleVM;
731
732 #
733 # Get a list of test result failures we should be looking into and the main log.
734 #
735 aoFailedResults = oCaseFile.oTree.getListOfFailures();
736 sMainLog = oCaseFile.getMainLog();
737
738 #
739 # There are a set of errors ending up on the top level result record.
740 # Should deal with these first.
741 #
742 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
743 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
744 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
745 # ignore other failures in the test if we're not a little bit careful here.
746 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
747 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
748 return self.caseClosed(oCaseFile);
749
750 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
751 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
752 (' Timeout: ', ' Attempting to abort child...',) ):
753 if sMainLog.find('*** glibc detected *** /') > 0:
754 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
755 else:
756 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
757 return self.caseClosed(oCaseFile);
758
759
760
761 # Look for heap corruption without visible hang.
762 if sMainLog.find('*** glibc detected *** /') > 0 \
763 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
764 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
765 return self.caseClosed(oCaseFile);
766
767 #
768 # Go thru each failed result.
769 #
770 for oFailedResult in aoFailedResults:
771 self.dprint('Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
772 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
773 if oFailedResult.sName == 'Installing VirtualBox':
774 self.vprint('TODO: Installation failure');
775 elif oFailedResult.sName == 'Uninstalling VirtualBox':
776 self.vprint('TODO: Uninstallation failure');
777 elif self.isResultFromVMRun(oFailedResult, sResultLog):
778 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
779 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
780 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
781 elif sResultLog.find('The machine is not mutable (state is ') > 0:
782 self.vprint('Ignorining "machine not mutable" error as it is probably due to an earlier problem');
783 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
784 else:
785 self.vprint('TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
786 self.dprint('%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
787
788 #
789 # Report home and close the case if we got them all, otherwise log it.
790 #
791 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
792 return self.caseClosed(oCaseFile);
793
794 if len(oCaseFile.dReasonForResultId) > 0:
795 self.vprint('TODO: Got %u out of %u - close, but no cigar. :-/'
796 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
797 else:
798 self.vprint('XXX: Could not figure out anything at all! :-(');
799 return False;
800
801
802 def reasoningFailures(self):
803 """
804 Guess the reason for failures.
805 """
806 #
807 # Get a list of failed test sets without any assigned failure reason.
808 #
809 cGot = 0;
810 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack, tsNow = self.tsNow);
811 for oTestSet in aoTestSets:
812 self.dprint('');
813 self.dprint('reasoningFailures: Checking out test set #%u, status %s' % ( oTestSet.idTestSet, oTestSet.enmStatus,))
814
815 #
816 # Open a case file and assign it to the right investigator.
817 #
818 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
819 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
820 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
821 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
822 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
823
824 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
825
826 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
827 self.dprint('investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
828 fRc = self.investigateBadTestBox(oCaseFile);
829 elif oCaseFile.isVBoxUnitTest():
830 self.dprint('investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
831 fRc = self.investigateVBoxUnitTest(oCaseFile);
832 elif oCaseFile.isVBoxInstallTest():
833 self.dprint('investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
834 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
835 elif oCaseFile.isVBoxSmokeTest():
836 self.dprint('investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
837 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
838 else:
839 self.vprint('reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
840 fRc = False;
841 cGot += fRc is True;
842 self.vprint('reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
843 return 0;
844
845
846 def main(self):
847 """
848 The 'main' function.
849 Return exit code (0, 1, etc).
850 """
851 # Database stuff.
852 self.oDb = TMDatabaseConnection()
853 self.oTestResultLogic = TestResultLogic(self.oDb);
854 self.oTestSetLogic = TestSetLogic(self.oDb);
855 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
856 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
857 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
858 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
859
860 # Get a fix on our 'now' before we do anything..
861 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
862 self.tsNow = self.oDb.fetchOne();
863
864 # If we're suppost to commit anything we need to get our user ID.
865 rcExit = 0;
866 if self.oConfig.fRealRun:
867 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
868 if self.oLogin is None:
869 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
870 else:
871 self.uidSelf = self.oLogin.uid;
872
873 # Do the stuff.
874 if rcExit == 0:
875 rcExit = self.selfCheck();
876 if rcExit == 0:
877 rcExit = self.badTestBoxManagement();
878 rcExit2 = self.reasoningFailures();
879 if rcExit == 0:
880 rcExit = rcExit2;
881
882 # Cleanup.
883 self.oFailureReasonLogic = None;
884 self.oTestResultFailureLogic = None;
885 self.oTestSetLogic = None;
886 self.oTestResultLogic = None;
887 self.oDb.close();
888 self.oDb = None;
889 if self.oLogFile is not None:
890 self.oLogFile.close();
891 self.oLogFile = None;
892 return rcExit;
893
894if __name__ == '__main__':
895 sys.exit(VirtualTestSheriff().main());
896
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette