VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 79945

Last change on this file since 79945 was 79945, checked in by vboxsync, 5 years ago

vsheriff.py: VERR_INTNET_FLT_IF_NOT_FOUND. bugref:9151

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 77.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 79945 2019-07-24 00:11:17Z vboxsync $
4# pylint: disable=line-too-long
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15from __future__ import print_function;
16
17__copyright__ = \
18"""
19Copyright (C) 2012-2019 Oracle Corporation
20
21This file is part of VirtualBox Open Source Edition (OSE), as
22available from http://www.virtualbox.org. This file is free software;
23you can redistribute it and/or modify it under the terms of the GNU
24General Public License (GPL) as published by the Free Software
25Foundation, in version 2 as it comes in the "COPYING" file of the
26VirtualBox OSE distribution. VirtualBox OSE is distributed in the
27hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
28
29The contents of this file may alternatively be used under the terms
30of the Common Development and Distribution License Version 1.0
31(CDDL) only, as it comes in the "COPYING.CDDL" file of the
32VirtualBox OSE distribution, in which case the provisions of the
33CDDL are applicable instead of those of the GPL.
34
35You may elect to license modified versions of this file under the
36terms and conditions of either the GPL or the CDDL or both.
37"""
38__version__ = "$Revision: 79945 $"
39
40
41# Standard python imports
42import sys;
43import os;
44import hashlib;
45import subprocess;
46import smtplib
47from email.mime.multipart import MIMEMultipart
48from email.mime.text import MIMEText
49from email.utils import COMMASPACE
50
51if sys.version_info[0] >= 3:
52 from io import StringIO as StringIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
53else:
54 from StringIO import StringIO as StringIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
55from optparse import OptionParser; # pylint: disable=deprecated-module
56from PIL import Image; # pylint: disable=import-error
57
58# Add Test Manager's modules path
59g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
60sys.path.append(g_ksTestManagerDir);
61
62# Test Manager imports
63from testmanager.core.db import TMDatabaseConnection;
64from testmanager.core.build import BuildDataEx;
65from testmanager.core.failurereason import FailureReasonLogic;
66from testmanager.core.testbox import TestBoxLogic, TestBoxData;
67from testmanager.core.testcase import TestCaseDataEx;
68from testmanager.core.testgroup import TestGroupData;
69from testmanager.core.testset import TestSetLogic, TestSetData;
70from testmanager.core.testresults import TestResultLogic, TestResultFileData;
71from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
72from testmanager.core.useraccount import UserAccountLogic;
73from testmanager.config import g_ksSmtpHost, g_kcSmtpPort, g_ksAlertFrom, \
74 g_ksAlertSubject, g_asAlertList, g_ksLomPassword;
75
76# Python 3 hacks:
77if sys.version_info[0] >= 3:
78 xrange = range; # pylint: disable=redefined-builtin,invalid-name
79
80
class VirtualTestSheriffCaseFile(object):
    """
    A failure investigation case file.

    Holds the objects describing one failed test set (test set, result tree,
    build, test box, test group and test case) together with the notes taken
    during the investigation (candidate failure reasons and comments).
    """

    ## Max log file we'll read into memory. (256 MB)
    kcbMaxLogRead = 0x10000000;

    def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
        self.oSheriff       = oSheriff;
        self.oTestSet       = oTestSet;     # TestSetData
        self.oTree          = oTree;        # TestResultDataEx
        self.oBuild         = oBuild;       # BuildDataEx
        self.oTestBox       = oTestBox;     # TestBoxData
        self.oTestGroup     = oTestGroup;   # TestGroupData
        self.oTestCase      = oTestCase;    # TestCaseDataEx
        self.sMainLog       = '';           # The main log file.  Empty string if not accessible.

        # Generate a case file name.
        self.sName          = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
        self.sLongName      = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
                            % ( self.oTestSet.idTestSet,
                                self.oTestCase.sName,
                                self.oTestBox.sName,
                                self.oTestBox.sOs,
                                self.oTestBox.sOsVersion,
                                self.oTestBox.sCpuArch,
                                self.oTestBox.sCpuName,
                                self.oTestBox.sCpuVendor,
                                self.oBuild.oCat.sProduct,
                                self.oBuild.oCat.sBranch,
                                self.oBuild.oCat.sType,
                                self.oBuild.iRevision, );

        # Investigation notes.
        self.tReason             = None;    # None or one of the ktReason_XXX constants.
        self.dReasonForResultId  = {};      # Reason assignments indexed by idTestResult.
        self.dCommentForResultId = {};      # Comment assignments indexed by idTestResult.

    #
    # Reason.
    #

    def noteReason(self, tReason):
        """ Notes down a possible reason (replacing any previous one).  Returns True. """
        self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
        self.tReason = tReason;
        return True;

    def noteReasonForId(self, tReason, idTestResult, sComment = None):
        """
        Notes down a possible reason for a specific test result, optionally
        with a comment.  Returns True.
        """
        self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
                             % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
                                (u' (%s)' % (sComment,)) if sComment is not None else ''));
        self.dReasonForResultId[idTestResult] = tReason;
        if sComment is not None:
            self.dCommentForResultId[idTestResult] = sComment;
        return True;


    #
    # Test classification.
    #

    def _isVBoxTestWithPrefix(self, *asPrefixes):
        """ Worker: VirtualBox test whose lower-cased test case name starts with one of asPrefixes. """
        return self.isVBoxTest() \
           and self.oTestCase.sName.lower().startswith(asPrefixes);

    def isVBoxTest(self):
        """ Test classification: VirtualBox (using the build) """
        return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];

    def isVBoxUnitTest(self):
        """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
        return self.isVBoxTest() \
           and self.oTestCase.sName.lower() in ( 'unit tests', 'misc: unit tests' );

    def isVBoxInstallTest(self):
        """ Test case classification: VirtualBox Guest installation test. """
        return self._isVBoxTestWithPrefix('install:');

    def isVBoxUnattendedInstallTest(self):
        """ Test case classification: VirtualBox unattended guest installation test ('uinstall:'). """
        return self._isVBoxTestWithPrefix('uinstall:');

    def isVBoxUSBTest(self):
        """ Test case classification: VirtualBox USB test. """
        return self._isVBoxTestWithPrefix('usb:');

    def isVBoxStorageTest(self):
        """ Test case classification: VirtualBox Storage test. """
        return self._isVBoxTestWithPrefix('storage:');

    def isVBoxGAsTest(self):
        """ Test case classification: VirtualBox Guest Additions test. """
        return self._isVBoxTestWithPrefix('guest additions', 'ga\'s tests');

    def isVBoxAPITest(self):
        """ Test case classification: VirtualBox API test. """
        return self._isVBoxTestWithPrefix('api:');

    def isVBoxBenchmarkTest(self):
        """ Test case classification: VirtualBox Benchmark test. """
        return self._isVBoxTestWithPrefix('benchmark:');

    def isVBoxSmokeTest(self):
        """ Test case classification: Smoke test. """
        return self._isVBoxTestWithPrefix('smoketest');

    def isVBoxSerialTest(self):
        """ Test case classification: Serial port test ('serial:'). """
        return self._isVBoxTestWithPrefix('serial:');


    #
    # Utility methods.
    #

    def getMainLog(self):
        """
        Tries to read the main log file since this will be the first source of information.
        Returns the (cached) log text, empty string if it could not be opened or read.
        """
        if self.sMainLog:
            return self.sMainLog;
        (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
        if oFile is not None:
            try:
                self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
            except Exception as oXcpt:
                self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
                self.sMainLog = '';
        else:
            self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
        return self.sMainLog;

    def getLogFile(self, oFile):
        """
        Tries to read the given file as a utf-8 log file.
        oFile is a TestFileDataEx instance.
        Returns empty string if problems opening or reading the file.
        """
        sContent = '';
        # Keep the opened handle in its own variable; the error messages below
        # still need oFile.sFile from the descriptor that was passed in.
        (oLogFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
        if oLogFile is not None:
            try:
                sContent = oLogFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
            except Exception as oXcpt:
                self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
        else:
            self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
        return sContent;

    def getScreenshotSha256(self, oFile):
        """
        Tries to read the given screenshot file, uncompress it, and do SHA-2
        on the raw pixels.
        Returns SHA-2 digest string on success, None on failure.
        """
        from io import BytesIO; # Image data is binary; StringIO rejects bytes on Python 3.

        (oImgFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
        if oImgFile is None:
            self.oSheriff.vprint(u'Error opening the "%s" image file: %s' % (oFile.sFile, oSizeOrError,));
            return None;

        try:
            abImageFile = oImgFile.read();
        except Exception as oXcpt:
            self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
            return None;

        try:
            oImage = Image.open(BytesIO(abImageFile));
        except Exception as oXcpt:
            self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
            return None;

        try:
            # Newer Pillow only provides tobytes(); fall back on tostring() for old PIL.
            fnToBytes = getattr(oImage, 'tobytes', None);
            if fnToBytes is None:
                fnToBytes = oImage.tostring;
            oHash = hashlib.sha256();
            oHash.update(fnToBytes());
        except Exception as oXcpt:
            self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
            return None;
        return oHash.hexdigest();


    def isSingleTestFailure(self):
        """
        Figure out if this is a single test failing or if it's one of the
        more complicated ones.
        """
        if self.oTree.cErrors == 1:
            return True;
        if self.oTree.deepCountErrorContributers() <= 1:
            return True;
        return False;
279
280
281
282class VirtualTestSheriff(object): # pylint: disable=too-few-public-methods
283 """
284 The virtual test sheriff: assigns failure reasons to failed tests and manages bad test boxes.
285 """
286
287 ## The user account for the virtual sheriff.
288 ksLoginName = 'vsheriff';
289
290 def __init__(self):
291 """
292 Parse command line.
293 """
294 self.oDb = None;
295 self.tsNow = None;
296 self.oTestResultLogic = None;
297 self.oTestSetLogic = None;
298 self.oFailureReasonLogic = None; # FailureReasonLogic;
299 self.oTestResultFailureLogic = None; # TestResultFailureLogic
300 self.oLogin = None;
301 self.uidSelf = -1;
302 self.oLogFile = None;
303 self.asBsodReasons = [];
304 self.asUnitTestReasons = [];
305
306 oParser = OptionParser();
307 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
308 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
309 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
310 help = 'Work period specified in hours. Defauls is 2 hours.');
311 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
312 help = 'Whether to commit the findings to the database. Default is a dry run.');
313 oParser.add_option('--testset', dest = 'aidTestSets', metavar = '<id>', default = [], type = 'int', action = 'append',
314 help = 'Only investigate this one. Accumulates IDs when repeated.');
315 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
316 help = 'Quiet execution');
317 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
318 help = 'Where to log messages.');
319 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
320 help = 'Enables debug mode.');
321
322 (self.oConfig, _) = oParser.parse_args();
323
324 if self.oConfig.sLogFile:
325 self.oLogFile = open(self.oConfig.sLogFile, "a");
326 self.oLogFile.write('VirtualTestSheriff: $Revision: 79945 $ \n');
327
328
329 def eprint(self, sText):
330 """
331 Prints error messages.
332 Returns 1 (for exit code usage.)
333 """
334 print('error: %s' % (sText,));
335 if self.oLogFile is not None:
336 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
337 return 1;
338
339 def dprint(self, sText):
340 """
341 Prints debug info.
342 """
343 if self.oConfig.fDebug:
344 if not self.oConfig.fQuiet:
345 print('debug: %s' % (sText, ));
346 if self.oLogFile is not None:
347 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
348 return 0;
349
350 def vprint(self, sText):
351 """
352 Prints verbose info.
353 """
354 if not self.oConfig.fQuiet:
355 print('info: %s' % (sText,));
356 if self.oLogFile is not None:
357 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
358 return 0;
359
360 def getFailureReason(self, tReason):
361 """ Gets the failure reason object for tReason. """
362 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
363
364 def selfCheck(self):
365 """ Does some self checks, looking up things we expect to be in the database and such. """
366 rcExit = 0;
367 for sAttr in dir(self.__class__):
368 if sAttr.startswith('ktReason_'):
369 tReason = getattr(self.__class__, sAttr);
370 oFailureReason = self.getFailureReason(tReason);
371 if oFailureReason is None:
372 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
373 % (tReason[1], tReason[0],));
374
375 # Check the user account as well.
376 if self.oLogin is None:
377 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
378 if oLogin is None:
379 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
380 return rcExit;
381
382 def sendEmailAlert(self, uidAuthor, sBodyText):
383 """
384 Sends email alert.
385 """
386
387 # Get author email
388 self.oDb.execute('SELECT sEmail FROM Users WHERE uid=%s', (uidAuthor,));
389 sFrom = self.oDb.fetchOne();
390 if sFrom is not None:
391 sFrom = sFrom[0];
392 else:
393 sFrom = g_ksAlertFrom;
394
395 # Gather recipient list.
396 asEmailList = [];
397 for sUser in g_asAlertList:
398 self.oDb.execute('SELECT sEmail FROM Users WHERE sUsername=%s', (sUser,));
399 sEmail = self.oDb.fetchOne();
400 if sEmail:
401 asEmailList.append(sEmail[0]);
402 if not asEmailList:
403 return self.eprint('No email addresses to send alter to!');
404
405 # Compose the message.
406 oMsg = MIMEMultipart();
407 oMsg['From'] = sFrom;
408 oMsg['To'] = COMMASPACE.join(asEmailList);
409 oMsg['Subject'] = g_ksAlertSubject;
410 oMsg.attach(MIMEText(sBodyText, 'plain'))
411
412 # Try send it.
413 try:
414 oSMTP = smtplib.SMTP(g_ksSmtpHost, g_kcSmtpPort);
415 oSMTP.sendmail(sFrom, asEmailList, oMsg.as_string())
416 oSMTP.quit()
417 except smtplib.SMTPException as oXcpt:
418 return self.eprint('Failed to send mail: %s' % (oXcpt,));
419
420 return 0;
421
422 def badTestBoxManagement(self):
423 """
424 Looks for bad test boxes and first tries once to reboot them then disables them.
425 """
426 rcExit = 0;
427
428 #
429 # We skip this entirely if we're running in the past and not in harmless debug mode.
430 #
431 if self.oConfig.cStartHoursAgo != 0 \
432 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
433 return rcExit;
434 tsNow = self.tsNow if self.oConfig.fDebug else None;
435 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
436 oTestBoxLogic = TestBoxLogic(self.oDb);
437
438 #
439 # Generate a list of failures reasons we consider bad-testbox behavior.
440 #
441 aidFailureReasons = [
442 self.getFailureReason(self.ktReason_Host_DriverNotLoaded).idFailureReason,
443 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
444 self.getFailureReason(self.ktReason_Host_DriverNotCompilable).idFailureReason,
445 self.getFailureReason(self.ktReason_Host_InstallationFailed).idFailureReason,
446 ];
447
448 #
449 # Get list of bad test boxes for given period and check them out individually.
450 #
451 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
452 aidFailureReasons = aidFailureReasons);
453 for idTestBox in aidBadTestBoxes:
454 # Skip if the testbox is already disabled or has a pending reboot command.
455 try:
456 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
457 except Exception as oXcpt:
458 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
459 continue;
460 if not oTestBox.fEnabled:
461 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
462 % ( idTestBox, oTestBox.sName, ));
463 continue;
464 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
465 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
466 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
467 continue;
468
469 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
470 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
471 cOkay = 0;
472 cBad = 0;
473 iFirstOkay = len(aoSets);
474 for iSet, oSet in enumerate(aoSets):
475 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
476 cBad += 1;
477 else:
478 # Check for bad failure reasons.
479 oFailure = None;
480 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
481 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oSet.idTestSet)
482 aoFailedResults = oTree.getListOfFailures();
483 for oFailedResult in aoFailedResults:
484 oFailure = self.oTestResultFailureLogic.getById(oFailedResult.idTestResult);
485 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
486 break;
487 else:
488 oFailure = None;
489 if oFailure is not None:
490 cBad += 1;
491 else:
492 # This is an okay test result then.
493 ## @todo maybe check the elapsed time here, it could still be a bad run?
494 cOkay += 1;
495 if iFirstOkay > iSet:
496 iFirstOkay = iSet;
497 if iSet > 10:
498 break;
499
500 # We react if there are two or more bad-testbox statuses at the head of the
501 # history and at least three in the last 10 results.
502 if iFirstOkay >= 2 and cBad > 2:
503 if oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
504 sComment = u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
505 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
506 self.vprint(sComment);
507 self.sendEmailAlert(self.uidSelf, sComment);
508 if self.oConfig.fRealRun is True:
509 try:
510 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
511 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
512 % (iFirstOkay, cBad, cOkay),);
513 except Exception as oXcpt:
514 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
515 else:
516 sComment = u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
517 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
518 self.vprint(sComment);
519 self.sendEmailAlert(self.uidSelf, sComment);
520 if self.oConfig.fRealRun is True:
521 try:
522 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
523 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
524 % (iFirstOkay, cBad, cOkay),);
525 except Exception as oXcpt:
526 rcExit = self.eprint(u'Error rebooting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
527 else:
528 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
529 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
530
531 #
532 # Reset hanged testboxes
533 #
534 cStatusTimeoutMins = 10;
535
536 self.oDb.execute('SELECT TestBoxStatuses.idTestBox\n'
537 ' FROM TestBoxStatuses, TestBoxes\n'
538 ' WHERE TestBoxStatuses.tsUpdated >= (CURRENT_TIMESTAMP - interval \'%s hours\')\n'
539 ' AND TestBoxStatuses.tsUpdated < (CURRENT_TIMESTAMP - interval \'%s minutes\')\n'
540 ' AND TestBoxStatuses.idTestBox = TestBoxes.idTestBox\n'
541 ' AND Testboxes.tsExpire = \'infinity\'::timestamp', (cHoursBack,cStatusTimeoutMins));
542 for idTestBox in self.oDb.fetchAll():
543 idTestBox = idTestBox[0];
544 try:
545 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
546 except Exception as oXcpt:
547 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
548 continue;
549 # Skip if the testbox is already disabled, already reset or there's no iLOM
550 if not oTestBox.fEnabled or oTestBox.ipLom is None or oTestBox.sComment is not None and oTestBox.sComment.find('Automatically reset') >= 0:
551 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
552 % ( idTestBox, oTestBox.sName, ));
553 continue;
554 ## @todo get iLOM credentials from a table?
555 sCmd = 'sshpass -p%s ssh -oStrictHostKeyChecking=no root@%s show /SP && reset /SYS' % (g_ksLomPassword, oTestBox.ipLom,);
556 try:
557 oPs = subprocess.Popen(sCmd, stdout=subprocess.PIPE, shell=True);
558 sStdout = oPs.communicate()[0];
559 iRC = oPs.wait();
560
561 oTestBox.sComment = 'Automatically reset (iRC=%u sStdout=%s)' % (iRC, sStdout,);
562 oTestBoxLogic.editEntry(oTestBox, self.uidSelf, fCommit = True);
563
564 sComment = u'Reset testbox #%u (%s) - iRC=%u sStduot=%s' % ( idTestBox, oTestBox.sName, iRC, sStdout);
565 self.vprint(sComment);
566 self.sendEmailAlert(self.uidSelf, sComment);
567
568 except Exception as oXcpt:
569 rcExit = self.eprint(u'Error resetting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
570
571 return rcExit;
572
573
574 ## @name Failure reasons we know.
575 ## @{
576 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
577 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
578 ktReason_BSOD_0000007F = ( 'BSOD', '0x0000007F' );
579 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
580 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
581 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
582 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
583 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
584 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
585 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
586 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
587 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
588 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
589 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
590 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
591 ktReason_Host_DriverNotCompilable = ( 'Host', 'Driver not compilable' );
592 ktReason_Host_InstallationFailed = ( 'Host', 'Installation failed' );
593 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
594 ktReason_Host_DoubleFreeHeap = ( 'Host', 'Double free or corruption' );
595 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
596 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
597 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
598 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
599 ktReason_Host_NetworkMisconfiguration = ( 'Host', 'Network misconfiguration' );
600 ktReason_Host_TSTInfo_Accuracy_OOR = ( 'Host', 'TSTInfo accuracy out of range' );
601 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
602 ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND = ( 'Networking', 'VERR_INTNET_FLT_IF_NOT_FOUND' );
603 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
604 ktReason_OSInstall_Udev_hang = ( 'O/S Install', 'udev hang' );
605 ktReason_OSInstall_Sata_no_BM = ( 'O/S Install', 'SATA busmaster bit not set' );
606 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
607 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
608 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
609 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
610 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
611 ktReason_API_std_bad_alloc = ( 'API / (XP)COM', 'std::bad_alloc' );
612 ktReason_API_Digest_Mismatch = ( 'API / (XP)COM', 'Digest mismatch' );
613 ktReason_API_MoveVM_SharingViolation = ( 'API / (XP)COM', 'MoveVM sharing violation' );
614 ktReason_API_MoveVM_InvalidParameter = ( 'API / (XP)COM', 'MoveVM invalid parameter' );
615 ktReason_API_Open_Session_Failed = ( 'API / (XP)COM', 'Open session failed' );
616 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
617 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
618 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
619 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
620 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
621 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
622 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
623 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
624 ktReason_Unknown_HalReturnToFirmware = ( 'Unknown', 'HalReturnToFirmware' );
625 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
626 ktReason_Unknown_VM_Start_Error = ( 'Unknown', 'VM Start Error' );
627 ktReason_Unknown_VM_Runtime_Error = ( 'Unknown', 'VM Runtime Error' );
628 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
629 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
630 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
631 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
632 ktReason_GuestBug_CompizVBoxQt = ( 'Guest Bug', 'Compiz + VirtualBox Qt GUI crash' );
633 ## @}
634
635 ## BSOD category.
636 ksBsodCategory = 'BSOD';
637 ## Special reason indicating that the flesh and blood sheriff has work to do.
638 ksBsodAddNew = 'Add new BSOD';
639
640 ## Unit test category.
641 ksUnitTestCategory = 'Unit';
642 ## Special reason indicating that the flesh and blood sheriff has work to do.
643 ksUnitTestAddNew = 'Add new';
644
645 ## Used to indicate that we shouldn't report anything for this test result ID and
646 ## consider promoting the previous error to test set level if it's the only one.
647 ktHarmless = ( 'Probably', 'Caused by previous error' );
648
649
650 def caseClosed(self, oCaseFile):
651 """
652 Reports the findings in the case and closes it.
653 """
654 #
655 # Log it and create a dReasonForReasultId we can use below.
656 #
657 dCommentForResultId = oCaseFile.dCommentForResultId;
658 if oCaseFile.dReasonForResultId:
659 # Must weed out ktHarmless.
660 dReasonForResultId = {};
661 for idKey, tReason in oCaseFile.dReasonForResultId.items():
662 if tReason is not self.ktHarmless:
663 dReasonForResultId[idKey] = tReason;
664 if not dReasonForResultId:
665 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
666 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
667 return False;
668
669 # Try promote to single reason.
670 atValues = dReasonForResultId.values();
671 fSingleReason = True;
672 if len(dReasonForResultId) == 1 and dReasonForResultId.keys()[0] != oCaseFile.oTestSet.idTestResult:
673 self.dprint(u'Promoting single reason to whole set: %s' % (atValues[0],));
674 elif len(dReasonForResultId) > 1 and len(atValues) == atValues.count(atValues[0]):
675 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), atValues[0]));
676 else:
677 fSingleReason = False;
678 if fSingleReason:
679 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: atValues[0], };
680 if dCommentForResultId:
681 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: dCommentForResultId.values()[0], };
682 elif oCaseFile.tReason is not None:
683 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
684 else:
685 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
686 return False;
687
688 self.vprint(u'Closing %s with following reason%s: %s'
689 % ( oCaseFile.sName, 's' if dReasonForResultId > 0 else '', dReasonForResultId, ));
690
691 #
692 # Add the test failure reason record(s).
693 #
694 for idTestResult, tReason in dReasonForResultId.items():
695 oFailureReason = self.getFailureReason(tReason);
696 if oFailureReason is not None:
697 sComment = 'Set by $Revision: 79945 $' # Handy for reverting later.
698 if idTestResult in dCommentForResultId:
699 sComment += ': ' + dCommentForResultId[idTestResult];
700
701 oAdd = TestResultFailureData();
702 oAdd.initFromValues(idTestResult = idTestResult,
703 idFailureReason = oFailureReason.idFailureReason,
704 uidAuthor = self.uidSelf,
705 idTestSet = oCaseFile.oTestSet.idTestSet,
706 sComment = sComment,);
707 if self.oConfig.fRealRun:
708 try:
709 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
710 except Exception as oXcpt:
711 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
712 % (oXcpt, oAdd, oCaseFile.sLongName,));
713 else:
714 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
715 return True;
716
717 #
718 # Tools for assisting log parsing.
719 #
720
721 @staticmethod
722 def matchFollowedByLines(sStr, off, asFollowingLines):
723 """ Worker for isThisFollowedByTheseLines. """
724
725 # Advance off to the end of the line.
726 off = sStr.find('\n', off);
727 if off < 0:
728 return False;
729 off += 1;
730
731 # Match each string with the subsequent lines.
732 for iLine, sLine in enumerate(asFollowingLines):
733 offEnd = sStr.find('\n', off);
734 if offEnd < 0:
735 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
736 if sLine and sStr.find(sLine, off, offEnd) < 0:
737 return False;
738
739 # next line.
740 off = offEnd + 1;
741
742 return True;
743
744 @staticmethod
745 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
746 """
747 Looks for a line contining sFirst which is then followed by lines
748 with the strings in asFollowingLines. (No newline chars anywhere!)
749 Returns True / False.
750 """
751 off = sStr.find(sFirst, 0);
752 while off >= 0:
753 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
754 return True;
755 off = sStr.find(sFirst, off + 1);
756 return False;
757
758 @staticmethod
759 def findAndReturnRestOfLine(sHaystack, sNeedle):
760 """
761 Looks for sNeedle in sHaystack.
762 Returns The text following the needle up to the end of the line.
763 Returns None if not found.
764 """
765 if sHaystack is None:
766 return None;
767 off = sHaystack.find(sNeedle);
768 if off < 0:
769 return None;
770 off += len(sNeedle)
771 offEol = sHaystack.find('\n', off);
772 if offEol < 0:
773 offEol = len(sHaystack);
774 return sHaystack[off:offEol]
775
@staticmethod
def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
    """
    Looks for sNeedle in zero or more haystacks (asHaystacks), trying them
    in the order given.

    Returns the text following the first needle found, up to the end of
    the line.
    Returns None if none of the haystacks contains the needle.
    """
    for sCandidate in asHaystacks:
        sRest = VirtualTestSheriff.findAndReturnRestOfLine(sCandidate, sNeedle);
        if sRest is None:
            continue;
        return sRest;
    return None;
788
789
790 #
791 # The investigative units.
792 #
793
# Needles that investigateInstallUninstallFailure looks for in the
# install / uninstall log section regardless of the host OS.  These are
# checked after any OS specific entries (see the per-OS table).
katSimpleInstallUninstallMainLogReasons = [
    # ( Whether to stop on hit, reason tuple, needle text. )
    ( False, ktReason_Host_LeftoverService,
      'SERVICE_NAME: vbox' ),
];
799
# OS specific needles for investigateInstallUninstallFailure.  The key is
# matched against oCaseFile.oTestBox.sOs and the matching list is put in
# front of katSimpleInstallUninstallMainLogReasons, so these entries are
# checked first.  A 'stop on hit' entry ends the search as soon as it hits.
kdatSimpleInstallUninstallMainLogReasonsPerOs = {
    'darwin': [
        # ( Whether to stop on hit, reason tuple, needle text. )
        ( True, ktReason_Host_DriverNotUnloading,
          'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
    ],
    'linux': [
        # ( Whether to stop on hit, reason tuple, needle text. )
        ( True, ktReason_Host_DriverNotCompilable,
          'This system is not currently set up to build kernel modules' ),
        ( True, ktReason_Host_DriverNotCompilable,
          'This system is currently not set up to build kernel modules' ),
        ( True, ktReason_Host_InstallationFailed,
          'vboxdrv.sh: failed: Look at /var/log/vbox-install.log to find out what went wrong.' ),
        ( True, ktReason_Host_DriverNotUnloading,
          'Cannot unload module vboxdrv'),
    ],
    'solaris': [
        # ( Whether to stop on hit, reason tuple, needle text. )
        ( True, ktReason_Host_InstallationFailed,
          'svcadm: Couldn\'t bind to svc.configd.' ),
        ( True, ktReason_Host_InstallationFailed,
          'pkgadd: ERROR: postinstall script did not complete successfully' ),
        ( True, ktReason_Host_DriverNotUnloading,
          'can\'t unload the module: Device busy' ),
    ],
};
827
828
def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
    """
    Investigates an install or uninstall failure.

    We lump the two together since the installation typically also performs
    an uninstall first and will be seeing similar issues to the uninstall.

    oCaseFile is the case file the reasons are noted in (noteReasonForId),
    its oTestBox.sOs selects the OS specific needles.  oFailedResult is the
    failed test result (idTestResult and, for installs, enmStatus is used).
    sResultLog is the log text to search and fInstall is True for an
    installation and False for an uninstallation.

    Returns True if a reason was noted, None if nothing was found.
    """

    # An installation that timed out is a hang, no need to look at the log.
    if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
        oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
        return True;

    # OS specific needles go first, then the generic ones.
    atSimple = self.katSimpleInstallUninstallMainLogReasons;
    if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
        atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;

    fFoundSomething = False;
    for fStopOnHit, tReason, sNeedle in atSimple:
        # Note! find() returns 0 for a needle at the very start of the log,
        #       which is a hit too, so the test must be '>= 0' and not '> 0'.
        if sResultLog.find(sNeedle) >= 0:
            oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
            if fStopOnHit:
                return True;
            fFoundSomething = True;

    return True if fFoundSomething else None;
854
855
def investigateBadTestBox(self, oCaseFile):
    """
    Looks into a test set with bad-testbox status.

    Nothing is actually investigated here yet, so the case file is ignored
    and False is always returned.
    """
    del oCaseFile; # not used (yet).
    return False;
862
863
def investigateVBoxUnitTest(self, oCaseFile):
    """
    Checks out a VBox unittest problem.

    Each failed result in the tree is either handed to the install /
    uninstall investigator or traced up to its 2nd level node, whose name
    (sans 'testcase/' prefix) is used as failure reason.

    Returns the result of caseClosed() if there is a reason for every
    relevant failure, otherwise False.
    """

    #
    # Process simple test case failures first, using their name as reason.
    # We do the reason management just like for BSODs.
    #
    sMainLog   = oCaseFile.getMainLog();
    aoFailures = oCaseFile.oTree.getListOfFailures();
    cToExplain = 0;
    for oFailure in aoFailures:
        # Failure on the top level result record.
        if oFailure is oCaseFile.oTree:
            self.vprint('TODO: toplevel failure');
            cToExplain += 1;
            continue;

        # Installation and uninstallation failures share an investigator.
        if oFailure.sName in ('Installing VirtualBox', 'Uninstalling VirtualBox'):
            sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailure.tsCreated, oFailure.tsElapsed);
            self.investigateInstallUninstallFailure(oCaseFile, oFailure, sResultLog,
                                                    fInstall = oFailure.sName == 'Installing VirtualBox');
            cToExplain += 1;
            continue;

        if oFailure.oParent is None:
            self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailure,));
            continue;

        # Get the 2nd level node because that's where we'll find the unit test name.
        oUnitTest = oFailure;
        while oUnitTest.oParent.oParent is not None:
            oUnitTest = oUnitTest.oParent;

        # Only report a failure once.
        if oUnitTest.idTestResult in oCaseFile.dReasonForResultId:
            continue;

        sKey = oUnitTest.sName;
        if sKey.startswith('testcase/'):
            sKey = sKey[9:];
        if sKey in self.asUnitTestReasons:
            oCaseFile.noteReasonForId(( self.ksUnitTestCategory, sKey ), oUnitTest.idTestResult);
        else:
            # Unknown unit test: file it under the 'add new' reason with the name as comment.
            self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
            oCaseFile.noteReasonForId(( self.ksUnitTestCategory, self.ksUnitTestAddNew ), oUnitTest.idTestResult,
                                      sComment = sKey);
        cToExplain += 1;

    #
    # If we've caught all the relevant ones by now, report the result.
    #
    if len(oCaseFile.dReasonForResultId) >= cToExplain:
        return self.caseClosed(oCaseFile);
    return False;
918
def extractGuestCpuStack(self, sInfoText):
    """
    Extracts the guest CPU stacks from the given info text.

    A stack starts with a '=== start guest stack VCPU <n> ...' line and
    runs to the '=== end guest stack' marker, or if that is missing, to
    the next start marker or the end of the text.

    Returns a dictionary keyed by the CPU number, value being a list of
    raw stack lines (no header, trailing whitespace stripped).
    Returns empty dictionary if no stacks were found.
    """
    dStacks = {};
    offBegin = sInfoText.find('=== start guest stack VCPU ', 0);
    while offBegin >= 0:
        # Find where this stack ends.
        offStop = sInfoText.find('=== end guest stack', offBegin + 20);
        if offStop < 0:
            offStop = sInfoText.find('=== start guest stack VCPU', offBegin + 20);
            if offStop < 0:
                offStop = len(sInfoText);
        else:
            offStop += 3; # Include the '===' so the end marker becomes the last (dropped) line.

        asLines = sInfoText[offBegin : offStop].replace('\r', '').split('\n'); # '\r' removal is paranoia.

        # Figure the CPU, it is the 6th word of the start marker line.
        asWords = asLines[0].split();
        if not (len(asWords) >= 6 and asWords[5].isdigit()):
            break;

        # Add it (w/o the first two lines and the last one) and advance.
        dStacks[int(asWords[5])] = [sLine.rstrip() for sLine in asLines[2:-1]];
        offBegin = sInfoText.find('=== start guest stack VCPU ', offStop);
    return dStacks;
956
def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
    """
    Investigates kvm_lock_spinning deadlocks.

    The guest CPU stacks are extracted from sInfoText.  It is considered a
    deadlock when there are at least two stacks and every one of them has
    'kvm_lock_spinning' in one of its first three entries.

    oCaseFile and dLogs are not used.

    Returns (True, ktReason_VMM_kvm_lock_spinning) on a hit, otherwise
    (False, None).
    """
    _ = dLogs; _ = oCaseFile;

    #
    # Extract the stacks.  We need more than one CPU to create a deadlock.
    #
    dStacks = self.extractGuestCpuStack(sInfoText);
    self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
    if len(dStacks) >= 2:
        #
        # Examine each of the stacks.  Each must have kvm_lock_spinning in
        # one of the first three entries.  (Slicing rather than indexing
        # via xrange(), as the latter does not exist in Python 3.)
        #
        cHits = 0;
        for asBacktrace in dStacks.values():
            if any(sFrame.find('kvm_lock_spinning') >= 0 for sFrame in asBacktrace[:3]):
                cHits += 1;
        self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
        if cHits == len(dStacks):
            return (True, self.ktReason_VMM_kvm_lock_spinning);

    return (False, None);
982
def investigateInfoHalReturnToFirmware(self, oCaseFile, sInfoText, dLogs):
    """
    Investigates HalReturnToFirmware hangs.

    None of the arguments are looked at, being called at all is taken as
    sufficient evidence.
    Returns (True, ktReason_Unknown_HalReturnToFirmware).
    """
    _ = oCaseFile; _ = sInfoText; _ = dLogs;
    # hope that's sufficient
    return (True, self.ktReason_Unknown_HalReturnToFirmware);
990
## Things we search a main or VM log for to figure out why something went bust.
# Used by investigateVMResult, which looks for each needle in the result's
# section of the main log and, when present, in the VM log.  The entries
# are tried in table order and a 'stop on hit' entry ends the investigation
# of the log set as soon as it matches.
katSimpleMainAndVmLogReasons = [
    # ( Whether to stop on hit, reason tuple, needle text. )
    ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
    ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
    ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
    ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
    ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
    ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
    ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
    ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
    ( True, ktReason_Networking_Nonexistent_host_nic,
      'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
    ( True, ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND,
      'Failed to attach the network LUN (VERR_INTNET_FLT_IF_NOT_FOUND)' ),
    ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
    ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
      'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
    ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
    ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
    ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
    ( True, ktReason_Unknown_File_Not_Found,
      'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
    ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
      'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
    ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
    ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' ),
    ( True, ktReason_OSInstall_Sata_no_BM, 'PCHS=14128/14134/8224' ),
    ( True, ktReason_Host_DoubleFreeHeap, 'double free or corruption' ),
    ( False, ktReason_Unknown_VM_Start_Error, 'VMSetError: ' ),
    ( False, ktReason_Unknown_VM_Start_Error, 'error: failed to open session for' ),
    ( False, ktReason_Unknown_VM_Runtime_Error, 'Console: VM runtime error: fatal=true' ),
];
1024
## Things we search a VBoxHardening.log file for to figure out why something went bust.
# Used by investigateVMResult on release log files with 'VBoxHardening.log'
# in the file name.
katSimpleVBoxHardeningLogReasons = [
    # ( Whether to stop on hit, reason tuple, needle text. )
    ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
    ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
    ( True, ktReason_Host_TSTInfo_Accuracy_OOR, 'RTCRTSPTSTINFO::Accuracy::Millis: Out of range' ),
];
1032
## Things we search a kernel.log file for to figure out why something went bust.
# Used by investigateVMResult on the guest kernel log of a log set.
katSimpleKernelLogReasons = [
    # ( Whether to stop on hit, reason tuple, needle text. )
    ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
    ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn\'t work' ),
    ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
    ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libQt5CoreVBox' ),
    ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libgtk-3' ),
];
1042
## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
# investigateVMResult strips trailing whitespace from every line of the VGA
# text before searching it, so the needles must not contain any trailing
# whitespace on their lines either.  Several needles include the newlines
# around the text of interest.
katSimpleVgaTextReasons = [
    # ( Whether to stop on hit, reason tuple, needle text. )
    ( True, ktReason_Panic_MP_BIOS_IO_APIC,
      "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
    ( True, ktReason_Panic_MP_BIOS_IO_APIC,
      "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
      "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
      "...trying to set up timer as Virtual Wire IRQ... failed.\n"
      "...trying to set up timer as ExtINT IRQ... failed :(.\n"
      "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
      "and send a report. Then try booting with the 'noapic' option\n"
      "\n" ),
    ( True, ktReason_OSInstall_GRUB_hang,
      "-----\nGRUB Loading stage2..\n\n\n\n" ),
    ( True, ktReason_OSInstall_GRUB_hang,
      "-----\nGRUB Loading stage2...\n\n\n\n" ), # the 3 dot hang appears to be less frequent
    ( True, ktReason_OSInstall_GRUB_hang,
      "-----\nGRUB Loading stage2....\n\n\n\n" ), # the 4 dot hang appears to be very infrequent
    ( True, ktReason_OSInstall_GRUB_hang,
      "-----\nGRUB Loading stage2.....\n\n\n\n" ), # the 5 dot hang appears to be more frequent again
    ( True, ktReason_OSInstall_Udev_hang,
      "\nStarting udev:\n\n\n\n" ),
    ( True, ktReason_OSInstall_Udev_hang,
      "\nStarting udev:\n------" ),
    ( True, ktReason_Panic_BootManagerC000000F,
      "Windows failed to start. A recent hardware or software change might be the" ),
    ( True, ktReason_BootManager_Image_corrupt,
      "BOOTMGR image is corrupt. The system cannot boot." ),
];
1073
## Things we search for in the info.txt file.  Require handlers for now.
# The handlers are stored as plain functions and investigateVMResult calls
# them as fnHandler(self, oCaseFile, sInfoText, dLogs) when the trigger
# text is found.  A handler returns a (fStop, tReason) tuple, where
# tReason is None if it didn't find a reason.
katInfoTextHandlers = [
    # ( Trigger text, handler method )
    ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
    ( "HalReturnToFirmware", investigateInfoHalReturnToFirmware ),
];
1080
## Mapping screenshot/failure SHA-256 hashes to failure reasons.
# investigateVMResult lowercases the hash it gets for a failure screenshot
# and compares it for equality with the hashes below, so these must all be
# in lowercase.
katSimpleScreenshotHashReasons = [
    # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
    ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
    ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
    ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
    ( True, ktReason_BSOD_0000007F, '57e1880619e13042a87100e7a38c8974b85ce3866501be621bea0cc696bb2c63' ),
    ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
    ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
    ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
    ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
    ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
    ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
    ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
];
1096
def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
    """
    Investigates a failed VM run.

    The files attached to oFailedResult are collected into log sets, a new
    set starting with each VM release log that isn't a VBoxHardening.log.
    Each set is investigated together with sResultLog, the section of the
    main log belonging to the failed result.  Reasons found are noted in
    oCaseFile using the ID of oFailedResult.

    Returns True if the investigation of a log set returned True,
    otherwise None.
    """

    def investigateLogSet():
        """
        Investigates the current set of VM related logs.

        Returns True when stopping on a hit or when something was found,
        False if nothing was found.  For BSODs and reboot loops the return
        value of oCaseFile.noteReasonForId is returned.
        """
        self.dprint('investigateLogSet: log lengths: result %u, VM %u, kernel %u, vga text %u, info text %u, hard %u'
                    % ( len(sResultLog if sResultLog else ''),
                        len(sVMLog if sVMLog else ''),
                        len(sKrnlLog if sKrnlLog else ''),
                        len(sVgaText if sVgaText else ''),
                        len(sInfoText if sInfoText else ''),
                        len(sNtHardLog if sNtHardLog else ''), ));

        #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
        #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
        #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
        #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
        #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
        #self.dprint(u'hard.txt<<<\n%s\n<<<\n' % (sNtHardLog,));

        # TODO: more

        # Must be initialized before the first needle loop, or hits in the
        # kernel log that don't stop the search would be forgotten again.
        fFoundSomething = False;

        #
        # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
        #
        sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
                                                     'GIM: HyperV: Guest indicates a fatal condition! P0=');
        if sDetails is not None:
            # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
            sKey = sDetails.split(' ', 1)[0];
            try:
                sKey = '0x%08X' % (int(sKey, 16),);
            except ValueError:
                pass; # Not a hex number, so use the text as it is.
            if sKey in self.asBsodReasons:
                tReason = ( self.ksBsodCategory, sKey );
            elif sKey.lower() in self.asBsodReasons: # just in case.
                tReason = ( self.ksBsodCategory, sKey.lower() );
            else:
                self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
                tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
            return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());

        #
        # Look for linux panic.
        #
        # Note! All the needle searches below use '>= 0' since find()
        #       returns 0 for a needle at the very start of the text.
        #
        if sKrnlLog is not None:
            for fStopOnHit, tReason, sNeedle in self.katSimpleKernelLogReasons:
                if sKrnlLog.find(sNeedle) >= 0:
                    oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
                    if fStopOnHit:
                        return True;
                    fFoundSomething = True;

        #
        # Loop thru the simple stuff.
        #
        for fStopOnHit, tReason, sNeedle in self.katSimpleMainAndVmLogReasons:
            if sResultLog.find(sNeedle) >= 0 or (sVMLog is not None and sVMLog.find(sNeedle) >= 0):
                oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
                if fStopOnHit:
                    return True;
                fFoundSomething = True;

        # Continue with vga text.
        if sVgaText:
            for fStopOnHit, tReason, sNeedle in self.katSimpleVgaTextReasons:
                if sVgaText.find(sNeedle) >= 0:
                    oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
                    if fStopOnHit:
                        return True;
                    fFoundSomething = True;

        # Continue with screen hashes.
        if sScreenHash is not None:
            for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
                if sScreenHash == sHash:
                    oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
                    if fStopOnHit:
                        return True;
                    fFoundSomething = True;

        # Check VBoxHardening.log.
        if sNtHardLog is not None:
            for fStopOnHit, tReason, sNeedle in self.katSimpleVBoxHardeningLogReasons:
                if sNtHardLog.find(sNeedle) >= 0:
                    oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
                    if fStopOnHit:
                        return True;
                    fFoundSomething = True;

        #
        # Complicated stuff.
        #
        dLogs = {
            'sVMLog': sVMLog,
            'sNtHardLog': sNtHardLog,
            'sScreenHash': sScreenHash,
            'sKrnlLog': sKrnlLog,
            'sVgaText': sVgaText,
            'sInfoText': sInfoText,
        };

        # info.txt.
        if sInfoText:
            for sNeedle, fnHandler in self.katInfoTextHandlers:
                if sInfoText.find(sNeedle) >= 0:
                    # The handlers are plain functions taken from the class body, thus the explicit self.
                    (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
                    if tReason is not None:
                        oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
                        if fStop:
                            return True;
                        fFoundSomething = True;

        #
        # Check for repeated reboots...
        #
        if sVMLog is not None:
            cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
            if cResets > 10:
                return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
                                                 sComment = 'Counted %s reboots' % (cResets,));

        return fFoundSomething;

    #
    # Check if we got any VM or/and kernel logs.  Treat them as sets in
    # case we run multiple VMs here (this is of course ASSUMING they
    # appear in the order that terminateVmBySession uploads them).
    #
    cTimes = 0;
    sVMLog = None;
    sNtHardLog = None;
    sScreenHash = None;
    sKrnlLog = None;
    sVgaText = None;
    sInfoText = None;
    for oFile in oFailedResult.aoFiles:
        if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
            if 'VBoxHardening.log' not in oFile.sFile:
                # A new VM log starts a new set, so investigate the previous one first.
                if sVMLog is not None:
                    if investigateLogSet() is True:
                        return True;
                    cTimes += 1;
                sInfoText = None;
                sVgaText = None;
                sKrnlLog = None;
                sScreenHash = None;
                sNtHardLog = None;
                sVMLog = oCaseFile.getLogFile(oFile);
            else:
                sNtHardLog = oCaseFile.getLogFile(oFile);
        elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
            sKrnlLog = oCaseFile.getLogFile(oFile);
        elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
            # Right strip each line, the needles in katSimpleVgaTextReasons depend on this.
            sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
        elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
            sInfoText = oCaseFile.getLogFile(oFile);
        elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
            sScreenHash = oCaseFile.getScreenshotSha256(oFile);
            if sScreenHash is not None:
                sScreenHash = sScreenHash.lower();
                self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));

    # Investigate the last set, or the main log section alone if we haven't investigated anything yet.
    if (   sVMLog is not None
        or sNtHardLog is not None
        or cTimes == 0) \
       and investigateLogSet() is True:
        return True;

    return None;
1272
1273
def isResultFromVMRun(self, oFailedResult, sResultLog):
    """
    Checks if this result and corresponding log snippet looks like a VM run.

    Only sResultLog is examined, oFailedResult is not used.
    Returns True if the log contains ' startVm', otherwise False.
    """

    # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
    # Note! Using '>= 0' as find() returns 0 when the log starts with the needle.
    if sResultLog.find(' startVm') >= 0:
        return True;

    # Any other indicators? No?
    _ = oFailedResult;
    return False;
1286
def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
    """
    Checks out a VBox VM test.

    This is generic investigation of a test running one or more VMs, like
    for example a smoke test or a guest installation test.

    The fSingleVM parameter is a hint, which probably won't come in useful.

    Returns the result of caseClosed() if a top level failure was
    identified or if there is a reason for every failed result, otherwise
    False.
    """
    _ = fSingleVM;

    #
    # Get a list of test result failures we should be looking into and the main log.
    #
    aoFailedResults = oCaseFile.oTree.getListOfFailures();
    sMainLog = oCaseFile.getMainLog();

    #
    # There are a set of errors ending up on the top level result record.
    # Should deal with these first.
    #
    # Note! All the needle searches in this method use '>= 0' since find()
    #       returns 0 for a needle at the very start of the text.
    #
    if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
        # Check if we've just got that XPCOM client smoke test shutdown issue.  This will currently always
        # be reported on the top result because vboxinstall.py doesn't add an error for it.  It is easy to
        # ignore other failures in the test if we're not a little bit careful here.
        if sMainLog.find('vboxinstaller: Exit code: -11 (') >= 0:
            oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
            return self.caseClosed(oCaseFile);

        # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
        if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
                                           (' Timeout: ', ' Attempting to abort child...',) ):
            if sMainLog.find('*** glibc detected *** /') >= 0:
                oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
            else:
                oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
            return self.caseClosed(oCaseFile);

        # Look for heap corruption without visible hang.
        if    sMainLog.find('*** glibc detected *** /') >= 0 \
           or sMainLog.find("-1073740940") >= 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
            oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
            return self.caseClosed(oCaseFile);

        # Out of memory w/ timeout.
        if sMainLog.find('sErrId=HostMemoryLow') >= 0:
            oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
            return self.caseClosed(oCaseFile);

        # Stale files like vts_rm.exe (windows).  Only errors after the last successful driver exit count.
        offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
        if offEnd >= 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) >= 0:
            oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
            return self.caseClosed(oCaseFile);

    #
    # XPCOM screwup
    #
    if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') >= 0:
        oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
        return self.caseClosed(oCaseFile);

    #
    # Go thru each failed result.
    #
    for oFailedResult in aoFailedResults:
        self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
        sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
        if oFailedResult.sName == 'Installing VirtualBox':
            self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)

        elif oFailedResult.sName == 'Uninstalling VirtualBox':
            self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)

        elif self.isResultFromVMRun(oFailedResult, sResultLog):
            self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);

        elif sResultLog.find('most likely not unique') >= 0:
            oCaseFile.noteReasonForId(self.ktReason_Host_NetworkMisconfiguration, oFailedResult.idTestResult)
        elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') >= 0:
            oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);

        elif sResultLog.find('The machine is not mutable (state is ') >= 0:
            self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
            oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);

        elif    sResultLog.find('** error: no action was specified') >= 0 \
             or sResultLog.find('(len(self._asXml, asText))') >= 0:
            oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);

        else:
            self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
            self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));

    #
    # Report home and close the case if we got them all, otherwise log it.
    #
    if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
        return self.caseClosed(oCaseFile);

    if oCaseFile.dReasonForResultId:
        self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
                    % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
    else:
        self.vprint(u'XXX: Could not figure out anything at all! :-(');
    return False;
1393
1394
## Things we search a main log for to figure out why something in the API test went bust.
# Used by investigateVBoxApiTest on the main log section of each failed
# result.  The entries are tried in table order and a 'stop on hit' entry
# ends the search for that result.
katSimpleApiMainLogReasons = [
    # ( Whether to stop on hit, reason tuple, needle text. )
    ( True, ktReason_Networking_Nonexistent_host_nic,
      'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
    ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
      'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
    ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
    ( True, ktReason_API_Digest_Mismatch, 'Digest mismatch (VERR_NOT_EQUAL)' ),
    ( True, ktReason_API_MoveVM_SharingViolation, 'rc=VBOX_E_IPRT_ERROR text="Could not copy the log file ' ),
    ( True, ktReason_API_MoveVM_InvalidParameter,
      'rc=VBOX_E_IPRT_ERROR text="Could not copy the setting file ' ),
    ( True, ktReason_API_Open_Session_Failed, 'error: failed to open session for' ),
];
1409
def investigateVBoxApiTest(self, oCaseFile):
    """
    Checks out a VBox API test.

    Each failed result gets its section of the main log searched, first
    for install / uninstall problems and then for the needles in
    katSimpleApiMainLogReasons.

    Returns the result of caseClosed() if there is a reason for every
    failed result, otherwise False.
    """

    #
    # Get a list of test result failures we should be looking into and the main log.
    #
    aoFailedResults = oCaseFile.oTree.getListOfFailures();
    sMainLog = oCaseFile.getMainLog();

    #
    # Go thru each failed result.
    #
    # Note! The needle searches use '>= 0' since find() returns 0 for a
    #       needle at the very start of the text.
    #
    for oFailedResult in aoFailedResults:
        self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
        sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
        if oFailedResult.sName == 'Installing VirtualBox':
            self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)

        elif oFailedResult.sName == 'Uninstalling VirtualBox':
            self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)

        elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') >= 0:
            oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);

        else:
            fFoundSomething = False;
            for fStopOnHit, tReason, sNeedle in self.katSimpleApiMainLogReasons:
                if sResultLog.find(sNeedle) >= 0:
                    oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
                    fFoundSomething = True;
                    if fStopOnHit:
                        break;
            # Only complain about results we could NOT find a reason for.
            if not fFoundSomething:
                self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
                self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));

    #
    # Report home and close the case if we got them all, otherwise log it.
    #
    if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
        return self.caseClosed(oCaseFile);

    if oCaseFile.dReasonForResultId:
        self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
                    % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
    else:
        self.vprint(u'XXX: Could not figure out anything at all! :-(');
    return False;
1460
1461
def reasoningFailures(self):
    """
    Guess the reason for failures.

    Goes over either the test sets given in the config (aidTestSets) or
    the failed test sets without a reason from the configured period,
    opening a case file for each and handing it to the investigator
    matching the kind of test.

    Returns 0.
    """
    #
    # Get a list of failed test sets without any assigned failure reason.
    #
    if self.oConfig.aidTestSets:
        aoTestSets = [self.oTestSetLogic.getById(idTestSet) for idTestSet in self.oConfig.aidTestSets];
    else:
        aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack,
                                                                     tsNow = self.tsNow);

    cClosed = 0;
    for oTestSet in aoTestSets:
        self.dprint(u'----------------------------------- #%u, status %s -----------------------------------'
                    % ( oTestSet.idTestSet, oTestSet.enmStatus,));

        #
        # Open a case file and assign it to the right investigator.
        #
        oTree, _ = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
        oBuild = BuildDataEx().initFromDbWithId(self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
        oTestBox = TestBoxData().initFromDbWithGenId(self.oDb, oTestSet.idGenTestBox);
        oTestGroup = TestGroupData().initFromDbWithId(self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
        oTestCase = TestCaseDataEx().initFromDbWithGenId(self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);

        oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);

        if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
            self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
            fRc = self.investigateBadTestBox(oCaseFile);

        elif oCaseFile.isVBoxUnitTest():
            self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
            fRc = self.investigateVBoxUnitTest(oCaseFile);

        # The kinds of tests treated as single VM tests.
        elif (   oCaseFile.isVBoxInstallTest()
              or oCaseFile.isVBoxUnattendedInstallTest()
              or oCaseFile.isVBoxUSBTest()
              or oCaseFile.isVBoxStorageTest()
              or oCaseFile.isVBoxGAsTest()):
            self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
            fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);

        elif oCaseFile.isVBoxAPITest():
            self.dprint(u'investigateVBoxApiTest is taking over %s.' % (oCaseFile.sLongName,));
            fRc = self.investigateVBoxApiTest(oCaseFile);

        # The kinds of tests that aren't treated as single VM tests.
        elif (   oCaseFile.isVBoxBenchmarkTest()
              or oCaseFile.isVBoxSmokeTest()
              or oCaseFile.isVBoxSerialTest()):
            self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
            fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);

        else:
            self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
            fRc = False;

        # Only a proper True counts as a closed case.
        if fRc is True:
            cClosed += 1;

    self.vprint(u'reasoningFailures: Got %u out of %u' % (cClosed, len(aoTestSets), ));
    return 0;
1537
1538
def main(self):
    """
    The 'main' function.

    Connects to the test manager database, creates the logic helper objects,
    looks up the sheriff's own user account when this is a real run, and then
    runs selfCheck(), badTestBoxManagement() and reasoningFailures().

    The logic helper references are dropped and the database connection and
    the log file are closed on the way out, also when one of the steps above
    raises an exception (the exception is still propagated to the caller).

    Return exit code (0, 1, etc).
    """
    # Database stuff.
    self.oDb = TMDatabaseConnection()
    try:
        self.oTestResultLogic        = TestResultLogic(self.oDb);
        self.oTestSetLogic           = TestSetLogic(self.oDb);
        self.oFailureReasonLogic     = FailureReasonLogic(self.oDb);
        self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
        self.asBsodReasons           = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
        self.asUnitTestReasons       = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);

        # Get a fix on our 'now' before we do anything.  It is the database's
        # current timestamp moved back by the configured number of hours.
        self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
        self.tsNow = self.oDb.fetchOne();

        # If we're supposed to commit anything we need to get our user ID.
        rcExit = 0;
        if self.oConfig.fRealRun:
            self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
            if self.oLogin is None:
                # Whatever eprint returns becomes the exit code.
                rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
            else:
                self.uidSelf = self.oLogin.uid;

        #
        # Do the stuff.
        #
        if rcExit == 0:
            rcExit = self.selfCheck();
        if rcExit == 0:
            rcExit  = self.badTestBoxManagement();
            # The failure reasoning runs even if the testbox management failed,
            # but its status is only used when there is no earlier failure.
            rcExit2 = self.reasoningFailures();
            if rcExit == 0:
                rcExit = rcExit2;
            # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
            if rcExit == 0:
                rcExit = self.badTestBoxManagement();
    finally:
        # Cleanup.  Done in a finally clause so the connection and the log
        # file do not stay open when something above raises.
        self.oFailureReasonLogic     = None;
        self.oTestResultFailureLogic = None;
        self.oTestSetLogic           = None;
        self.oTestResultLogic        = None;
        try:
            self.oDb.close();
        finally:
            # Close the log file even if closing the connection failed.
            self.oDb = None;
            if self.oLogFile is not None:
                self.oLogFile.close();
                self.oLogFile = None;
    return rcExit;
1591
if __name__ == '__main__':
    # Script entry point: run the sheriff and use main()'s return value as
    # the process exit code.
    oSheriff = VirtualTestSheriff();
    sys.exit(oSheriff.main());
1594
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette