VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 80140

Last change on this file since 80140 was 80140, checked in by vboxsync, 6 years ago

vsheriff: Recognize some GA test failures.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 83.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 80140 2019-08-05 19:59:41Z vboxsync $
4# pylint: disable=line-too-long
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15from __future__ import print_function;
16
17__copyright__ = \
18"""
19Copyright (C) 2012-2019 Oracle Corporation
20
21This file is part of VirtualBox Open Source Edition (OSE), as
22available from http://www.virtualbox.org. This file is free software;
23you can redistribute it and/or modify it under the terms of the GNU
24General Public License (GPL) as published by the Free Software
25Foundation, in version 2 as it comes in the "COPYING" file of the
26VirtualBox OSE distribution. VirtualBox OSE is distributed in the
27hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
28
29The contents of this file may alternatively be used under the terms
30of the Common Development and Distribution License Version 1.0
31(CDDL) only, as it comes in the "COPYING.CDDL" file of the
32VirtualBox OSE distribution, in which case the provisions of the
33CDDL are applicable instead of those of the GPL.
34
35You may elect to license modified versions of this file under the
36terms and conditions of either the GPL or the CDDL or both.
37"""
38__version__ = "$Revision: 80140 $"
39
40
41# Standard python imports
42import hashlib;
43import os;
44import re;
45import smtplib;
46import subprocess;
47import sys;
48from email.mime.multipart import MIMEMultipart;
49from email.mime.text import MIMEText;
50from email.utils import COMMASPACE;
51
52if sys.version_info[0] >= 3:
53 from io import StringIO as StringIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
54else:
55 from StringIO import StringIO as StringIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
56from optparse import OptionParser; # pylint: disable=deprecated-module
57from PIL import Image; # pylint: disable=import-error
58
59# Add Test Manager's modules path
60g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
61sys.path.append(g_ksTestManagerDir);
62
63# Test Manager imports
64from common import utils;
65from testmanager.core.db import TMDatabaseConnection;
66from testmanager.core.build import BuildDataEx;
67from testmanager.core.failurereason import FailureReasonLogic;
68from testmanager.core.testbox import TestBoxLogic, TestBoxData;
69from testmanager.core.testcase import TestCaseDataEx;
70from testmanager.core.testgroup import TestGroupData;
71from testmanager.core.testset import TestSetLogic, TestSetData;
72from testmanager.core.testresults import TestResultLogic, TestResultFileData;
73from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
74from testmanager.core.useraccount import UserAccountLogic;
75from testmanager.config import g_ksSmtpHost, g_kcSmtpPort, g_ksAlertFrom, \
76 g_ksAlertSubject, g_asAlertList, g_ksLomPassword;
77
78# Python 3 hacks:
79if sys.version_info[0] >= 3:
80 xrange = range; # pylint: disable=redefined-builtin,invalid-name
81
82
83class VirtualTestSheriffCaseFile(object):
84 """
85 A failure investigation case file.
86
87 """
88
89
90 ## Max log file we'll read into memory. (256 MB)
91 kcbMaxLogRead = 0x10000000;
92
93 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
94 self.oSheriff = oSheriff;
95 self.oTestSet = oTestSet; # TestSetData
96 self.oTree = oTree; # TestResultDataEx
97 self.oBuild = oBuild; # BuildDataEx
98 self.oTestBox = oTestBox; # TestBoxData
99 self.oTestGroup = oTestGroup; # TestGroupData
100 self.oTestCase = oTestCase; # TestCaseDataEx
101 self.sMainLog = ''; # The main log file. Empty string if not accessible.
102 self.sSvcLog = ''; # The VBoxSVC log file. Empty string if not accessible.
103
104 # Generate a case file name.
105 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
106 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
107 % ( self.oTestSet.idTestSet,
108 self.oTestCase.sName,
109 self.oTestBox.sName,
110 self.oTestBox.sOs,
111 self.oTestBox.sOsVersion,
112 self.oTestBox.sCpuArch,
113 self.oTestBox.sCpuName,
114 self.oTestBox.sCpuVendor,
115 self.oBuild.oCat.sProduct,
116 self.oBuild.oCat.sBranch,
117 self.oBuild.oCat.sType,
118 self.oBuild.iRevision, );
119
120 # Investigation notes.
121 self.tReason = None; # None or one of the ktReason_XXX constants.
122 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
123 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
124
125 #
126 # Reason.
127 #
128
129 def noteReason(self, tReason):
130 """ Notes down a possible reason. """
131 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
132 self.tReason = tReason;
133 return True;
134
135 def noteReasonForId(self, tReason, idTestResult, sComment = None):
136 """ Notes down a possible reason for a specific test result. """
137 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
138 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
139 (u' (%s)' % (sComment,)) if sComment is not None else ''));
140 self.dReasonForResultId[idTestResult] = tReason;
141 if sComment is not None:
142 self.dCommentForResultId[idTestResult] = sComment;
143 return True;
144
145
146 #
147 # Test classification.
148 #
149
150 def isVBoxTest(self):
151 """ Test classification: VirtualBox (using the build) """
152 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
153
154 def isVBoxUnitTest(self):
155 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
156 return self.isVBoxTest() \
157 and (self.oTestCase.sName.lower() == 'unit tests' or self.oTestCase.sName.lower() == 'misc: unit tests');
158
159 def isVBoxInstallTest(self):
160 """ Test case classification: VirtualBox Guest installation test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('install:');
163
164 def isVBoxUnattendedInstallTest(self):
165 """ Test case classification: VirtualBox Guest installation test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('uinstall:');
168
169 def isVBoxUSBTest(self):
170 """ Test case classification: VirtualBox USB test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('usb:');
173
174 def isVBoxStorageTest(self):
175 """ Test case classification: VirtualBox Storage test. """
176 return self.isVBoxTest() \
177 and self.oTestCase.sName.lower().startswith('storage:');
178
179 def isVBoxGAsTest(self):
180 """ Test case classification: VirtualBox Guest Additions test. """
181 return self.isVBoxTest() \
182 and ( self.oTestCase.sName.lower().startswith('guest additions')
183 or self.oTestCase.sName.lower().startswith('ga\'s tests'));
184
185 def isVBoxAPITest(self):
186 """ Test case classification: VirtualBox API test. """
187 return self.isVBoxTest() \
188 and self.oTestCase.sName.lower().startswith('api:');
189
190 def isVBoxBenchmarkTest(self):
191 """ Test case classification: VirtualBox Benchmark test. """
192 return self.isVBoxTest() \
193 and self.oTestCase.sName.lower().startswith('benchmark:');
194
195 def isVBoxSmokeTest(self):
196 """ Test case classification: Smoke test. """
197 return self.isVBoxTest() \
198 and self.oTestCase.sName.lower().startswith('smoketest');
199
200 def isVBoxSerialTest(self):
201 """ Test case classification: Smoke test. """
202 return self.isVBoxTest() \
203 and self.oTestCase.sName.lower().startswith('serial:');
204
205
206 #
207 # Utility methods.
208 #
209
210 def getMainLog(self):
211 """
212 Tries to read the main log file since this will be the first source of information.
213 """
214 if self.sMainLog:
215 return self.sMainLog;
216 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
217 if oFile is not None:
218 try:
219 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
220 except Exception as oXcpt:
221 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
222 self.sMainLog = '';
223 else:
224 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
225 return self.sMainLog;
226
227 def getLogFile(self, oFile):
228 """
229 Tries to read the given file as a utf-8 log file.
230 oFile is a TestFileDataEx instance.
231 Returns empty string if problems opening or reading the file.
232 """
233 sContent = '';
234 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
235 if oFile is not None:
236 try:
237 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
238 except Exception as oXcpt:
239 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
240 else:
241 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
242 return sContent;
243
244 def getSvcLog(self):
245 """
246 Tries to read the VBoxSVC log file as it typically not associated with a failing test result.
247 Note! Returns the first VBoxSVC log file we find.
248 """
249 if not self.sSvcLog:
250 aoSvcLogFiles = self.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseSvc);
251 if aoSvcLogFiles:
252 self.sSvcLog = self.getLogFile(aoSvcLogFiles[0]);
253 return self.sSvcLog;
254
255 def getScreenshotSha256(self, oFile):
256 """
257 Tries to read the given screenshot file, uncompress it, and do SHA-2
258 on the raw pixels.
259 Returns SHA-2 digest string on success, None on failure.
260 """
261 (oImgFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
262 try:
263 abImageFile = oImgFile.read();
264 except Exception as oXcpt:
265 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
266 else:
267 try:
268 oImage = Image.open(StringIO(abImageFile));
269 except Exception as oXcpt:
270 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
271 else:
272 try:
273 oHash = hashlib.sha256();
274 oHash.update(oImage.tostring());
275 except Exception as oXcpt:
276 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
277 else:
278 return oHash.hexdigest();
279 return None;
280
281
282
283 def isSingleTestFailure(self):
284 """
285 Figure out if this is a single test failing or if it's one of the
286 more complicated ones.
287 """
288 if self.oTree.cErrors == 1:
289 return True;
290 if self.oTree.deepCountErrorContributers() <= 1:
291 return True;
292 return False;
293
294
295
296class VirtualTestSheriff(object): # pylint: disable=too-few-public-methods
297 """
298 Add build info into Test Manager database.
299 """
300
301 ## The user account for the virtual sheriff.
302 ksLoginName = 'vsheriff';
303
304 def __init__(self):
305 """
306 Parse command line.
307 """
308 self.oDb = None;
309 self.tsNow = None;
310 self.oTestResultLogic = None;
311 self.oTestSetLogic = None;
312 self.oFailureReasonLogic = None; # FailureReasonLogic;
313 self.oTestResultFailureLogic = None; # TestResultFailureLogic
314 self.oLogin = None;
315 self.uidSelf = -1;
316 self.oLogFile = None;
317 self.asBsodReasons = [];
318 self.asUnitTestReasons = [];
319
320 oParser = OptionParser();
321 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
322 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
323 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
324 help = 'Work period specified in hours. Defauls is 2 hours.');
325 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
326 help = 'Whether to commit the findings to the database. Default is a dry run.');
327 oParser.add_option('--testset', dest = 'aidTestSets', metavar = '<id>', default = [], type = 'int', action = 'append',
328 help = 'Only investigate this one. Accumulates IDs when repeated.');
329 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
330 help = 'Quiet execution');
331 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
332 help = 'Where to log messages.');
333 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
334 help = 'Enables debug mode.');
335
336 (self.oConfig, _) = oParser.parse_args();
337
338 if self.oConfig.sLogFile:
339 self.oLogFile = open(self.oConfig.sLogFile, "a");
340 self.oLogFile.write('VirtualTestSheriff: $Revision: 80140 $ \n');
341
342
343 def eprint(self, sText):
344 """
345 Prints error messages.
346 Returns 1 (for exit code usage.)
347 """
348 print('error: %s' % (sText,));
349 if self.oLogFile is not None:
350 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
351 return 1;
352
353 def dprint(self, sText):
354 """
355 Prints debug info.
356 """
357 if self.oConfig.fDebug:
358 if not self.oConfig.fQuiet:
359 print('debug: %s' % (sText, ));
360 if self.oLogFile is not None:
361 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
362 return 0;
363
364 def vprint(self, sText):
365 """
366 Prints verbose info.
367 """
368 if not self.oConfig.fQuiet:
369 print('info: %s' % (sText,));
370 if self.oLogFile is not None:
371 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
372 return 0;
373
374 def getFailureReason(self, tReason):
375 """ Gets the failure reason object for tReason. """
376 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
377
378 def selfCheck(self):
379 """ Does some self checks, looking up things we expect to be in the database and such. """
380 rcExit = 0;
381 for sAttr in dir(self.__class__):
382 if sAttr.startswith('ktReason_'):
383 tReason = getattr(self.__class__, sAttr);
384 oFailureReason = self.getFailureReason(tReason);
385 if oFailureReason is None:
386 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
387 % (tReason[1], tReason[0],));
388
389 # Check the user account as well.
390 if self.oLogin is None:
391 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
392 if oLogin is None:
393 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
394 return rcExit;
395
396 def sendEmailAlert(self, uidAuthor, sBodyText):
397 """
398 Sends email alert.
399 """
400
401 # Get author email
402 self.oDb.execute('SELECT sEmail FROM Users WHERE uid=%s', (uidAuthor,));
403 sFrom = self.oDb.fetchOne();
404 if sFrom is not None:
405 sFrom = sFrom[0];
406 else:
407 sFrom = g_ksAlertFrom;
408
409 # Gather recipient list.
410 asEmailList = [];
411 for sUser in g_asAlertList:
412 self.oDb.execute('SELECT sEmail FROM Users WHERE sUsername=%s', (sUser,));
413 sEmail = self.oDb.fetchOne();
414 if sEmail:
415 asEmailList.append(sEmail[0]);
416 if not asEmailList:
417 return self.eprint('No email addresses to send alter to!');
418
419 # Compose the message.
420 oMsg = MIMEMultipart();
421 oMsg['From'] = sFrom;
422 oMsg['To'] = COMMASPACE.join(asEmailList);
423 oMsg['Subject'] = g_ksAlertSubject;
424 oMsg.attach(MIMEText(sBodyText, 'plain'))
425
426 # Try send it.
427 try:
428 oSMTP = smtplib.SMTP(g_ksSmtpHost, g_kcSmtpPort);
429 oSMTP.sendmail(sFrom, asEmailList, oMsg.as_string())
430 oSMTP.quit()
431 except smtplib.SMTPException as oXcpt:
432 return self.eprint('Failed to send mail: %s' % (oXcpt,));
433
434 return 0;
435
436 def badTestBoxManagement(self):
437 """
438 Looks for bad test boxes and first tries once to reboot them then disables them.
439 """
440 rcExit = 0;
441
442 #
443 # We skip this entirely if we're running in the past and not in harmless debug mode.
444 #
445 if self.oConfig.cStartHoursAgo != 0 \
446 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
447 return rcExit;
448 tsNow = self.tsNow if self.oConfig.fDebug else None;
449 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
450 oTestBoxLogic = TestBoxLogic(self.oDb);
451
452 #
453 # Generate a list of failures reasons we consider bad-testbox behavior.
454 #
455 aidFailureReasons = [
456 self.getFailureReason(self.ktReason_Host_DriverNotLoaded).idFailureReason,
457 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
458 self.getFailureReason(self.ktReason_Host_DriverNotCompilable).idFailureReason,
459 self.getFailureReason(self.ktReason_Host_InstallationFailed).idFailureReason,
460 ];
461
462 #
463 # Get list of bad test boxes for given period and check them out individually.
464 #
465 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
466 aidFailureReasons = aidFailureReasons);
467 for idTestBox in aidBadTestBoxes:
468 # Skip if the testbox is already disabled or has a pending reboot command.
469 try:
470 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
471 except Exception as oXcpt:
472 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
473 continue;
474 if not oTestBox.fEnabled:
475 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
476 % ( idTestBox, oTestBox.sName, ));
477 continue;
478 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
479 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
480 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
481 continue;
482
483 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
484 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
485 cOkay = 0;
486 cBad = 0;
487 iFirstOkay = len(aoSets);
488 for iSet, oSet in enumerate(aoSets):
489 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
490 cBad += 1;
491 else:
492 # Check for bad failure reasons.
493 oFailure = None;
494 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
495 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oSet.idTestSet)
496 aoFailedResults = oTree.getListOfFailures();
497 for oFailedResult in aoFailedResults:
498 oFailure = self.oTestResultFailureLogic.getById(oFailedResult.idTestResult);
499 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
500 break;
501 else:
502 oFailure = None;
503 if oFailure is not None:
504 cBad += 1;
505 else:
506 # This is an okay test result then.
507 ## @todo maybe check the elapsed time here, it could still be a bad run?
508 cOkay += 1;
509 if iFirstOkay > iSet:
510 iFirstOkay = iSet;
511 if iSet > 10:
512 break;
513
514 # We react if there are two or more bad-testbox statuses at the head of the
515 # history and at least three in the last 10 results.
516 if iFirstOkay >= 2 and cBad > 2:
517 if oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
518 sComment = u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
519 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
520 self.vprint(sComment);
521 self.sendEmailAlert(self.uidSelf, sComment);
522 if self.oConfig.fRealRun is True:
523 try:
524 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
525 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
526 % (iFirstOkay, cBad, cOkay),);
527 except Exception as oXcpt:
528 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
529 else:
530 sComment = u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
531 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
532 self.vprint(sComment);
533 self.sendEmailAlert(self.uidSelf, sComment);
534 if self.oConfig.fRealRun is True:
535 try:
536 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
537 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
538 % (iFirstOkay, cBad, cOkay),);
539 except Exception as oXcpt:
540 rcExit = self.eprint(u'Error rebooting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
541 else:
542 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
543 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
544
545 #
546 # Reset hanged testboxes
547 #
548 cStatusTimeoutMins = 10;
549
550 self.oDb.execute('SELECT TestBoxStatuses.idTestBox\n'
551 ' FROM TestBoxStatuses, TestBoxes\n'
552 ' WHERE TestBoxStatuses.tsUpdated >= (CURRENT_TIMESTAMP - interval \'%s hours\')\n'
553 ' AND TestBoxStatuses.tsUpdated < (CURRENT_TIMESTAMP - interval \'%s minutes\')\n'
554 ' AND TestBoxStatuses.idTestBox = TestBoxes.idTestBox\n'
555 ' AND Testboxes.tsExpire = \'infinity\'::timestamp', (cHoursBack,cStatusTimeoutMins));
556 for idTestBox in self.oDb.fetchAll():
557 idTestBox = idTestBox[0];
558 try:
559 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
560 except Exception as oXcpt:
561 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
562 continue;
563 # Skip if the testbox is already disabled, already reset or there's no iLOM
564 if not oTestBox.fEnabled or oTestBox.ipLom is None or oTestBox.sComment is not None and oTestBox.sComment.find('Automatically reset') >= 0:
565 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
566 % ( idTestBox, oTestBox.sName, ));
567 continue;
568 ## @todo get iLOM credentials from a table?
569 sCmd = 'sshpass -p%s ssh -oStrictHostKeyChecking=no root@%s show /SP && reset /SYS' % (g_ksLomPassword, oTestBox.ipLom,);
570 try:
571 oPs = subprocess.Popen(sCmd, stdout=subprocess.PIPE, shell=True);
572 sStdout = oPs.communicate()[0];
573 iRC = oPs.wait();
574
575 oTestBox.sComment = 'Automatically reset (iRC=%u sStdout=%s)' % (iRC, sStdout,);
576 oTestBoxLogic.editEntry(oTestBox, self.uidSelf, fCommit = True);
577
578 sComment = u'Reset testbox #%u (%s) - iRC=%u sStduot=%s' % ( idTestBox, oTestBox.sName, iRC, sStdout);
579 self.vprint(sComment);
580 self.sendEmailAlert(self.uidSelf, sComment);
581
582 except Exception as oXcpt:
583 rcExit = self.eprint(u'Error resetting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
584
585 return rcExit;
586
587
588 ## @name Failure reasons we know.
589 ## @{
590 ktReason_Add_CopyToGuest_Timeout = ( 'Additions', 'CopyToGuest Timeout' );
591 ktReason_Add_FlushViewOfFile = ( 'Additions', 'FlushViewOfFile' );
592 ktReason_Add_Mmap_Coherency = ( 'Additions', 'mmap coherency' );
593 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
594 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
595 ktReason_BSOD_0000007F = ( 'BSOD', '0x0000007F' );
596 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
597 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
598 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
599 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
600 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
601 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
602 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
603 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
604 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
605 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
606 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
607 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
608 ktReason_Host_DriverNotCompilable = ( 'Host', 'Driver not compilable' );
609 ktReason_Host_InstallationFailed = ( 'Host', 'Installation failed' );
610 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
611 ktReason_Host_DoubleFreeHeap = ( 'Host', 'Double free or corruption' );
612 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
613 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
614 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
615 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
616 ktReason_Host_NetworkMisconfiguration = ( 'Host', 'Network misconfiguration' );
617 ktReason_Host_TSTInfo_Accuracy_OOR = ( 'Host', 'TSTInfo accuracy out of range' );
618 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
619 ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND = ( 'Networking', 'VERR_INTNET_FLT_IF_NOT_FOUND' );
620 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
621 ktReason_OSInstall_Udev_hang = ( 'O/S Install', 'udev hang' );
622 ktReason_OSInstall_Sata_no_BM = ( 'O/S Install', 'SATA busmaster bit not set' );
623 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
624 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
625 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
626 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
627 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
628 ktReason_API_std_bad_alloc = ( 'API / (XP)COM', 'std::bad_alloc' );
629 ktReason_API_Digest_Mismatch = ( 'API / (XP)COM', 'Digest mismatch' );
630 ktReason_API_MoveVM_SharingViolation = ( 'API / (XP)COM', 'MoveVM sharing violation' );
631 ktReason_API_MoveVM_InvalidParameter = ( 'API / (XP)COM', 'MoveVM invalid parameter' );
632 ktReason_API_Open_Session_Failed = ( 'API / (XP)COM', 'Open session failed' );
633 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
634 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
635 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
636 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
637 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
638 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
639 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
640 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
641 ktReason_Unknown_HalReturnToFirmware = ( 'Unknown', 'HalReturnToFirmware' );
642 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
643 ktReason_Unknown_VM_Start_Error = ( 'Unknown', 'VM Start Error' );
644 ktReason_Unknown_VM_Runtime_Error = ( 'Unknown', 'VM Runtime Error' );
645 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
646 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
647 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
648 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
649 ktReason_GuestBug_CompizVBoxQt = ( 'Guest Bug', 'Compiz + VirtualBox Qt GUI crash' );
650 ## @}
651
652 ## BSOD category.
653 ksBsodCategory = 'BSOD';
654 ## Special reason indicating that the flesh and blood sheriff has work to do.
655 ksBsodAddNew = 'Add new BSOD';
656
657 ## Unit test category.
658 ksUnitTestCategory = 'Unit';
659 ## Special reason indicating that the flesh and blood sheriff has work to do.
660 ksUnitTestAddNew = 'Add new';
661
662 ## Used for indica that we shouldn't report anything for this test result ID and
663 ## consider promoting the previous error to test set level if it's the only one.
664 ktHarmless = ( 'Probably', 'Caused by previous error' );
665
666
667 def caseClosed(self, oCaseFile):
668 """
669 Reports the findings in the case and closes it.
670 """
671 #
672 # Log it and create a dReasonForReasultId we can use below.
673 #
674 dCommentForResultId = oCaseFile.dCommentForResultId;
675 if oCaseFile.dReasonForResultId:
676 # Must weed out ktHarmless.
677 dReasonForResultId = {};
678 for idKey, tReason in oCaseFile.dReasonForResultId.items():
679 if tReason is not self.ktHarmless:
680 dReasonForResultId[idKey] = tReason;
681 if not dReasonForResultId:
682 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
683 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
684 return False;
685
686 # Try promote to single reason.
687 atValues = dReasonForResultId.values();
688 fSingleReason = True;
689 if len(dReasonForResultId) == 1 and dReasonForResultId.keys()[0] != oCaseFile.oTestSet.idTestResult:
690 self.dprint(u'Promoting single reason to whole set: %s' % (atValues[0],));
691 elif len(dReasonForResultId) > 1 and len(atValues) == atValues.count(atValues[0]):
692 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), atValues[0]));
693 else:
694 fSingleReason = False;
695 if fSingleReason:
696 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: atValues[0], };
697 if dCommentForResultId:
698 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: dCommentForResultId.values()[0], };
699 elif oCaseFile.tReason is not None:
700 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
701 else:
702 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
703 return False;
704
705 self.vprint(u'Closing %s with following reason%s: %s'
706 % ( oCaseFile.sName, 's' if dReasonForResultId > 0 else '', dReasonForResultId, ));
707
708 #
709 # Add the test failure reason record(s).
710 #
711 for idTestResult, tReason in dReasonForResultId.items():
712 oFailureReason = self.getFailureReason(tReason);
713 if oFailureReason is not None:
714 sComment = 'Set by $Revision: 80140 $' # Handy for reverting later.
715 if idTestResult in dCommentForResultId:
716 sComment += ': ' + dCommentForResultId[idTestResult];
717
718 oAdd = TestResultFailureData();
719 oAdd.initFromValues(idTestResult = idTestResult,
720 idFailureReason = oFailureReason.idFailureReason,
721 uidAuthor = self.uidSelf,
722 idTestSet = oCaseFile.oTestSet.idTestSet,
723 sComment = sComment,);
724 if self.oConfig.fRealRun:
725 try:
726 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
727 except Exception as oXcpt:
728 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
729 % (oXcpt, oAdd, oCaseFile.sLongName,));
730 else:
731 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
732 return True;
733
734 #
735 # Tools for assiting log parsing.
736 #
737
738 @staticmethod
739 def matchFollowedByLines(sStr, off, asFollowingLines):
740 """ Worker for isThisFollowedByTheseLines. """
741
742 # Advance off to the end of the line.
743 off = sStr.find('\n', off);
744 if off < 0:
745 return False;
746 off += 1;
747
748 # Match each string with the subsequent lines.
749 for iLine, sLine in enumerate(asFollowingLines):
750 offEnd = sStr.find('\n', off);
751 if offEnd < 0:
752 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
753 if sLine and sStr.find(sLine, off, offEnd) < 0:
754 return False;
755
756 # next line.
757 off = offEnd + 1;
758
759 return True;
760
761 @staticmethod
762 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
763 """
764 Looks for a line contining sFirst which is then followed by lines
765 with the strings in asFollowingLines. (No newline chars anywhere!)
766 Returns True / False.
767 """
768 off = sStr.find(sFirst, 0);
769 while off >= 0:
770 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
771 return True;
772 off = sStr.find(sFirst, off + 1);
773 return False;
774
775 @staticmethod
776 def findAndReturnRestOfLine(sHaystack, sNeedle):
777 """
778 Looks for sNeedle in sHaystack.
779 Returns The text following the needle up to the end of the line.
780 Returns None if not found.
781 """
782 if sHaystack is None:
783 return None;
784 off = sHaystack.find(sNeedle);
785 if off < 0:
786 return None;
787 off += len(sNeedle)
788 offEol = sHaystack.find('\n', off);
789 if offEol < 0:
790 offEol = len(sHaystack);
791 return sHaystack[off:offEol]
792
793 @staticmethod
794 def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
795 """
796 Looks for sNeedle in zeroe or more haystacks (asHaystack).
797 Returns The text following the first needed found up to the end of the line.
798 Returns None if not found.
799 """
800 for sHaystack in asHaystacks:
801 sRet = VirtualTestSheriff.findAndReturnRestOfLine(sHaystack, sNeedle);
802 if sRet is not None:
803 return sRet;
804 return None;
805
806
807 #
808 # The investigative units.
809 #
810
811 katSimpleInstallUninstallMainLogReasons = [
812 # ( Whether to stop on hit, reason tuple, needle text. )
813 ( False, ktReason_Host_LeftoverService,
814 'SERVICE_NAME: vbox' ),
815 ];
816
817 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
818 'darwin': [
819 # ( Whether to stop on hit, reason tuple, needle text. )
820 ( True, ktReason_Host_DriverNotUnloading,
821 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
822 ],
823 'linux': [
824 # ( Whether to stop on hit, reason tuple, needle text. )
825 ( True, ktReason_Host_DriverNotCompilable,
826 'This system is not currently set up to build kernel modules' ),
827 ( True, ktReason_Host_DriverNotCompilable,
828 'This system is currently not set up to build kernel modules' ),
829 ( True, ktReason_Host_InstallationFailed,
830 'vboxdrv.sh: failed: Look at /var/log/vbox-install.log to find out what went wrong.' ),
831 ( True, ktReason_Host_DriverNotUnloading,
832 'Cannot unload module vboxdrv'),
833 ],
834 'solaris': [
835 # ( Whether to stop on hit, reason tuple, needle text. )
836 ( True, ktReason_Host_InstallationFailed,
837 'svcadm: Couldn\'t bind to svc.configd.' ),
838 ( True, ktReason_Host_InstallationFailed,
839 'pkgadd: ERROR: postinstall script did not complete successfully' ),
840 ( True, ktReason_Host_DriverNotUnloading,
841 'can\'t unload the module: Device busy' ),
842 ],
843 };
844
845
846 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
847 """
848 Investigates an install or uninstall failure.
849
850 We lump the two together since the installation typically also performs
851 an uninstall first and will be seeing similar issues to the uninstall.
852 """
853
854 if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
855 oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
856 return True;
857
858 atSimple = self.katSimpleInstallUninstallMainLogReasons;
859 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
860 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
861
862 fFoundSomething = False;
863 for fStopOnHit, tReason, sNeedle in atSimple:
864 if sResultLog.find(sNeedle) > 0:
865 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
866 if fStopOnHit:
867 return True;
868 fFoundSomething = True;
869
870 return fFoundSomething if fFoundSomething else None;
871
872
873 def investigateBadTestBox(self, oCaseFile):
874 """
875 Checks out bad-testbox statuses.
876 """
877 _ = oCaseFile;
878 return False;
879
880
881 def investigateVBoxUnitTest(self, oCaseFile):
882 """
883 Checks out a VBox unittest problem.
884 """
885
886 #
887 # Process simple test case failures first, using their name as reason.
888 # We do the reason management just like for BSODs.
889 #
890 cRelevantOnes = 0;
891 sMainLog = oCaseFile.getMainLog();
892 aoFailedResults = oCaseFile.oTree.getListOfFailures();
893 for oFailedResult in aoFailedResults:
894 if oFailedResult is oCaseFile.oTree:
895 self.vprint('TODO: toplevel failure');
896 cRelevantOnes += 1
897
898 elif oFailedResult.sName == 'Installing VirtualBox':
899 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
900 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
901 cRelevantOnes += 1
902
903 elif oFailedResult.sName == 'Uninstalling VirtualBox':
904 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
905 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
906 cRelevantOnes += 1
907
908 elif oFailedResult.oParent is not None:
909 # Get the 2nd level node because that's where we'll find the unit test name.
910 while oFailedResult.oParent.oParent is not None:
911 oFailedResult = oFailedResult.oParent;
912
913 # Only report a failure once.
914 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
915 sKey = oFailedResult.sName;
916 if sKey.startswith('testcase/'):
917 sKey = sKey[9:];
918 if sKey in self.asUnitTestReasons:
919 tReason = ( self.ksUnitTestCategory, sKey );
920 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
921 else:
922 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
923 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
924 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
925 cRelevantOnes += 1
926 else:
927 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
928
929 #
930 # If we've caught all the relevant ones by now, report the result.
931 #
932 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
933 return self.caseClosed(oCaseFile);
934 return False;
935
936 def extractGuestCpuStack(self, sInfoText):
937 """
938 Extracts the guest CPU stacks from the input file.
939
940 Returns a dictionary keyed by the CPU number, value being a list of
941 raw stack lines (no header).
942 Returns empty dictionary if no stacks where found.
943 """
944 dRet = {};
945 off = 0;
946 while True:
947 # Find the stack.
948 offStart = sInfoText.find('=== start guest stack VCPU ', off);
949 if offStart < 0:
950 break;
951 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
952 if offEnd >= 0:
953 offEnd += 3;
954 else:
955 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
956 if offEnd < 0:
957 offEnd = len(sInfoText);
958
959 sStack = sInfoText[offStart : offEnd];
960 sStack = sStack.replace('\r',''); # paranoia
961 asLines = sStack.split('\n');
962
963 # Figure the CPU.
964 asWords = asLines[0].split();
965 if len(asWords) < 6 or not asWords[5].isdigit():
966 break;
967 iCpu = int(asWords[5]);
968
969 # Add it and advance.
970 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
971 off = offEnd;
972 return dRet;
973
974 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
975 """ Investigates kvm_lock_spinning deadlocks """
976 #
977 # Extract the stacks. We need more than one CPU to create a deadlock.
978 #
979 dStacks = self.extractGuestCpuStack(sInfoText);
980 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
981 if len(dStacks) >= 2:
982 #
983 # Examin each of the stacks. Each must have kvm_lock_spinning in
984 # one of the first three entries.
985 #
986 cHits = 0;
987 for iCpu in dStacks:
988 asBacktrace = dStacks[iCpu];
989 for iFrame in xrange(min(3, len(asBacktrace))):
990 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
991 cHits += 1;
992 break;
993 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
994 if cHits == len(dStacks):
995 return (True, self.ktReason_VMM_kvm_lock_spinning);
996
997 _ = dLogs; _ = oCaseFile;
998 return (False, None);
999
1000 def investigateInfoHalReturnToFirmware(self, oCaseFile, sInfoText, dLogs):
1001 """ Investigates HalReturnToFirmware hangs """
1002 del oCaseFile
1003 del sInfoText
1004 del dLogs
1005 # hope that's sufficient
1006 return (True, self.ktReason_Unknown_HalReturnToFirmware);
1007
1008 ## Things we search a main or VM log for to figure out why something went bust.
1009 ## @note DO NOT ADD MORE STUFF HERE!
1010 ## Please use katSimpleMainLogReasons and katSimpleVmLogReasons instead!
1011 katSimpleMainAndVmLogReasonsDeprecated = [
1012 # ( Whether to stop on hit, reason tuple, needle text. )
1013 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
1014 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
1015 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
1016 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
1017 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
1018 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
1019 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
1020 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
1021 ( True, ktReason_Networking_Nonexistent_host_nic,
1022 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1023 ( True, ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND,
1024 'Failed to attach the network LUN (VERR_INTNET_FLT_IF_NOT_FOUND)' ),
1025 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
1026 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1027 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1028 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1029 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
1030 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
1031 ( True, ktReason_Unknown_File_Not_Found,
1032 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
1033 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
1034 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
1035 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
1036 ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' ),
1037 ( True, ktReason_OSInstall_Sata_no_BM, 'PCHS=14128/14134/8224' ),
1038 ( True, ktReason_Host_DoubleFreeHeap, 'double free or corruption' ),
1039 #( False, ktReason_Unknown_VM_Start_Error, 'VMSetError: ' ), - false positives for stuff like:
1040 # "VMSetError: VD: Backend 'VBoxIsoMaker' does not support async I/O"
1041 ( False, ktReason_Unknown_VM_Start_Error, 'error: failed to open session for' ),
1042 ( False, ktReason_Unknown_VM_Runtime_Error, 'Console: VM runtime error: fatal=true' ),
1043 ];
1044
1045 ## This we search a main log for to figure out why something went bust.
1046 katSimpleMainLogReasons = [
1047 # ( Whether to stop on hit, reason tuple, needle text. )
1048 ];
1049
1050 ## This we search a VM log for to figure out why something went bust.
1051 katSimpleVmLogReasons = [
1052 # ( Whether to stop on hit, reason tuple, needle text. )
1053 ];
1054
1055 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
1056 katSimpleVBoxHardeningLogReasons = [
1057 # ( Whether to stop on hit, reason tuple, needle text. )
1058 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
1059 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
1060 ( True, ktReason_Host_TSTInfo_Accuracy_OOR, 'RTCRTSPTSTINFO::Accuracy::Millis: Out of range' ),
1061 ( False, ktReason_Unknown_VM_Crash, 'Quitting: ExitCode=0xc0000005 (rcNtWait=' ),
1062 ];
1063
1064 ## Things we search a kernel.log file for to figure out why something went bust.
1065 katSimpleKernelLogReasons = [
1066 # ( Whether to stop on hit, reason tuple, needle text. )
1067 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
1068 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn\'t work' ),
1069 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
1070 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libQt5CoreVBox' ),
1071 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libgtk-3' ),
1072 ];
1073
1074 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
1075 katSimpleVgaTextReasons = [
1076 # ( Whether to stop on hit, reason tuple, needle text. )
1077 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1078 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
1079 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1080 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
1081 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
1082 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
1083 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
1084 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
1085 "and send a report. Then try booting with the 'noapic' option\n"
1086 "\n" ),
1087 ( True, ktReason_OSInstall_GRUB_hang,
1088 "-----\nGRUB Loading stage2..\n\n\n\n" ),
1089 ( True, ktReason_OSInstall_GRUB_hang,
1090 "-----\nGRUB Loading stage2...\n\n\n\n" ), # the 3 dot hang appears to be less frequent
1091 ( True, ktReason_OSInstall_GRUB_hang,
1092 "-----\nGRUB Loading stage2....\n\n\n\n" ), # the 4 dot hang appears to be very infrequent
1093 ( True, ktReason_OSInstall_GRUB_hang,
1094 "-----\nGRUB Loading stage2.....\n\n\n\n" ), # the 5 dot hang appears to be more frequent again
1095 ( True, ktReason_OSInstall_Udev_hang,
1096 "\nStarting udev:\n\n\n\n" ),
1097 ( True, ktReason_OSInstall_Udev_hang,
1098 "\nStarting udev:\n------" ),
1099 ( True, ktReason_Panic_BootManagerC000000F,
1100 "Windows failed to start. A recent hardware or software change might be the" ),
1101 ( True, ktReason_BootManager_Image_corrupt,
1102 "BOOTMGR image is corrupt. The system cannot boot." ),
1103 ];
1104
1105 ## Things we search for in the info.txt file. Require handlers for now.
1106 katInfoTextHandlers = [
1107 # ( Trigger text, handler method )
1108 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
1109 ( "HalReturnToFirmware", investigateInfoHalReturnToFirmware ),
1110 ];
1111
1112 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
1113 katSimpleScreenshotHashReasons = [
1114 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
1115 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
1116 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
1117 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
1118 ( True, ktReason_BSOD_0000007F, '57e1880619e13042a87100e7a38c8974b85ce3866501be621bea0cc696bb2c63' ),
1119 ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
1120 ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
1121 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
1122 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
1123 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
1124 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
1125 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
1126 ];
1127
1128
1129 def scanLog(self, asLogs, atNeedles, oCaseFile, idTestResult):
1130 """
1131 Scans for atNeedles in sLog.
1132
1133 Returns True if a stop-on-hit neelde was found.
1134 Returns None if a no-stop reason was found.
1135 Returns False if no hit.
1136 """
1137 fRet = False;
1138 for fStopOnHit, tReason, oNeedle in atNeedles:
1139 fMatch = False;
1140 if utils.isString(oNeedle):
1141 for sLog in asLogs:
1142 if sLog:
1143 fMatch |= sLog.find(oNeedle) > 0;
1144 else:
1145 for sLog in asLogs:
1146 if sLog:
1147 fMatch |= oNeedle.search(sLog) is not None;
1148 if fMatch:
1149 oCaseFile.noteReasonForId(tReason, idTestResult);
1150 if fStopOnHit:
1151 return True;
1152 fRet = None;
1153 return fRet;
1154
1155
1156 def investigateGATest(self, oCaseFile, oFailedResult, sResultLog):
1157 """
1158 Investigates a failed VM run.
1159 """
1160 enmReason = None;
1161 if oFailedResult.sName == 'mmap':
1162 if sResultLog.find('FsPerf: Flush issue at offset ') >= 0:
1163 enmReason = self.ktReason_Add_Mmap_Coherency;
1164 elif sResultLog.find('FlushViewOfFile') >= 0:
1165 enmReason = self.ktReason_Add_FlushViewOfFile;
1166 elif oFailedResult.sName == 'Copy to guest':
1167 if sResultLog.find('*** abort action ***') >= 0:
1168 enmReason = self.ktReason_Add_CopyToGuest_Timeout;
1169
1170 if enmReason is not None:
1171 return oCaseFile.noteReasonForId(enmReason, oFailedResult.idTestResult);
1172
1173 self.vprint(u'TODO: Cannot place GA failure idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1174 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1175 return False;
1176
1177 def isResultFromGATest(self, oFailedResult):
1178 """
1179 Checks if this result and corresponding log snippet looks like a GA test run.
1180 """
1181 while oFailedResult is not None:
1182 if oFailedResult.sName in [ 'Guest Control', 'Shared Folders', 'FsPerf', ]:
1183 return True;
1184 oFailedResult = oFailedResult.oParent;
1185 return False;
1186
1187
1188 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
1189 """
1190 Investigates a failed VM run.
1191 """
1192
1193 def investigateLogSet():
1194 """
1195 Investigates the current set of VM related logs.
1196 """
1197 self.dprint('investigateLogSet: log lengths: result %u, VM %u, kernel %u, vga text %u, info text %u, hard %u'
1198 % ( len(sResultLog if sResultLog else ''),
1199 len(sVMLog if sVMLog else ''),
1200 len(sKrnlLog if sKrnlLog else ''),
1201 len(sVgaText if sVgaText else ''),
1202 len(sInfoText if sInfoText else ''),
1203 len(sNtHardLog if sNtHardLog else ''),));
1204
1205 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
1206 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
1207 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
1208 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
1209 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
1210 #self.dprint(u'hard.txt<<<\n%s\n<<<\n' % (sNtHardLog,));
1211
1212 # TODO: more
1213
1214 #
1215 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
1216 #
1217 sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
1218 'GIM: HyperV: Guest indicates a fatal condition! P0=');
1219 if sDetails is not None:
1220 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
1221 sKey = sDetails.split(' ', 1)[0];
1222 try: sKey = '0x%08X' % (int(sKey, 16),);
1223 except: pass;
1224 if sKey in self.asBsodReasons:
1225 tReason = ( self.ksBsodCategory, sKey );
1226 elif sKey.lower() in self.asBsodReasons: # just in case.
1227 tReason = ( self.ksBsodCategory, sKey.lower() );
1228 else:
1229 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
1230 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
1231 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
1232
1233 fFoundSomething = False;
1234
1235 #
1236 # Look for linux panic.
1237 #
1238 if sKrnlLog is not None:
1239 fRet = self.scanLog([sKrnlLog,], self.katSimpleKernelLogReasons, oCaseFile, oFailedResult.idTestResult);
1240 if fRet is True:
1241 return fRet;
1242 fFoundSomething |= fRet is None;
1243
1244 #
1245 # Loop thru the simple stuff.
1246 #
1247
1248 # Main log.
1249 fRet = self.scanLog([sResultLog,], self.katSimpleMainLogReasons, oCaseFile, oFailedResult.idTestResult);
1250 if fRet is True:
1251 return fRet;
1252 fFoundSomething |= fRet is None;
1253
1254 # VM log.
1255 fRet = self.scanLog([sVMLog,], self.katSimpleVmLogReasons, oCaseFile, oFailedResult.idTestResult);
1256 if fRet is True:
1257 return fRet;
1258 fFoundSomething |= fRet is None;
1259
1260 # Old main + vm log.
1261 fRet = self.scanLog([sResultLog, sVMLog], self.katSimpleMainAndVmLogReasonsDeprecated,
1262 oCaseFile, oFailedResult.idTestResult);
1263 if fRet is True:
1264 return fRet;
1265 fFoundSomething |= fRet is None;
1266
1267 # Continue with vga text.
1268 if sVgaText:
1269 fRet = self.scanLog([sVgaText,], self.katSimpleVgaTextReasons, oCaseFile, oFailedResult.idTestResult);
1270 if fRet is True:
1271 return fRet;
1272 fFoundSomething |= fRet is None;
1273
1274 # Continue with screen hashes.
1275 if sScreenHash is not None:
1276 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
1277 if sScreenHash == sHash:
1278 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1279 if fStopOnHit:
1280 return True;
1281 fFoundSomething = True;
1282
1283 # Check VBoxHardening.log.
1284 if sNtHardLog is not None:
1285 fRet = self.scanLog([sNtHardLog,], self.katSimpleVBoxHardeningLogReasons, oCaseFile, oFailedResult.idTestResult);
1286 if fRet is True:
1287 return fRet;
1288 fFoundSomething |= fRet is None;
1289
1290 #
1291 # Complicated stuff.
1292 #
1293 dLogs = {
1294 'sVMLog': sVMLog,
1295 'sNtHardLog': sNtHardLog,
1296 'sScreenHash': sScreenHash,
1297 'sKrnlLog': sKrnlLog,
1298 'sVgaText': sVgaText,
1299 'sInfoText': sInfoText,
1300 };
1301
1302 # info.txt.
1303 if sInfoText:
1304 for sNeedle, fnHandler in self.katInfoTextHandlers:
1305 if sInfoText.find(sNeedle) > 0:
1306 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1307 if tReason is not None:
1308 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1309 if fStop:
1310 return True;
1311 fFoundSomething = True;
1312
1313 #
1314 # Check for repeated reboots...
1315 #
1316 if sVMLog is not None:
1317 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1318 if cResets > 10:
1319 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1320 sComment = 'Counted %s reboots' % (cResets,));
1321
1322 return fFoundSomething;
1323
1324 #
1325 # Check if we got any VM or/and kernel logs. Treat them as sets in
1326 # case we run multiple VMs here (this is of course ASSUMING they
1327 # appear in the order that terminateVmBySession uploads them).
1328 #
1329 cTimes = 0;
1330 sVMLog = None;
1331 sNtHardLog = None;
1332 sScreenHash = None;
1333 sKrnlLog = None;
1334 sVgaText = None;
1335 sInfoText = None;
1336 for oFile in oFailedResult.aoFiles:
1337 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1338 if 'VBoxHardening.log' not in oFile.sFile:
1339 if sVMLog is not None:
1340 if investigateLogSet() is True:
1341 return True;
1342 cTimes += 1;
1343 sInfoText = None;
1344 sVgaText = None;
1345 sKrnlLog = None;
1346 sScreenHash = None;
1347 sNtHardLog = None;
1348 sVMLog = oCaseFile.getLogFile(oFile);
1349 else:
1350 sNtHardLog = oCaseFile.getLogFile(oFile);
1351 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1352 sKrnlLog = oCaseFile.getLogFile(oFile);
1353 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1354 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1355 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1356 sInfoText = oCaseFile.getLogFile(oFile);
1357 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1358 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1359 if sScreenHash is not None:
1360 sScreenHash = sScreenHash.lower();
1361 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1362
1363 if ( sVMLog is not None \
1364 or sNtHardLog is not None \
1365 or cTimes == 0) \
1366 and investigateLogSet() is True:
1367 return True;
1368
1369 return None;
1370
1371 def isResultFromVMRun(self, oFailedResult, sResultLog):
1372 """
1373 Checks if this result and corresponding log snippet looks like a VM run.
1374 """
1375
1376 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1377 if sResultLog.find(' startVm') > 0:
1378 return True;
1379
1380 # Any other indicators? No?
1381 _ = oFailedResult;
1382 return False;
1383
1384
1385 ## Things we search a VBoxSVC log for to figure out why something went bust.
1386 katSimpleSvcLogReasons = [
1387 # ( Whether to stop on hit, reason tuple, needle text. )
1388 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* exited normally: -1073741819 \(0xc0000005\)') ),
1389 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* was signalled: 11 \(0xb\)') ),
1390 ];
1391
1392 def investigateSvcLogForVMRun(self, oCaseFile, sSvcLog):
1393 """
1394 Check the VBoxSVC log for a single VM run.
1395 """
1396 if sSvcLog:
1397 fRet = self.scanLog([sSvcLog,], self.katSimpleSvcLogReasons, oCaseFile, oCaseFile.oTree.idTestResult);
1398 if fRet is True or fRet is None:
1399 return True;
1400 return False;
1401
1402 def investigateNtHardLogForVMRun(self, oCaseFile):
1403 """
1404 Check if the hardening log for a single VM run contains VM crash indications.
1405 """
1406 aoLogFiles = oCaseFile.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseVm);
1407 for oLogFile in aoLogFiles:
1408 if oLogFile.sFile.find('VBoxHardening.log') >= 0:
1409 sLog = oCaseFile.getLogFile(oLogFile);
1410 if sLog.find('Quitting: ExitCode=0xc0000005') >= 0:
1411 return oCaseFile.noteReasonForId(self.ktReason_Unknown_VM_Crash, oCaseFile.oTree.idTestResult);
1412 return False;
1413
1414
1415 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1416 """
1417 Checks out a VBox VM test.
1418
1419 This is generic investigation of a test running one or more VMs, like
1420 for example a smoke test or a guest installation test.
1421
1422 The fSingleVM parameter is a hint, which probably won't come in useful.
1423 """
1424 _ = fSingleVM;
1425
1426 #
1427 # Get a list of test result failures we should be looking into and the main log.
1428 #
1429 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1430 sMainLog = oCaseFile.getMainLog();
1431
1432 #
1433 # There are a set of errors ending up on the top level result record.
1434 # Should deal with these first.
1435 #
1436 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1437 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1438 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1439 # ignore other failures in the test if we're not a little bit careful here.
1440 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1441 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1442 return self.caseClosed(oCaseFile);
1443
1444 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1445 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1446 (' Timeout: ', ' Attempting to abort child...',) ):
1447 if sMainLog.find('*** glibc detected *** /') > 0:
1448 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1449 else:
1450 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1451 return self.caseClosed(oCaseFile);
1452
1453 # Look for heap corruption without visible hang.
1454 if sMainLog.find('*** glibc detected *** /') > 0 \
1455 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1456 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1457 return self.caseClosed(oCaseFile);
1458
1459 # Out of memory w/ timeout.
1460 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1461 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1462 return self.caseClosed(oCaseFile);
1463
1464 # Stale files like vts_rm.exe (windows).
1465 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1466 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1467 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1468 return self.caseClosed(oCaseFile);
1469
1470 #
1471 # XPCOM screwup
1472 #
1473 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1474 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1475 return self.caseClosed(oCaseFile);
1476
1477 #
1478 # Go thru each failed result.
1479 #
1480 for oFailedResult in aoFailedResults:
1481 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1482 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1483 if oFailedResult.sName == 'Installing VirtualBox':
1484 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1485
1486 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1487 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1488
1489 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1490 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1491
1492 elif self.isResultFromGATest(oFailedResult):
1493 self.investigateGATest(oCaseFile, oFailedResult, sResultLog);
1494
1495 elif sResultLog.find('most likely not unique') > 0:
1496 oCaseFile.noteReasonForId(self.ktReason_Host_NetworkMisconfiguration, oFailedResult.idTestResult)
1497 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1498 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1499
1500 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1501 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1502 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1503
1504 elif sResultLog.find('** error: no action was specified') > 0 \
1505 or sResultLog.find('(len(self._asXml, asText))') > 0:
1506 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1507
1508 else:
1509 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1510 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1511
1512 #
1513 # Check VBoxSVC.log and VBoxHardening.log for VM crashes if inconclusive on single VM runs.
1514 #
1515 if fSingleVM and len(oCaseFile.dReasonForResultId) < len(aoFailedResults):
1516 self.dprint(u'Got %u out of %u - checking VBoxSVC.log...'
1517 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1518 if self.investigateSvcLogForVMRun(oCaseFile, oCaseFile.getSvcLog()):
1519 return self.caseClosed(oCaseFile);
1520 if self.investigateNtHardLogForVMRun(oCaseFile):
1521 return self.caseClosed(oCaseFile);
1522
1523 #
1524 # Report home and close the case if we got them all, otherwise log it.
1525 #
1526 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1527 return self.caseClosed(oCaseFile);
1528
1529 if oCaseFile.dReasonForResultId:
1530 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1531 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1532 else:
1533 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1534 return False;
1535
1536
1537 ## Things we search a main log for to figure out why something in the API test went bust.
1538 katSimpleApiMainLogReasons = [
1539 # ( Whether to stop on hit, reason tuple, needle text. )
1540 ( True, ktReason_Networking_Nonexistent_host_nic,
1541 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1542 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1543 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1544 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1545 ( True, ktReason_API_Digest_Mismatch, 'Digest mismatch (VERR_NOT_EQUAL)' ),
1546 ( True, ktReason_API_MoveVM_SharingViolation, 'rc=VBOX_E_IPRT_ERROR text="Could not copy the log file ' ),
1547 ( True, ktReason_API_MoveVM_InvalidParameter,
1548 'rc=VBOX_E_IPRT_ERROR text="Could not copy the setting file ' ),
1549 ( True, ktReason_API_Open_Session_Failed, 'error: failed to open session for' ),
1550 ];
1551
1552 def investigateVBoxApiTest(self, oCaseFile):
1553 """
1554 Checks out a VBox API test.
1555 """
1556
1557 #
1558 # Get a list of test result failures we should be looking into and the main log.
1559 #
1560 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1561 sMainLog = oCaseFile.getMainLog();
1562
1563 #
1564 # Go thru each failed result.
1565 #
1566 for oFailedResult in aoFailedResults:
1567 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1568 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1569 if oFailedResult.sName == 'Installing VirtualBox':
1570 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1571
1572 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1573 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1574
1575 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1576 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1577
1578 else:
1579 fFoundSomething = False;
1580 for fStopOnHit, tReason, sNeedle in self.katSimpleApiMainLogReasons:
1581 if sResultLog.find(sNeedle) > 0:
1582 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1583 fFoundSomething = True;
1584 if fStopOnHit:
1585 break;
1586 if fFoundSomething:
1587 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1588 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1589
1590 #
1591 # Report home and close the case if we got them all, otherwise log it.
1592 #
1593 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1594 return self.caseClosed(oCaseFile);
1595
1596 if oCaseFile.dReasonForResultId:
1597 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1598 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1599 else:
1600 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1601 return False;
1602
1603
1604 def reasoningFailures(self):
1605 """
1606 Guess the reason for failures.
1607 """
1608 #
1609 # Get a list of failed test sets without any assigned failure reason.
1610 #
1611 cGot = 0;
1612 if not self.oConfig.aidTestSets:
1613 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack,
1614 tsNow = self.tsNow);
1615 else:
1616 aoTestSets = [self.oTestSetLogic.getById(idTestSet) for idTestSet in self.oConfig.aidTestSets];
1617 for oTestSet in aoTestSets:
1618 self.dprint(u'----------------------------------- #%u, status %s -----------------------------------'
1619 % ( oTestSet.idTestSet, oTestSet.enmStatus,));
1620
1621 #
1622 # Open a case file and assign it to the right investigator.
1623 #
1624 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1625 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1626 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1627 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1628 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1629
1630 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1631
1632 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1633 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1634 fRc = self.investigateBadTestBox(oCaseFile);
1635
1636 elif oCaseFile.isVBoxUnitTest():
1637 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1638 fRc = self.investigateVBoxUnitTest(oCaseFile);
1639
1640 elif oCaseFile.isVBoxInstallTest() or oCaseFile.isVBoxUnattendedInstallTest():
1641 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1642 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1643
1644 elif oCaseFile.isVBoxUSBTest():
1645 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1646 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1647
1648 elif oCaseFile.isVBoxStorageTest():
1649 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1650 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1651
1652 elif oCaseFile.isVBoxGAsTest():
1653 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1654 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1655
1656 elif oCaseFile.isVBoxAPITest():
1657 self.dprint(u'investigateVBoxApiTest is taking over %s.' % (oCaseFile.sLongName,));
1658 fRc = self.investigateVBoxApiTest(oCaseFile);
1659
1660 elif oCaseFile.isVBoxBenchmarkTest():
1661 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1662 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1663
1664 elif oCaseFile.isVBoxSmokeTest():
1665 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1666 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1667
1668 elif oCaseFile.isVBoxSerialTest():
1669 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1670 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1671
1672 else:
1673 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1674 fRc = False;
1675 cGot += fRc is True;
1676
1677 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1678 return 0;
1679
1680
1681 def main(self):
1682 """
1683 The 'main' function.
1684 Return exit code (0, 1, etc).
1685 """
1686 # Database stuff.
1687 self.oDb = TMDatabaseConnection()
1688 self.oTestResultLogic = TestResultLogic(self.oDb);
1689 self.oTestSetLogic = TestSetLogic(self.oDb);
1690 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1691 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1692 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1693 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1694
1695 # Get a fix on our 'now' before we do anything..
1696 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1697 self.tsNow = self.oDb.fetchOne();
1698
1699 # If we're suppost to commit anything we need to get our user ID.
1700 rcExit = 0;
1701 if self.oConfig.fRealRun:
1702 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1703 if self.oLogin is None:
1704 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1705 else:
1706 self.uidSelf = self.oLogin.uid;
1707
1708 #
1709 # Do the stuff.
1710 #
1711 if rcExit == 0:
1712 rcExit = self.selfCheck();
1713 if rcExit == 0:
1714 rcExit = self.badTestBoxManagement();
1715 rcExit2 = self.reasoningFailures();
1716 if rcExit == 0:
1717 rcExit = rcExit2;
1718 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1719 if rcExit == 0:
1720 rcExit = self.badTestBoxManagement();
1721
1722 # Cleanup.
1723 self.oFailureReasonLogic = None;
1724 self.oTestResultFailureLogic = None;
1725 self.oTestSetLogic = None;
1726 self.oTestResultLogic = None;
1727 self.oDb.close();
1728 self.oDb = None;
1729 if self.oLogFile is not None:
1730 self.oLogFile.close();
1731 self.oLogFile = None;
1732 return rcExit;
1733
1734if __name__ == '__main__':
1735 sys.exit(VirtualTestSheriff().main());
1736
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette