VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 92301

Last change on this file since 92301 was 90595, checked in by vboxsync, 3 years ago

ValKit: More Python 3.9 API changes needed (array.array.tostring() -> .tobytes()) bugref:10079

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 85.0 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 90595 2021-08-10 12:49:53Z vboxsync $
4# pylint: disable=line-too-long
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15from __future__ import print_function;
16
17__copyright__ = \
18"""
19Copyright (C) 2012-2020 Oracle Corporation
20
21This file is part of VirtualBox Open Source Edition (OSE), as
22available from http://www.virtualbox.org. This file is free software;
23you can redistribute it and/or modify it under the terms of the GNU
24General Public License (GPL) as published by the Free Software
25Foundation, in version 2 as it comes in the "COPYING" file of the
26VirtualBox OSE distribution. VirtualBox OSE is distributed in the
27hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
28
29The contents of this file may alternatively be used under the terms
30of the Common Development and Distribution License Version 1.0
31(CDDL) only, as it comes in the "COPYING.CDDL" file of the
32VirtualBox OSE distribution, in which case the provisions of the
33CDDL are applicable instead of those of the GPL.
34
35You may elect to license modified versions of this file under the
36terms and conditions of either the GPL or the CDDL or both.
37"""
38__version__ = "$Revision: 90595 $"
39
40
41# Standard python imports
42import hashlib;
43import os;
44import re;
45import smtplib;
46#import subprocess;
47import sys;
48from email.mime.multipart import MIMEMultipart;
49from email.mime.text import MIMEText;
50from email.utils import COMMASPACE;
51
52if sys.version_info[0] >= 3:
53 from io import StringIO as StringIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
54else:
55 from StringIO import StringIO as StringIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
56from optparse import OptionParser; # pylint: disable=deprecated-module
57from PIL import Image; # pylint: disable=import-error
58
59# Add Test Manager's modules path
60g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
61sys.path.append(g_ksTestManagerDir);
62
63# Test Manager imports
64from common import utils;
65from testmanager.core.db import TMDatabaseConnection;
66from testmanager.core.build import BuildDataEx;
67from testmanager.core.failurereason import FailureReasonLogic;
68from testmanager.core.testbox import TestBoxLogic, TestBoxData;
69from testmanager.core.testcase import TestCaseDataEx;
70from testmanager.core.testgroup import TestGroupData;
71from testmanager.core.testset import TestSetLogic, TestSetData;
72from testmanager.core.testresults import TestResultLogic, TestResultFileData;
73from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
74from testmanager.core.useraccount import UserAccountLogic;
75from testmanager.config import g_ksSmtpHost, g_kcSmtpPort, g_ksAlertFrom, \
76 g_ksAlertSubject, g_asAlertList #, g_ksLomPassword;
77
78# Python 3 hacks:
79if sys.version_info[0] >= 3:
80 xrange = range; # pylint: disable=redefined-builtin,invalid-name
81
82
83class VirtualTestSheriffCaseFile(object):
84 """
85 A failure investigation case file.
86
87 """
88
89
90 ## Max log file we'll read into memory. (256 MB)
91 kcbMaxLogRead = 0x10000000;
92
93 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
94 self.oSheriff = oSheriff;
95 self.oTestSet = oTestSet; # TestSetData
96 self.oTree = oTree; # TestResultDataEx
97 self.oBuild = oBuild; # BuildDataEx
98 self.oTestBox = oTestBox; # TestBoxData
99 self.oTestGroup = oTestGroup; # TestGroupData
100 self.oTestCase = oTestCase; # TestCaseDataEx
101 self.sMainLog = ''; # The main log file. Empty string if not accessible.
102 self.sSvcLog = ''; # The VBoxSVC log file. Empty string if not accessible.
103
104 # Generate a case file name.
105 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
106 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
107 % ( self.oTestSet.idTestSet,
108 self.oTestCase.sName,
109 self.oTestBox.sName,
110 self.oTestBox.sOs,
111 self.oTestBox.sOsVersion,
112 self.oTestBox.sCpuArch,
113 self.oTestBox.sCpuName,
114 self.oTestBox.sCpuVendor,
115 self.oBuild.oCat.sProduct,
116 self.oBuild.oCat.sBranch,
117 self.oBuild.oCat.sType,
118 self.oBuild.iRevision, );
119
120 # Investigation notes.
121 self.tReason = None; # None or one of the ktReason_XXX constants.
122 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
123 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
124
125 #
126 # Reason.
127 #
128
129 def noteReason(self, tReason):
130 """ Notes down a possible reason. """
131 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
132 self.tReason = tReason;
133 return True;
134
135 def noteReasonForId(self, tReason, idTestResult, sComment = None):
136 """ Notes down a possible reason for a specific test result. """
137 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
138 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
139 (u' (%s)' % (sComment,)) if sComment is not None else ''));
140 self.dReasonForResultId[idTestResult] = tReason;
141 if sComment is not None:
142 self.dCommentForResultId[idTestResult] = sComment;
143 return True;
144
145
146 #
147 # Test classification.
148 #
149
150 def isVBoxTest(self):
151 """ Test classification: VirtualBox (using the build) """
152 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
153
154 def isVBoxUnitTest(self):
155 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
156 return self.isVBoxTest() \
157 and (self.oTestCase.sName.lower() == 'unit tests' or self.oTestCase.sName.lower() == 'misc: unit tests');
158
159 def isVBoxInstallTest(self):
160 """ Test case classification: VirtualBox Guest installation test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('install:');
163
164 def isVBoxUnattendedInstallTest(self):
165 """ Test case classification: VirtualBox Guest installation test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('uinstall:');
168
169 def isVBoxUSBTest(self):
170 """ Test case classification: VirtualBox USB test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('usb:');
173
174 def isVBoxStorageTest(self):
175 """ Test case classification: VirtualBox Storage test. """
176 return self.isVBoxTest() \
177 and self.oTestCase.sName.lower().startswith('storage:');
178
179 def isVBoxGAsTest(self):
180 """ Test case classification: VirtualBox Guest Additions test. """
181 return self.isVBoxTest() \
182 and ( self.oTestCase.sName.lower().startswith('guest additions')
183 or self.oTestCase.sName.lower().startswith('ga\'s tests'));
184
185 def isVBoxAPITest(self):
186 """ Test case classification: VirtualBox API test. """
187 return self.isVBoxTest() \
188 and self.oTestCase.sName.lower().startswith('api:');
189
190 def isVBoxBenchmarkTest(self):
191 """ Test case classification: VirtualBox Benchmark test. """
192 return self.isVBoxTest() \
193 and self.oTestCase.sName.lower().startswith('benchmark:');
194
195 def isVBoxSmokeTest(self):
196 """ Test case classification: Smoke test. """
197 return self.isVBoxTest() \
198 and self.oTestCase.sName.lower().startswith('smoketest');
199
200 def isVBoxSerialTest(self):
201 """ Test case classification: Smoke test. """
202 return self.isVBoxTest() \
203 and self.oTestCase.sName.lower().startswith('serial:');
204
205
206 #
207 # Utility methods.
208 #
209
210 def getMainLog(self):
211 """
212 Tries to read the main log file since this will be the first source of information.
213 """
214 if self.sMainLog:
215 return self.sMainLog;
216 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
217 if oFile is not None:
218 try:
219 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
220 except Exception as oXcpt:
221 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
222 self.sMainLog = '';
223 else:
224 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
225 return self.sMainLog;
226
227 def getLogFile(self, oFile):
228 """
229 Tries to read the given file as a utf-8 log file.
230 oFile is a TestFileDataEx instance.
231 Returns empty string if problems opening or reading the file.
232 """
233 sContent = '';
234 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
235 if oFile is not None:
236 try:
237 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
238 except Exception as oXcpt:
239 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
240 else:
241 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
242 return sContent;
243
244 def getSvcLog(self):
245 """
246 Tries to read the VBoxSVC log file as it typically not associated with a failing test result.
247 Note! Returns the first VBoxSVC log file we find.
248 """
249 if not self.sSvcLog:
250 aoSvcLogFiles = self.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseSvc);
251 if aoSvcLogFiles:
252 self.sSvcLog = self.getLogFile(aoSvcLogFiles[0]);
253 return self.sSvcLog;
254
255 def getScreenshotSha256(self, oFile):
256 """
257 Tries to read the given screenshot file, uncompress it, and do SHA-2
258 on the raw pixels.
259 Returns SHA-2 digest string on success, None on failure.
260 """
261 (oImgFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
262 try:
263 abImageFile = oImgFile.read();
264 except Exception as oXcpt:
265 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
266 else:
267 try:
268 oImage = Image.open(StringIO(abImageFile));
269 except Exception as oXcpt:
270 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
271 else:
272 try:
273 oHash = hashlib.sha256();
274 if sys.version_info < (3, 9, 0):
275 # Removed since Python 3.9.
276 oHash.update(oImage.tostring()); # pylint: disable=no-member
277 else:
278 oHash.update(oImage.tobytes());
279 except Exception as oXcpt:
280 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
281 else:
282 return oHash.hexdigest();
283 return None;
284
285
286
287 def isSingleTestFailure(self):
288 """
289 Figure out if this is a single test failing or if it's one of the
290 more complicated ones.
291 """
292 if self.oTree.cErrors == 1:
293 return True;
294 if self.oTree.deepCountErrorContributers() <= 1:
295 return True;
296 return False;
297
298
299
300class VirtualTestSheriff(object): # pylint: disable=too-few-public-methods
301 """
302 Add build info into Test Manager database.
303 """
304
305 ## The user account for the virtual sheriff.
306 ksLoginName = 'vsheriff';
307
308 def __init__(self):
309 """
310 Parse command line.
311 """
312 self.oDb = None;
313 self.tsNow = None;
314 self.oTestResultLogic = None;
315 self.oTestSetLogic = None;
316 self.oFailureReasonLogic = None; # FailureReasonLogic;
317 self.oTestResultFailureLogic = None; # TestResultFailureLogic
318 self.oLogin = None;
319 self.uidSelf = -1;
320 self.oLogFile = None;
321 self.asBsodReasons = [];
322 self.asUnitTestReasons = [];
323
324 oParser = OptionParser();
325 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
326 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
327 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
328 help = 'Work period specified in hours. Defauls is 2 hours.');
329 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
330 help = 'Whether to commit the findings to the database. Default is a dry run.');
331 oParser.add_option('--testset', dest = 'aidTestSets', metavar = '<id>', default = [], type = 'int', action = 'append',
332 help = 'Only investigate this one. Accumulates IDs when repeated.');
333 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
334 help = 'Quiet execution');
335 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
336 help = 'Where to log messages.');
337 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
338 help = 'Enables debug mode.');
339
340 (self.oConfig, _) = oParser.parse_args();
341
342 if self.oConfig.sLogFile:
343 self.oLogFile = open(self.oConfig.sLogFile, "a");
344 self.oLogFile.write('VirtualTestSheriff: $Revision: 90595 $ \n');
345
346
347 def eprint(self, sText):
348 """
349 Prints error messages.
350 Returns 1 (for exit code usage.)
351 """
352 print('error: %s' % (sText,));
353 if self.oLogFile is not None:
354 if sys.version_info[0] >= 3:
355 self.oLogFile.write(u'error: %s\n' % (sText,));
356 else:
357 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
358 return 1;
359
360 def dprint(self, sText):
361 """
362 Prints debug info.
363 """
364 if self.oConfig.fDebug:
365 if not self.oConfig.fQuiet:
366 print('debug: %s' % (sText, ));
367 if self.oLogFile is not None:
368 if sys.version_info[0] >= 3:
369 self.oLogFile.write(u'debug: %s\n' % (sText,));
370 else:
371 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
372 return 0;
373
374 def vprint(self, sText):
375 """
376 Prints verbose info.
377 """
378 if not self.oConfig.fQuiet:
379 print('info: %s' % (sText,));
380 if self.oLogFile is not None:
381 if sys.version_info[0] >= 3:
382 self.oLogFile.write(u'info: %s\n' % (sText,));
383 else:
384 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
385 return 0;
386
387 def getFailureReason(self, tReason):
388 """ Gets the failure reason object for tReason. """
389 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
390
391 def selfCheck(self):
392 """ Does some self checks, looking up things we expect to be in the database and such. """
393 rcExit = 0;
394 for sAttr in dir(self.__class__):
395 if sAttr.startswith('ktReason_'):
396 tReason = getattr(self.__class__, sAttr);
397 oFailureReason = self.getFailureReason(tReason);
398 if oFailureReason is None:
399 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
400 % (tReason[1], tReason[0],));
401
402 # Check the user account as well.
403 if self.oLogin is None:
404 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
405 if oLogin is None:
406 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
407 return rcExit;
408
409 def sendEmailAlert(self, uidAuthor, sBodyText):
410 """
411 Sends email alert.
412 """
413
414 # Get author email
415 self.oDb.execute('SELECT sEmail FROM Users WHERE uid=%s', (uidAuthor,));
416 sFrom = self.oDb.fetchOne();
417 if sFrom is not None:
418 sFrom = sFrom[0];
419 else:
420 sFrom = g_ksAlertFrom;
421
422 # Gather recipient list.
423 asEmailList = [];
424 for sUser in g_asAlertList:
425 self.oDb.execute('SELECT sEmail FROM Users WHERE sUsername=%s', (sUser,));
426 sEmail = self.oDb.fetchOne();
427 if sEmail:
428 asEmailList.append(sEmail[0]);
429 if not asEmailList:
430 return self.eprint('No email addresses to send alter to!');
431
432 # Compose the message.
433 oMsg = MIMEMultipart();
434 oMsg['From'] = sFrom;
435 oMsg['To'] = COMMASPACE.join(asEmailList);
436 oMsg['Subject'] = g_ksAlertSubject;
437 oMsg.attach(MIMEText(sBodyText, 'plain'))
438
439 # Try send it.
440 try:
441 oSMTP = smtplib.SMTP(g_ksSmtpHost, g_kcSmtpPort);
442 oSMTP.sendmail(sFrom, asEmailList, oMsg.as_string())
443 oSMTP.quit()
444 except smtplib.SMTPException as oXcpt:
445 return self.eprint('Failed to send mail: %s' % (oXcpt,));
446
447 return 0;
448
449 def badTestBoxManagement(self):
450 """
451 Looks for bad test boxes and first tries once to reboot them then disables them.
452 """
453 rcExit = 0;
454
455 #
456 # We skip this entirely if we're running in the past and not in harmless debug mode.
457 #
458 if self.oConfig.cStartHoursAgo != 0 \
459 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
460 return rcExit;
461 tsNow = self.tsNow if self.oConfig.fDebug else None;
462 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
463 oTestBoxLogic = TestBoxLogic(self.oDb);
464
465 #
466 # Generate a list of failures reasons we consider bad-testbox behavior.
467 #
468 aidFailureReasons = [
469 self.getFailureReason(self.ktReason_Host_DriverNotLoaded).idFailureReason,
470 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
471 self.getFailureReason(self.ktReason_Host_DriverNotCompilable).idFailureReason,
472 self.getFailureReason(self.ktReason_Host_InstallationFailed).idFailureReason,
473 ];
474
475 #
476 # Get list of bad test boxes for given period and check them out individually.
477 #
478 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
479 aidFailureReasons = aidFailureReasons);
480 for idTestBox in aidBadTestBoxes:
481 # Skip if the testbox is already disabled or has a pending reboot command.
482 try:
483 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
484 except Exception as oXcpt:
485 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
486 continue;
487 if not oTestBox.fEnabled:
488 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
489 % ( idTestBox, oTestBox.sName, ));
490 continue;
491 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
492 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
493 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
494 continue;
495
496 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
497 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
498 cOkay = 0;
499 cBad = 0;
500 iFirstOkay = len(aoSets);
501 for iSet, oSet in enumerate(aoSets):
502 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
503 cBad += 1;
504 else:
505 # Check for bad failure reasons.
506 oFailure = None;
507 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
508 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oSet.idTestSet)
509 aoFailedResults = oTree.getListOfFailures();
510 for oFailedResult in aoFailedResults:
511 oFailure = self.oTestResultFailureLogic.getById(oFailedResult.idTestResult);
512 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
513 break;
514 oFailure = None;
515 if oFailure is not None:
516 cBad += 1;
517 else:
518 # This is an okay test result then.
519 ## @todo maybe check the elapsed time here, it could still be a bad run?
520 cOkay += 1;
521 if iFirstOkay > iSet:
522 iFirstOkay = iSet;
523 if iSet > 10:
524 break;
525
526 # We react if there are two or more bad-testbox statuses at the head of the
527 # history and at least three in the last 10 results.
528 if iFirstOkay >= 2 and cBad > 2:
529 if oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
530 sComment = u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
531 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
532 self.vprint(sComment);
533 self.sendEmailAlert(self.uidSelf, sComment);
534 if self.oConfig.fRealRun is True:
535 try:
536 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
537 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
538 % (iFirstOkay, cBad, cOkay),);
539 except Exception as oXcpt:
540 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
541 else:
542 sComment = u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
543 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
544 self.vprint(sComment);
545 self.sendEmailAlert(self.uidSelf, sComment);
546 if self.oConfig.fRealRun is True:
547 try:
548 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
549 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
550 % (iFirstOkay, cBad, cOkay),);
551 except Exception as oXcpt:
552 rcExit = self.eprint(u'Error rebooting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
553 else:
554 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
555 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
556
557 ## @todo r=bird: review + rewrite;
558 ## - no selecting here, that belongs in the core/*.py files.
559 ## - preserve existing comments.
560 ## - doing way too much in the try/except block.
561 ## - No password quoting in the sshpass command that always fails (127).
562 ## - Timeout is way to low. testboxmem1 need more than 10 min to take a dump, ages to
563 ## get thru POST and another 5 just to time out in grub. Should be an hour or so.
564 ## Besides, it need to be constant elsewhere in the file, not a variable here.
565 ##
566 ##
567 ## Reset hanged testboxes
568 ##
569 #cStatusTimeoutMins = 10;
570 #
571 #self.oDb.execute('SELECT TestBoxStatuses.idTestBox\n'
572 # ' FROM TestBoxStatuses, TestBoxes\n'
573 # ' WHERE TestBoxStatuses.tsUpdated >= (CURRENT_TIMESTAMP - interval \'%s hours\')\n'
574 # ' AND TestBoxStatuses.tsUpdated < (CURRENT_TIMESTAMP - interval \'%s minutes\')\n'
575 # ' AND TestBoxStatuses.idTestBox = TestBoxes.idTestBox\n'
576 # ' AND Testboxes.tsExpire = \'infinity\'::timestamp', (cHoursBack,cStatusTimeoutMins));
577 #for idTestBox in self.oDb.fetchAll():
578 # idTestBox = idTestBox[0];
579 # try:
580 # oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
581 # except Exception as oXcpt:
582 # rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
583 # continue;
584 # # Skip if the testbox is already disabled, already reset or there's no iLOM
585 # if not oTestBox.fEnabled or oTestBox.ipLom is None or oTestBox.sComment is not None and oTestBox.sComment.find('Automatically reset') >= 0:
586 # self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
587 # % ( idTestBox, oTestBox.sName, ));
588 # continue;
589 # ## @todo get iLOM credentials from a table?
590 # sCmd = 'sshpass -p%s ssh -oStrictHostKeyChecking=no root@%s show /SP && reset /SYS' % (g_ksLomPassword, oTestBox.ipLom,);
591 # try:
592 # oPs = subprocess.Popen(sCmd, stdout=subprocess.PIPE, shell=True);
593 # sStdout = oPs.communicate()[0];
594 # iRC = oPs.wait();
595 #
596 # oTestBox.sComment = 'Automatically reset (iRC=%u sStdout=%s)' % (iRC, sStdout,);
597 # oTestBoxLogic.editEntry(oTestBox, self.uidSelf, fCommit = True);
598 #
599 # sComment = u'Reset testbox #%u (%s) - iRC=%u sStduot=%s' % ( idTestBox, oTestBox.sName, iRC, sStdout);
600 # self.vprint(sComment);
601 # self.sendEmailAlert(self.uidSelf, sComment);
602 #
603 # except Exception as oXcpt:
604 # rcExit = self.eprint(u'Error resetting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
605 #
606 return rcExit;
607
608
609 ## @name Failure reasons we know.
610 ## @{
611 ktReason_Add_CopyToGuest_Timeout = ( 'Additions', 'CopyToGuest Timeout' );
612 ktReason_Add_FlushViewOfFile = ( 'Additions', 'FlushViewOfFile' );
613 ktReason_Add_Mmap_Coherency = ( 'Additions', 'mmap coherency' );
614 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
615 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
616 ktReason_BSOD_0000007F = ( 'BSOD', '0x0000007F' );
617 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
618 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
619 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
620 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
621 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
622 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
623 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
624 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
625 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
626 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
627 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
628 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
629 ktReason_Host_DriverNotCompilable = ( 'Host', 'Driver not compilable' );
630 ktReason_Host_InstallationFailed = ( 'Host', 'Installation failed' );
631 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
632 ktReason_Host_DoubleFreeHeap = ( 'Host', 'Double free or corruption' );
633 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
634 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
635 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
636 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
637 ktReason_Host_NetworkMisconfiguration = ( 'Host', 'Network misconfiguration' );
638 ktReason_Host_TSTInfo_Accuracy_OOR = ( 'Host', 'TSTInfo accuracy out of range' );
639 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
640 ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND = ( 'Networking', 'VERR_INTNET_FLT_IF_NOT_FOUND' );
641 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
642 ktReason_OSInstall_Udev_hang = ( 'O/S Install', 'udev hang' );
643 ktReason_OSInstall_Sata_no_BM = ( 'O/S Install', 'SATA busmaster bit not set' );
644 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
645 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
646 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
647 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
648 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
649 ktReason_API_std_bad_alloc = ( 'API / (XP)COM', 'std::bad_alloc' );
650 ktReason_API_Digest_Mismatch = ( 'API / (XP)COM', 'Digest mismatch' );
651 ktReason_API_MoveVM_SharingViolation = ( 'API / (XP)COM', 'MoveVM sharing violation' );
652 ktReason_API_MoveVM_InvalidParameter = ( 'API / (XP)COM', 'MoveVM invalid parameter' );
653 ktReason_API_Open_Session_Failed = ( 'API / (XP)COM', 'Open session failed' );
654 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
655 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
656 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
657 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
658 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
659 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
660 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
661 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
662 ktReason_Unknown_HalReturnToFirmware = ( 'Unknown', 'HalReturnToFirmware' );
663 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
664 ktReason_Unknown_VM_Start_Error = ( 'Unknown', 'VM Start Error' );
665 ktReason_Unknown_VM_Runtime_Error = ( 'Unknown', 'VM Runtime Error' );
666 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
667 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
668 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
669 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
670 ktReason_GuestBug_CompizVBoxQt = ( 'Guest Bug', 'Compiz + VirtualBox Qt GUI crash' );
671 ## @}
672
673 ## BSOD category.
674 ksBsodCategory = 'BSOD';
675 ## Special reason indicating that the flesh and blood sheriff has work to do.
676 ksBsodAddNew = 'Add new BSOD';
677
678 ## Unit test category.
679 ksUnitTestCategory = 'Unit';
680 ## Special reason indicating that the flesh and blood sheriff has work to do.
681 ksUnitTestAddNew = 'Add new';
682
683 ## Used for indica that we shouldn't report anything for this test result ID and
684 ## consider promoting the previous error to test set level if it's the only one.
685 ktHarmless = ( 'Probably', 'Caused by previous error' );
686
687
688 def caseClosed(self, oCaseFile):
689 """
690 Reports the findings in the case and closes it.
691 """
692 #
693 # Log it and create a dReasonForReasultId we can use below.
694 #
695 dCommentForResultId = oCaseFile.dCommentForResultId;
696 if oCaseFile.dReasonForResultId:
697 # Must weed out ktHarmless.
698 dReasonForResultId = {};
699 for idKey, tReason in oCaseFile.dReasonForResultId.items():
700 if tReason is not self.ktHarmless:
701 dReasonForResultId[idKey] = tReason;
702 if not dReasonForResultId:
703 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
704 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
705 return False;
706
707 # Try promote to single reason.
708 atValues = dReasonForResultId.values();
709 fSingleReason = True;
710 if len(dReasonForResultId) == 1 and dReasonForResultId.keys()[0] != oCaseFile.oTestSet.idTestResult:
711 self.dprint(u'Promoting single reason to whole set: %s' % (atValues[0],));
712 elif len(dReasonForResultId) > 1 and len(atValues) == atValues.count(atValues[0]):
713 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), atValues[0]));
714 else:
715 fSingleReason = False;
716 if fSingleReason:
717 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: atValues[0], };
718 if dCommentForResultId:
719 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: dCommentForResultId.values()[0], };
720 elif oCaseFile.tReason is not None:
721 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
722 else:
723 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
724 return False;
725
726 self.vprint(u'Closing %s with following reason%s: %s'
727 % ( oCaseFile.sName, 's' if dReasonForResultId > 0 else '', dReasonForResultId, ));
728
729 #
730 # Add the test failure reason record(s).
731 #
732 for idTestResult, tReason in dReasonForResultId.items():
733 oFailureReason = self.getFailureReason(tReason);
734 if oFailureReason is not None:
735 sComment = 'Set by $Revision: 90595 $' # Handy for reverting later.
736 if idTestResult in dCommentForResultId:
737 sComment += ': ' + dCommentForResultId[idTestResult];
738
739 oAdd = TestResultFailureData();
740 oAdd.initFromValues(idTestResult = idTestResult,
741 idFailureReason = oFailureReason.idFailureReason,
742 uidAuthor = self.uidSelf,
743 idTestSet = oCaseFile.oTestSet.idTestSet,
744 sComment = sComment,);
745 if self.oConfig.fRealRun:
746 try:
747 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
748 except Exception as oXcpt:
749 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
750 % (oXcpt, oAdd, oCaseFile.sLongName,));
751 else:
752 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
753 return True;
754
755 #
756 # Tools for assiting log parsing.
757 #
758
759 @staticmethod
760 def matchFollowedByLines(sStr, off, asFollowingLines):
761 """ Worker for isThisFollowedByTheseLines. """
762
763 # Advance off to the end of the line.
764 off = sStr.find('\n', off);
765 if off < 0:
766 return False;
767 off += 1;
768
769 # Match each string with the subsequent lines.
770 for iLine, sLine in enumerate(asFollowingLines):
771 offEnd = sStr.find('\n', off);
772 if offEnd < 0:
773 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
774 if sLine and sStr.find(sLine, off, offEnd) < 0:
775 return False;
776
777 # next line.
778 off = offEnd + 1;
779
780 return True;
781
782 @staticmethod
783 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
784 """
785 Looks for a line contining sFirst which is then followed by lines
786 with the strings in asFollowingLines. (No newline chars anywhere!)
787 Returns True / False.
788 """
789 off = sStr.find(sFirst, 0);
790 while off >= 0:
791 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
792 return True;
793 off = sStr.find(sFirst, off + 1);
794 return False;
795
796 @staticmethod
797 def findAndReturnRestOfLine(sHaystack, sNeedle):
798 """
799 Looks for sNeedle in sHaystack.
800 Returns The text following the needle up to the end of the line.
801 Returns None if not found.
802 """
803 if sHaystack is None:
804 return None;
805 off = sHaystack.find(sNeedle);
806 if off < 0:
807 return None;
808 off += len(sNeedle)
809 offEol = sHaystack.find('\n', off);
810 if offEol < 0:
811 offEol = len(sHaystack);
812 return sHaystack[off:offEol]
813
814 @staticmethod
815 def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
816 """
817 Looks for sNeedle in zeroe or more haystacks (asHaystack).
818 Returns The text following the first needed found up to the end of the line.
819 Returns None if not found.
820 """
821 for sHaystack in asHaystacks:
822 sRet = VirtualTestSheriff.findAndReturnRestOfLine(sHaystack, sNeedle);
823 if sRet is not None:
824 return sRet;
825 return None;
826
827
828 #
829 # The investigative units.
830 #
831
832 katSimpleInstallUninstallMainLogReasons = [
833 # ( Whether to stop on hit, reason tuple, needle text. )
834 ( False, ktReason_Host_LeftoverService,
835 'SERVICE_NAME: vbox' ),
836 ( False, ktReason_Host_LeftoverService,
837 'Seems installation was skipped. Old version lurking behind? Not the fault of this build/test run!'),
838 ];
839
840 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
841 'darwin': [
842 # ( Whether to stop on hit, reason tuple, needle text. )
843 ( True, ktReason_Host_DriverNotUnloading,
844 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
845 ],
846 'linux': [
847 # ( Whether to stop on hit, reason tuple, needle text. )
848 ( True, ktReason_Host_DriverNotCompilable,
849 'This system is not currently set up to build kernel modules' ),
850 ( True, ktReason_Host_DriverNotCompilable,
851 'This system is currently not set up to build kernel modules' ),
852 ( True, ktReason_Host_InstallationFailed,
853 'vboxdrv.sh: failed: Look at /var/log/vbox-install.log to find out what went wrong.' ),
854 ( True, ktReason_Host_DriverNotUnloading,
855 'Cannot unload module vboxdrv'),
856 ],
857 'solaris': [
858 # ( Whether to stop on hit, reason tuple, needle text. )
859 ( True, ktReason_Host_DriverNotUnloading, 'can\'t unload the module: Device busy' ),
860 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: Host module ...FAILED!' ),
861 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: NetFilter (Crossbow) module ...FAILED!' ),
862 ( True, ktReason_Host_InstallationFailed, 'svcadm: Couldn\'t bind to svc.configd.' ),
863 ( True, ktReason_Host_InstallationFailed, 'pkgadd: ERROR: postinstall script did not complete successfully' ),
864 ],
865 };
866
867
868 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
869 """
870 Investigates an install or uninstall failure.
871
872 We lump the two together since the installation typically also performs
873 an uninstall first and will be seeing similar issues to the uninstall.
874 """
875 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
876
877 if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
878 oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
879 return True;
880
881 atSimple = self.katSimpleInstallUninstallMainLogReasons;
882 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
883 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
884
885 fFoundSomething = False;
886 for fStopOnHit, tReason, sNeedle in atSimple:
887 if sResultLog.find(sNeedle) > 0:
888 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
889 if fStopOnHit:
890 return True;
891 fFoundSomething = True;
892
893 return fFoundSomething if fFoundSomething else None;
894
895
896 def investigateBadTestBox(self, oCaseFile):
897 """
898 Checks out bad-testbox statuses.
899 """
900 _ = oCaseFile;
901 return False;
902
903
904 def investigateVBoxUnitTest(self, oCaseFile):
905 """
906 Checks out a VBox unittest problem.
907 """
908
909 #
910 # Process simple test case failures first, using their name as reason.
911 # We do the reason management just like for BSODs.
912 #
913 cRelevantOnes = 0;
914 sMainLog = oCaseFile.getMainLog();
915 aoFailedResults = oCaseFile.oTree.getListOfFailures();
916 for oFailedResult in aoFailedResults:
917 if oFailedResult is oCaseFile.oTree:
918 self.vprint('TODO: toplevel failure');
919 cRelevantOnes += 1
920
921 elif oFailedResult.sName == 'Installing VirtualBox':
922 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
923 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
924 cRelevantOnes += 1
925
926 elif oFailedResult.sName == 'Uninstalling VirtualBox':
927 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
928 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
929 cRelevantOnes += 1
930
931 elif oFailedResult.oParent is not None:
932 # Get the 2nd level node because that's where we'll find the unit test name.
933 while oFailedResult.oParent.oParent is not None:
934 oFailedResult = oFailedResult.oParent;
935
936 # Only report a failure once.
937 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
938 sKey = oFailedResult.sName;
939 if sKey.startswith('testcase/'):
940 sKey = sKey[9:];
941 if sKey in self.asUnitTestReasons:
942 tReason = ( self.ksUnitTestCategory, sKey );
943 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
944 else:
945 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
946 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
947 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
948 cRelevantOnes += 1
949 else:
950 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
951
952 #
953 # If we've caught all the relevant ones by now, report the result.
954 #
955 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
956 return self.caseClosed(oCaseFile);
957 return False;
958
959 def extractGuestCpuStack(self, sInfoText):
960 """
961 Extracts the guest CPU stacks from the input file.
962
963 Returns a dictionary keyed by the CPU number, value being a list of
964 raw stack lines (no header).
965 Returns empty dictionary if no stacks where found.
966 """
967 dRet = {};
968 off = 0;
969 while True:
970 # Find the stack.
971 offStart = sInfoText.find('=== start guest stack VCPU ', off);
972 if offStart < 0:
973 break;
974 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
975 if offEnd >= 0:
976 offEnd += 3;
977 else:
978 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
979 if offEnd < 0:
980 offEnd = len(sInfoText);
981
982 sStack = sInfoText[offStart : offEnd];
983 sStack = sStack.replace('\r',''); # paranoia
984 asLines = sStack.split('\n');
985
986 # Figure the CPU.
987 asWords = asLines[0].split();
988 if len(asWords) < 6 or not asWords[5].isdigit():
989 break;
990 iCpu = int(asWords[5]);
991
992 # Add it and advance.
993 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
994 off = offEnd;
995 return dRet;
996
997 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
998 """ Investigates kvm_lock_spinning deadlocks """
999 #
1000 # Extract the stacks. We need more than one CPU to create a deadlock.
1001 #
1002 dStacks = self.extractGuestCpuStack(sInfoText);
1003 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
1004 if len(dStacks) >= 2:
1005 #
1006 # Examin each of the stacks. Each must have kvm_lock_spinning in
1007 # one of the first three entries.
1008 #
1009 cHits = 0;
1010 for iCpu in dStacks:
1011 asBacktrace = dStacks[iCpu];
1012 for iFrame in xrange(min(3, len(asBacktrace))):
1013 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
1014 cHits += 1;
1015 break;
1016 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
1017 if cHits == len(dStacks):
1018 return (True, self.ktReason_VMM_kvm_lock_spinning);
1019
1020 _ = dLogs; _ = oCaseFile;
1021 return (False, None);
1022
1023 def investigateInfoHalReturnToFirmware(self, oCaseFile, sInfoText, dLogs):
1024 """ Investigates HalReturnToFirmware hangs """
1025 del oCaseFile
1026 del sInfoText
1027 del dLogs
1028 # hope that's sufficient
1029 return (True, self.ktReason_Unknown_HalReturnToFirmware);
1030
1031 ## Things we search a main or VM log for to figure out why something went bust.
1032 ## @note DO NOT ADD MORE STUFF HERE!
1033 ## Please use katSimpleMainLogReasons and katSimpleVmLogReasons instead!
1034 katSimpleMainAndVmLogReasonsDeprecated = [
1035 # ( Whether to stop on hit, reason tuple, needle text. )
1036 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
1037 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
1038 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
1039 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
1040 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
1041 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
1042 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
1043 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
1044 ( True, ktReason_Networking_Nonexistent_host_nic,
1045 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1046 ( True, ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND,
1047 'Failed to attach the network LUN (VERR_INTNET_FLT_IF_NOT_FOUND)' ),
1048 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
1049 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1050 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1051 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1052 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
1053 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
1054 ( True, ktReason_Unknown_File_Not_Found,
1055 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
1056 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
1057 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
1058 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
1059 ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' ),
1060 ( True, ktReason_OSInstall_Sata_no_BM, 'PCHS=14128/14134/8224' ),
1061 ( True, ktReason_Host_DoubleFreeHeap, 'double free or corruption' ),
1062 #( False, ktReason_Unknown_VM_Start_Error, 'VMSetError: ' ), - false positives for stuff like:
1063 # "VMSetError: VD: Backend 'VBoxIsoMaker' does not support async I/O"
1064 ( False, ktReason_Unknown_VM_Start_Error, 'error: failed to open session for' ),
1065 ( False, ktReason_Unknown_VM_Runtime_Error, 'Console: VM runtime error: fatal=true' ),
1066 ];
1067
1068 ## This we search a main log for to figure out why something went bust.
1069 katSimpleMainLogReasons = [
1070 # ( Whether to stop on hit, reason tuple, needle text. )
1071 ];
1072
1073 ## This we search a VM log for to figure out why something went bust.
1074 katSimpleVmLogReasons = [
1075 # ( Whether to stop on hit, reason tuple, needle text. )
1076 ];
1077
1078 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
1079 katSimpleVBoxHardeningLogReasons = [
1080 # ( Whether to stop on hit, reason tuple, needle text. )
1081 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
1082 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
1083 ( True, ktReason_Host_TSTInfo_Accuracy_OOR, 'RTCRTSPTSTINFO::Accuracy::Millis: Out of range' ),
1084 ( False, ktReason_Unknown_VM_Crash, 'Quitting: ExitCode=0xc0000005 (rcNtWait=' ),
1085 ];
1086
1087 ## Things we search a kernel.log file for to figure out why something went bust.
1088 katSimpleKernelLogReasons = [
1089 # ( Whether to stop on hit, reason tuple, needle text. )
1090 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
1091 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn\'t work' ),
1092 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
1093 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libQt5CoreVBox' ),
1094 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libgtk-3' ),
1095 ];
1096
1097 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
1098 katSimpleVgaTextReasons = [
1099 # ( Whether to stop on hit, reason tuple, needle text. )
1100 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1101 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
1102 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1103 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
1104 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
1105 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
1106 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
1107 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
1108 "and send a report. Then try booting with the 'noapic' option\n"
1109 "\n" ),
1110 ( True, ktReason_OSInstall_GRUB_hang,
1111 "-----\nGRUB Loading stage2..\n\n\n\n" ),
1112 ( True, ktReason_OSInstall_GRUB_hang,
1113 "-----\nGRUB Loading stage2...\n\n\n\n" ), # the 3 dot hang appears to be less frequent
1114 ( True, ktReason_OSInstall_GRUB_hang,
1115 "-----\nGRUB Loading stage2....\n\n\n\n" ), # the 4 dot hang appears to be very infrequent
1116 ( True, ktReason_OSInstall_GRUB_hang,
1117 "-----\nGRUB Loading stage2.....\n\n\n\n" ), # the 5 dot hang appears to be more frequent again
1118 ( True, ktReason_OSInstall_Udev_hang,
1119 "\nStarting udev:\n\n\n\n" ),
1120 ( True, ktReason_OSInstall_Udev_hang,
1121 "\nStarting udev:\n------" ),
1122 ( True, ktReason_Panic_BootManagerC000000F,
1123 "Windows failed to start. A recent hardware or software change might be the" ),
1124 ( True, ktReason_BootManager_Image_corrupt,
1125 "BOOTMGR image is corrupt. The system cannot boot." ),
1126 ];
1127
1128 ## Things we search for in the info.txt file. Require handlers for now.
1129 katInfoTextHandlers = [
1130 # ( Trigger text, handler method )
1131 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
1132 ( "HalReturnToFirmware", investigateInfoHalReturnToFirmware ),
1133 ];
1134
1135 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
1136 katSimpleScreenshotHashReasons = [
1137 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
1138 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
1139 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
1140 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
1141 ( True, ktReason_BSOD_0000007F, '57e1880619e13042a87100e7a38c8974b85ce3866501be621bea0cc696bb2c63' ),
1142 ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
1143 ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
1144 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
1145 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
1146 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
1147 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
1148 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
1149 ];
1150
1151
1152 def scanLog(self, asLogs, atNeedles, oCaseFile, idTestResult):
1153 """
1154 Scans for atNeedles in sLog.
1155
1156 Returns True if a stop-on-hit neelde was found.
1157 Returns None if a no-stop reason was found.
1158 Returns False if no hit.
1159 """
1160 fRet = False;
1161 for fStopOnHit, tReason, oNeedle in atNeedles:
1162 fMatch = False;
1163 if utils.isString(oNeedle):
1164 for sLog in asLogs:
1165 if sLog:
1166 fMatch |= sLog.find(oNeedle) > 0;
1167 else:
1168 for sLog in asLogs:
1169 if sLog:
1170 fMatch |= oNeedle.search(sLog) is not None;
1171 if fMatch:
1172 oCaseFile.noteReasonForId(tReason, idTestResult);
1173 if fStopOnHit:
1174 return True;
1175 fRet = None;
1176 return fRet;
1177
1178
1179 def investigateGATest(self, oCaseFile, oFailedResult, sResultLog):
1180 """
1181 Investigates a failed VM run.
1182 """
1183 enmReason = None;
1184 if oFailedResult.sName == 'mmap':
1185 if sResultLog.find('FsPerf: Flush issue at offset ') >= 0:
1186 enmReason = self.ktReason_Add_Mmap_Coherency;
1187 elif sResultLog.find('FlushViewOfFile') >= 0:
1188 enmReason = self.ktReason_Add_FlushViewOfFile;
1189 elif oFailedResult.sName == 'Copy to guest':
1190 if sResultLog.find('*** abort action ***') >= 0:
1191 enmReason = self.ktReason_Add_CopyToGuest_Timeout;
1192
1193 if enmReason is not None:
1194 return oCaseFile.noteReasonForId(enmReason, oFailedResult.idTestResult);
1195
1196 self.vprint(u'TODO: Cannot place GA failure idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1197 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1198 return False;
1199
1200 def isResultFromGATest(self, oFailedResult):
1201 """
1202 Checks if this result and corresponding log snippet looks like a GA test run.
1203 """
1204 while oFailedResult is not None:
1205 if oFailedResult.sName in [ 'Guest Control', 'Shared Folders', 'FsPerf', ]:
1206 return True;
1207 oFailedResult = oFailedResult.oParent;
1208 return False;
1209
1210
1211 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
1212 """
1213 Investigates a failed VM run.
1214 """
1215
1216 def investigateLogSet():
1217 """
1218 Investigates the current set of VM related logs.
1219 """
1220 self.dprint('investigateLogSet: log lengths: result %u, VM %u, kernel %u, vga text %u, info text %u, hard %u'
1221 % ( len(sResultLog if sResultLog else ''),
1222 len(sVMLog if sVMLog else ''),
1223 len(sKrnlLog if sKrnlLog else ''),
1224 len(sVgaText if sVgaText else ''),
1225 len(sInfoText if sInfoText else ''),
1226 len(sNtHardLog if sNtHardLog else ''),));
1227
1228 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
1229 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
1230 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
1231 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
1232 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
1233 #self.dprint(u'hard.txt<<<\n%s\n<<<\n' % (sNtHardLog,));
1234
1235 # TODO: more
1236
1237 #
1238 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
1239 #
1240 sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
1241 'GIM: HyperV: Guest indicates a fatal condition! P0=');
1242 if sDetails is not None:
1243 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
1244 sKey = sDetails.split(' ', 1)[0];
1245 try: sKey = '0x%08X' % (int(sKey, 16),);
1246 except: pass;
1247 if sKey in self.asBsodReasons:
1248 tReason = ( self.ksBsodCategory, sKey );
1249 elif sKey.lower() in self.asBsodReasons: # just in case.
1250 tReason = ( self.ksBsodCategory, sKey.lower() );
1251 else:
1252 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
1253 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
1254 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
1255
1256 fFoundSomething = False;
1257
1258 #
1259 # Look for linux panic.
1260 #
1261 if sKrnlLog is not None:
1262 fRet = self.scanLog([sKrnlLog,], self.katSimpleKernelLogReasons, oCaseFile, oFailedResult.idTestResult);
1263 if fRet is True:
1264 return fRet;
1265 fFoundSomething |= fRet is None;
1266
1267 #
1268 # Loop thru the simple stuff.
1269 #
1270
1271 # Main log.
1272 fRet = self.scanLog([sResultLog,], self.katSimpleMainLogReasons, oCaseFile, oFailedResult.idTestResult);
1273 if fRet is True:
1274 return fRet;
1275 fFoundSomething |= fRet is None;
1276
1277 # VM log.
1278 fRet = self.scanLog([sVMLog,], self.katSimpleVmLogReasons, oCaseFile, oFailedResult.idTestResult);
1279 if fRet is True:
1280 return fRet;
1281 fFoundSomething |= fRet is None;
1282
1283 # Old main + vm log.
1284 fRet = self.scanLog([sResultLog, sVMLog], self.katSimpleMainAndVmLogReasonsDeprecated,
1285 oCaseFile, oFailedResult.idTestResult);
1286 if fRet is True:
1287 return fRet;
1288 fFoundSomething |= fRet is None;
1289
1290 # Continue with vga text.
1291 if sVgaText:
1292 fRet = self.scanLog([sVgaText,], self.katSimpleVgaTextReasons, oCaseFile, oFailedResult.idTestResult);
1293 if fRet is True:
1294 return fRet;
1295 fFoundSomething |= fRet is None;
1296
1297 # Continue with screen hashes.
1298 if sScreenHash is not None:
1299 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
1300 if sScreenHash == sHash:
1301 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1302 if fStopOnHit:
1303 return True;
1304 fFoundSomething = True;
1305
1306 # Check VBoxHardening.log.
1307 if sNtHardLog is not None:
1308 fRet = self.scanLog([sNtHardLog,], self.katSimpleVBoxHardeningLogReasons, oCaseFile, oFailedResult.idTestResult);
1309 if fRet is True:
1310 return fRet;
1311 fFoundSomething |= fRet is None;
1312
1313 #
1314 # Complicated stuff.
1315 #
1316 dLogs = {
1317 'sVMLog': sVMLog,
1318 'sNtHardLog': sNtHardLog,
1319 'sScreenHash': sScreenHash,
1320 'sKrnlLog': sKrnlLog,
1321 'sVgaText': sVgaText,
1322 'sInfoText': sInfoText,
1323 };
1324
1325 # info.txt.
1326 if sInfoText:
1327 for sNeedle, fnHandler in self.katInfoTextHandlers:
1328 if sInfoText.find(sNeedle) > 0:
1329 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1330 if tReason is not None:
1331 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1332 if fStop:
1333 return True;
1334 fFoundSomething = True;
1335
1336 #
1337 # Check for repeated reboots...
1338 #
1339 if sVMLog is not None:
1340 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1341 if cResets > 10:
1342 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1343 sComment = 'Counted %s reboots' % (cResets,));
1344
1345 return fFoundSomething;
1346
1347 #
1348 # Check if we got any VM or/and kernel logs. Treat them as sets in
1349 # case we run multiple VMs here (this is of course ASSUMING they
1350 # appear in the order that terminateVmBySession uploads them).
1351 #
1352 cTimes = 0;
1353 sVMLog = None;
1354 sNtHardLog = None;
1355 sScreenHash = None;
1356 sKrnlLog = None;
1357 sVgaText = None;
1358 sInfoText = None;
1359 for oFile in oFailedResult.aoFiles:
1360 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1361 if 'VBoxHardening.log' not in oFile.sFile:
1362 if sVMLog is not None:
1363 if investigateLogSet() is True:
1364 return True;
1365 cTimes += 1;
1366 sInfoText = None;
1367 sVgaText = None;
1368 sKrnlLog = None;
1369 sScreenHash = None;
1370 sNtHardLog = None;
1371 sVMLog = oCaseFile.getLogFile(oFile);
1372 else:
1373 sNtHardLog = oCaseFile.getLogFile(oFile);
1374 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1375 sKrnlLog = oCaseFile.getLogFile(oFile);
1376 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1377 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1378 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1379 sInfoText = oCaseFile.getLogFile(oFile);
1380 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1381 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1382 if sScreenHash is not None:
1383 sScreenHash = sScreenHash.lower();
1384 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1385
1386 if ( sVMLog is not None \
1387 or sNtHardLog is not None \
1388 or cTimes == 0) \
1389 and investigateLogSet() is True:
1390 return True;
1391
1392 return None;
1393
1394 def isResultFromVMRun(self, oFailedResult, sResultLog):
1395 """
1396 Checks if this result and corresponding log snippet looks like a VM run.
1397 """
1398
1399 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1400 if sResultLog.find(' startVm') > 0:
1401 return True;
1402
1403 # Any other indicators? No?
1404 _ = oFailedResult;
1405 return False;
1406
1407
1408 ## Things we search a VBoxSVC log for to figure out why something went bust.
1409 katSimpleSvcLogReasons = [
1410 # ( Whether to stop on hit, reason tuple, needle text. )
1411 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* exited normally: -1073741819 \(0xc0000005\)') ),
1412 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* was signalled: 11 \(0xb\)') ),
1413 ];
1414
1415 def investigateSvcLogForVMRun(self, oCaseFile, sSvcLog):
1416 """
1417 Check the VBoxSVC log for a single VM run.
1418 """
1419 if sSvcLog:
1420 fRet = self.scanLog([sSvcLog,], self.katSimpleSvcLogReasons, oCaseFile, oCaseFile.oTree.idTestResult);
1421 if fRet is True or fRet is None:
1422 return True;
1423 return False;
1424
1425 def investigateNtHardLogForVMRun(self, oCaseFile):
1426 """
1427 Check if the hardening log for a single VM run contains VM crash indications.
1428 """
1429 aoLogFiles = oCaseFile.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseVm);
1430 for oLogFile in aoLogFiles:
1431 if oLogFile.sFile.find('VBoxHardening.log') >= 0:
1432 sLog = oCaseFile.getLogFile(oLogFile);
1433 if sLog.find('Quitting: ExitCode=0xc0000005') >= 0:
1434 return oCaseFile.noteReasonForId(self.ktReason_Unknown_VM_Crash, oCaseFile.oTree.idTestResult);
1435 return False;
1436
1437
1438 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1439 """
1440 Checks out a VBox VM test.
1441
1442 This is generic investigation of a test running one or more VMs, like
1443 for example a smoke test or a guest installation test.
1444
1445 The fSingleVM parameter is a hint, which probably won't come in useful.
1446 """
1447 _ = fSingleVM;
1448
1449 #
1450 # Get a list of test result failures we should be looking into and the main log.
1451 #
1452 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1453 sMainLog = oCaseFile.getMainLog();
1454
1455 #
1456 # There are a set of errors ending up on the top level result record.
1457 # Should deal with these first.
1458 #
1459 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1460 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1461 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1462 # ignore other failures in the test if we're not a little bit careful here.
1463 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1464 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1465 return self.caseClosed(oCaseFile);
1466
1467 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1468 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1469 (' Timeout: ', ' Attempting to abort child...',) ):
1470 if sMainLog.find('*** glibc detected *** /') > 0:
1471 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1472 else:
1473 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1474 return self.caseClosed(oCaseFile);
1475
1476 # Look for heap corruption without visible hang.
1477 if sMainLog.find('*** glibc detected *** /') > 0 \
1478 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1479 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1480 return self.caseClosed(oCaseFile);
1481
1482 # Out of memory w/ timeout.
1483 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1484 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1485 return self.caseClosed(oCaseFile);
1486
1487 # Stale files like vts_rm.exe (windows).
1488 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1489 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1490 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1491 return self.caseClosed(oCaseFile);
1492
1493 #
1494 # XPCOM screwup
1495 #
1496 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1497 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1498 return self.caseClosed(oCaseFile);
1499
1500 #
1501 # Go thru each failed result.
1502 #
1503 for oFailedResult in aoFailedResults:
1504 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1505 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1506 if oFailedResult.sName == 'Installing VirtualBox':
1507 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1508
1509 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1510 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1511
1512 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1513 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1514
1515 elif self.isResultFromGATest(oFailedResult):
1516 self.investigateGATest(oCaseFile, oFailedResult, sResultLog);
1517
1518 elif sResultLog.find('most likely not unique') > 0:
1519 oCaseFile.noteReasonForId(self.ktReason_Host_NetworkMisconfiguration, oFailedResult.idTestResult)
1520 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1521 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1522
1523 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1524 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1525 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1526
1527 elif sResultLog.find('** error: no action was specified') > 0 \
1528 or sResultLog.find('(len(self._asXml, asText))') > 0:
1529 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1530
1531 else:
1532 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1533 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1534
1535 #
1536 # Check VBoxSVC.log and VBoxHardening.log for VM crashes if inconclusive on single VM runs.
1537 #
1538 if fSingleVM and len(oCaseFile.dReasonForResultId) < len(aoFailedResults):
1539 self.dprint(u'Got %u out of %u - checking VBoxSVC.log...'
1540 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1541 if self.investigateSvcLogForVMRun(oCaseFile, oCaseFile.getSvcLog()):
1542 return self.caseClosed(oCaseFile);
1543 if self.investigateNtHardLogForVMRun(oCaseFile):
1544 return self.caseClosed(oCaseFile);
1545
1546 #
1547 # Report home and close the case if we got them all, otherwise log it.
1548 #
1549 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1550 return self.caseClosed(oCaseFile);
1551
1552 if oCaseFile.dReasonForResultId:
1553 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1554 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1555 else:
1556 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1557 return False;
1558
1559
1560 ## Things we search a main log for to figure out why something in the API test went bust.
1561 katSimpleApiMainLogReasons = [
1562 # ( Whether to stop on hit, reason tuple, needle text. )
1563 ( True, ktReason_Networking_Nonexistent_host_nic,
1564 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1565 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1566 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1567 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1568 ( True, ktReason_API_Digest_Mismatch, 'Digest mismatch (VERR_NOT_EQUAL)' ),
1569 ( True, ktReason_API_MoveVM_SharingViolation, 'rc=VBOX_E_IPRT_ERROR text="Could not copy the log file ' ),
1570 ( True, ktReason_API_MoveVM_InvalidParameter,
1571 'rc=VBOX_E_IPRT_ERROR text="Could not copy the setting file ' ),
1572 ( True, ktReason_API_Open_Session_Failed, 'error: failed to open session for' ),
1573 ];
1574
1575 def investigateVBoxApiTest(self, oCaseFile):
1576 """
1577 Checks out a VBox API test.
1578 """
1579
1580 #
1581 # Get a list of test result failures we should be looking into and the main log.
1582 #
1583 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1584 sMainLog = oCaseFile.getMainLog();
1585
1586 #
1587 # Go thru each failed result.
1588 #
1589 for oFailedResult in aoFailedResults:
1590 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1591 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1592 if oFailedResult.sName == 'Installing VirtualBox':
1593 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1594
1595 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1596 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1597
1598 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1599 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1600
1601 else:
1602 fFoundSomething = False;
1603 for fStopOnHit, tReason, sNeedle in self.katSimpleApiMainLogReasons:
1604 if sResultLog.find(sNeedle) > 0:
1605 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1606 fFoundSomething = True;
1607 if fStopOnHit:
1608 break;
1609 if fFoundSomething:
1610 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1611 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1612
1613 #
1614 # Report home and close the case if we got them all, otherwise log it.
1615 #
1616 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1617 return self.caseClosed(oCaseFile);
1618
1619 if oCaseFile.dReasonForResultId:
1620 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1621 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1622 else:
1623 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1624 return False;
1625
1626
1627 def reasoningFailures(self):
1628 """
1629 Guess the reason for failures.
1630 """
1631 #
1632 # Get a list of failed test sets without any assigned failure reason.
1633 #
1634 cGot = 0;
1635 if not self.oConfig.aidTestSets:
1636 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack,
1637 tsNow = self.tsNow);
1638 else:
1639 aoTestSets = [self.oTestSetLogic.getById(idTestSet) for idTestSet in self.oConfig.aidTestSets];
1640 for oTestSet in aoTestSets:
1641 self.dprint(u'----------------------------------- #%u, status %s -----------------------------------'
1642 % ( oTestSet.idTestSet, oTestSet.enmStatus,));
1643
1644 #
1645 # Open a case file and assign it to the right investigator.
1646 #
1647 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1648 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1649 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1650 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1651 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1652
1653 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1654
1655 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1656 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1657 fRc = self.investigateBadTestBox(oCaseFile);
1658
1659 elif oCaseFile.isVBoxUnitTest():
1660 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1661 fRc = self.investigateVBoxUnitTest(oCaseFile);
1662
1663 elif oCaseFile.isVBoxInstallTest() or oCaseFile.isVBoxUnattendedInstallTest():
1664 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1665 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1666
1667 elif oCaseFile.isVBoxUSBTest():
1668 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1669 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1670
1671 elif oCaseFile.isVBoxStorageTest():
1672 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1673 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1674
1675 elif oCaseFile.isVBoxGAsTest():
1676 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1677 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1678
1679 elif oCaseFile.isVBoxAPITest():
1680 self.dprint(u'investigateVBoxApiTest is taking over %s.' % (oCaseFile.sLongName,));
1681 fRc = self.investigateVBoxApiTest(oCaseFile);
1682
1683 elif oCaseFile.isVBoxBenchmarkTest():
1684 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1685 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1686
1687 elif oCaseFile.isVBoxSmokeTest():
1688 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1689 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1690
1691 elif oCaseFile.isVBoxSerialTest():
1692 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1693 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1694
1695 else:
1696 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1697 fRc = False;
1698 cGot += fRc is True;
1699
1700 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1701 return 0;
1702
1703
1704 def main(self):
1705 """
1706 The 'main' function.
1707 Return exit code (0, 1, etc).
1708 """
1709 # Database stuff.
1710 self.oDb = TMDatabaseConnection()
1711 self.oTestResultLogic = TestResultLogic(self.oDb);
1712 self.oTestSetLogic = TestSetLogic(self.oDb);
1713 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1714 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1715 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1716 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1717
1718 # Get a fix on our 'now' before we do anything..
1719 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1720 self.tsNow = self.oDb.fetchOne();
1721
1722 # If we're suppost to commit anything we need to get our user ID.
1723 rcExit = 0;
1724 if self.oConfig.fRealRun:
1725 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1726 if self.oLogin is None:
1727 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1728 else:
1729 self.uidSelf = self.oLogin.uid;
1730
1731 #
1732 # Do the stuff.
1733 #
1734 if rcExit == 0:
1735 rcExit = self.selfCheck();
1736 if rcExit == 0:
1737 rcExit = self.badTestBoxManagement();
1738 rcExit2 = self.reasoningFailures();
1739 if rcExit == 0:
1740 rcExit = rcExit2;
1741 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1742 if rcExit == 0:
1743 rcExit = self.badTestBoxManagement();
1744
1745 # Cleanup.
1746 self.oFailureReasonLogic = None;
1747 self.oTestResultFailureLogic = None;
1748 self.oTestSetLogic = None;
1749 self.oTestResultLogic = None;
1750 self.oDb.close();
1751 self.oDb = None;
1752 if self.oLogFile is not None:
1753 self.oLogFile.close();
1754 self.oLogFile = None;
1755 return rcExit;
1756
1757if __name__ == '__main__':
1758 sys.exit(VirtualTestSheriff().main());
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette