VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 93772

Last change on this file since 93772 was 93772, checked in by vboxsync, 3 years ago

vsheriff: Try fix image hashing error with python 3 and newer Pillow versions.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 88.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 93772 2022-02-15 23:03:25Z vboxsync $
4# pylint: disable=line-too-long
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15from __future__ import print_function;
16
17__copyright__ = \
18"""
19Copyright (C) 2012-2022 Oracle Corporation
20
21This file is part of VirtualBox Open Source Edition (OSE), as
22available from http://www.virtualbox.org. This file is free software;
23you can redistribute it and/or modify it under the terms of the GNU
24General Public License (GPL) as published by the Free Software
25Foundation, in version 2 as it comes in the "COPYING" file of the
26VirtualBox OSE distribution. VirtualBox OSE is distributed in the
27hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
28
29The contents of this file may alternatively be used under the terms
30of the Common Development and Distribution License Version 1.0
31(CDDL) only, as it comes in the "COPYING.CDDL" file of the
32VirtualBox OSE distribution, in which case the provisions of the
33CDDL are applicable instead of those of the GPL.
34
35You may elect to license modified versions of this file under the
36terms and conditions of either the GPL or the CDDL or both.
37"""
38__version__ = "$Revision: 93772 $"
39
40
41# Standard python imports
42import hashlib;
43import os;
44import re;
45import smtplib;
46#import subprocess;
47import sys;
48from email.mime.multipart import MIMEMultipart;
49from email.mime.text import MIMEText;
50from email.utils import COMMASPACE;
51
52if sys.version_info[0] >= 3:
53 from io import BytesIO as BytesIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
54else:
55 from StringIO import StringIO as BytesIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
56from optparse import OptionParser; # pylint: disable=deprecated-module
57from PIL import Image; # pylint: disable=import-error
58
59# Add Test Manager's modules path
60g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
61sys.path.append(g_ksTestManagerDir);
62
63# Test Manager imports
64from common import utils;
65from testmanager.core.db import TMDatabaseConnection;
66from testmanager.core.build import BuildDataEx;
67from testmanager.core.failurereason import FailureReasonLogic;
68from testmanager.core.testbox import TestBoxLogic, TestBoxData;
69from testmanager.core.testcase import TestCaseDataEx;
70from testmanager.core.testgroup import TestGroupData;
71from testmanager.core.testset import TestSetLogic, TestSetData;
72from testmanager.core.testresults import TestResultLogic, TestResultFileData;
73from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
74from testmanager.core.useraccount import UserAccountLogic;
75from testmanager.config import g_ksSmtpHost, g_kcSmtpPort, g_ksAlertFrom, \
76 g_ksAlertSubject, g_asAlertList #, g_ksLomPassword;
77
78# Python 3 hacks:
79if sys.version_info[0] >= 3:
80 xrange = range; # pylint: disable=redefined-builtin,invalid-name
81
82
83class VirtualTestSheriffCaseFile(object):
84 """
85 A failure investigation case file.
86
87 """
88
89
90 ## Max log file we'll read into memory. (256 MB)
91 kcbMaxLogRead = 0x10000000;
92
93 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
94 self.oSheriff = oSheriff;
95 self.oTestSet = oTestSet; # TestSetData
96 self.oTree = oTree; # TestResultDataEx
97 self.oBuild = oBuild; # BuildDataEx
98 self.oTestBox = oTestBox; # TestBoxData
99 self.oTestGroup = oTestGroup; # TestGroupData
100 self.oTestCase = oTestCase; # TestCaseDataEx
101 self.sMainLog = ''; # The main log file. Empty string if not accessible.
102 self.sSvcLog = ''; # The VBoxSVC log file. Empty string if not accessible.
103
104 # Generate a case file name.
105 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
106 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
107 % ( self.oTestSet.idTestSet,
108 self.oTestCase.sName,
109 self.oTestBox.sName,
110 self.oTestBox.sOs,
111 self.oTestBox.sOsVersion,
112 self.oTestBox.sCpuArch,
113 self.oTestBox.sCpuName,
114 self.oTestBox.sCpuVendor,
115 self.oBuild.oCat.sProduct,
116 self.oBuild.oCat.sBranch,
117 self.oBuild.oCat.sType,
118 self.oBuild.iRevision, );
119
120 # Investigation notes.
121 self.tReason = None; # None or one of the ktReason_XXX constants.
122 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
123 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
124
125 #
126 # Reason.
127 #
128
129 def noteReason(self, tReason):
130 """ Notes down a possible reason. """
131 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
132 self.tReason = tReason;
133 return True;
134
135 def noteReasonForId(self, tReason, idTestResult, sComment = None):
136 """ Notes down a possible reason for a specific test result. """
137 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
138 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
139 (u' (%s)' % (sComment,)) if sComment is not None else ''));
140 self.dReasonForResultId[idTestResult] = tReason;
141 if sComment is not None:
142 self.dCommentForResultId[idTestResult] = sComment;
143 return True;
144
145
146 #
147 # Test classification.
148 #
149
150 def isVBoxTest(self):
151 """ Test classification: VirtualBox (using the build) """
152 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
153
154 def isVBoxUnitTest(self):
155 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
156 return self.isVBoxTest() \
157 and (self.oTestCase.sName.lower() == 'unit tests' or self.oTestCase.sName.lower() == 'misc: unit tests');
158
159 def isVBoxInstallTest(self):
160 """ Test case classification: VirtualBox Guest installation test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('install:');
163
164 def isVBoxUnattendedInstallTest(self):
165 """ Test case classification: VirtualBox Guest installation test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('uinstall:');
168
169 def isVBoxUSBTest(self):
170 """ Test case classification: VirtualBox USB test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('usb:');
173
174 def isVBoxStorageTest(self):
175 """ Test case classification: VirtualBox Storage test. """
176 return self.isVBoxTest() \
177 and self.oTestCase.sName.lower().startswith('storage:');
178
179 def isVBoxGAsTest(self):
180 """ Test case classification: VirtualBox Guest Additions test. """
181 return self.isVBoxTest() \
182 and ( self.oTestCase.sName.lower().startswith('guest additions')
183 or self.oTestCase.sName.lower().startswith('ga\'s tests'));
184
185 def isVBoxAPITest(self):
186 """ Test case classification: VirtualBox API test. """
187 return self.isVBoxTest() \
188 and self.oTestCase.sName.lower().startswith('api:');
189
190 def isVBoxBenchmarkTest(self):
191 """ Test case classification: VirtualBox Benchmark test. """
192 return self.isVBoxTest() \
193 and self.oTestCase.sName.lower().startswith('benchmark:');
194
195 def isVBoxSmokeTest(self):
196 """ Test case classification: Smoke test. """
197 return self.isVBoxTest() \
198 and self.oTestCase.sName.lower().startswith('smoketest');
199
200 def isVBoxSerialTest(self):
201 """ Test case classification: Smoke test. """
202 return self.isVBoxTest() \
203 and self.oTestCase.sName.lower().startswith('serial:');
204
205
206 #
207 # Utility methods.
208 #
209
210 def getMainLog(self):
211 """
212 Tries to read the main log file since this will be the first source of information.
213 """
214 if self.sMainLog:
215 return self.sMainLog;
216 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
217 if oFile is not None:
218 try:
219 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
220 except Exception as oXcpt:
221 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
222 self.sMainLog = '';
223 else:
224 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
225 return self.sMainLog;
226
227 def getLogFile(self, oFile):
228 """
229 Tries to read the given file as a utf-8 log file.
230 oFile is a TestFileDataEx instance.
231 Returns empty string if problems opening or reading the file.
232 """
233 sContent = '';
234 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
235 if oFile is not None:
236 try:
237 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
238 except Exception as oXcpt:
239 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
240 else:
241 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
242 return sContent;
243
244 def getSvcLog(self):
245 """
246 Tries to read the VBoxSVC log file as it typically not associated with a failing test result.
247 Note! Returns the first VBoxSVC log file we find.
248 """
249 if not self.sSvcLog:
250 aoSvcLogFiles = self.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseSvc);
251 if aoSvcLogFiles:
252 self.sSvcLog = self.getLogFile(aoSvcLogFiles[0]);
253 return self.sSvcLog;
254
255 def getScreenshotSha256(self, oFile):
256 """
257 Tries to read the given screenshot file, uncompress it, and do SHA-2
258 on the raw pixels.
259 Returns SHA-2 digest string on success, None on failure.
260 """
261 (oImgFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
262 try:
263 abImageFile = oImgFile.read();
264 except Exception as oXcpt:
265 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
266 else:
267 try:
268 oImage = Image.open(BytesIO(abImageFile));
269 except Exception as oXcpt:
270 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
271 else:
272 try:
273 oHash = hashlib.sha256();
274 if hasattr(oImage, 'tobytes'):
275 oHash.update(oImage.tobytes());
276 else:
277 oHash.update(oImage.tostring()); # pylint: disable=no-member
278 except Exception as oXcpt:
279 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
280 else:
281 return oHash.hexdigest();
282 return None;
283
284
285
286 def isSingleTestFailure(self):
287 """
288 Figure out if this is a single test failing or if it's one of the
289 more complicated ones.
290 """
291 if self.oTree.cErrors == 1:
292 return True;
293 if self.oTree.deepCountErrorContributers() <= 1:
294 return True;
295 return False;
296
297
298
299class VirtualTestSheriff(object): # pylint: disable=too-few-public-methods
300 """
301 Add build info into Test Manager database.
302 """
303
304 ## The user account for the virtual sheriff.
305 ksLoginName = 'vsheriff';
306
307 def __init__(self):
308 """
309 Parse command line.
310 """
311 self.oDb = None;
312 self.tsNow = None;
313 self.oTestResultLogic = None;
314 self.oTestSetLogic = None;
315 self.oFailureReasonLogic = None; # FailureReasonLogic;
316 self.oTestResultFailureLogic = None; # TestResultFailureLogic
317 self.oLogin = None;
318 self.uidSelf = -1;
319 self.oLogFile = None;
320 self.asBsodReasons = [];
321 self.asUnitTestReasons = [];
322
323 oParser = OptionParser();
324 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
325 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
326 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
327 help = 'Work period specified in hours. Defauls is 2 hours.');
328 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
329 help = 'Whether to commit the findings to the database. Default is a dry run.');
330 oParser.add_option('--testset', dest = 'aidTestSets', metavar = '<id>', default = [], type = 'int', action = 'append',
331 help = 'Only investigate this one. Accumulates IDs when repeated.');
332 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
333 help = 'Quiet execution');
334 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
335 help = 'Where to log messages.');
336 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
337 help = 'Enables debug mode.');
338
339 (self.oConfig, _) = oParser.parse_args();
340
341 if self.oConfig.sLogFile:
342 self.oLogFile = open(self.oConfig.sLogFile, "a");
343 self.oLogFile.write('VirtualTestSheriff: $Revision: 93772 $ \n');
344
345
346 def eprint(self, sText):
347 """
348 Prints error messages.
349 Returns 1 (for exit code usage.)
350 """
351 print('error: %s' % (sText,));
352 if self.oLogFile is not None:
353 if sys.version_info[0] >= 3:
354 self.oLogFile.write(u'error: %s\n' % (sText,));
355 else:
356 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
357 return 1;
358
359 def dprint(self, sText):
360 """
361 Prints debug info.
362 """
363 if self.oConfig.fDebug:
364 if not self.oConfig.fQuiet:
365 print('debug: %s' % (sText, ));
366 if self.oLogFile is not None:
367 if sys.version_info[0] >= 3:
368 self.oLogFile.write(u'debug: %s\n' % (sText,));
369 else:
370 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
371 return 0;
372
373 def vprint(self, sText):
374 """
375 Prints verbose info.
376 """
377 if not self.oConfig.fQuiet:
378 print('info: %s' % (sText,));
379 if self.oLogFile is not None:
380 if sys.version_info[0] >= 3:
381 self.oLogFile.write(u'info: %s\n' % (sText,));
382 else:
383 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
384 return 0;
385
386 def getFailureReason(self, tReason):
387 """ Gets the failure reason object for tReason. """
388 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
389
390 def selfCheck(self):
391 """ Does some self checks, looking up things we expect to be in the database and such. """
392 rcExit = 0;
393 for sAttr in dir(self.__class__):
394 if sAttr.startswith('ktReason_'):
395 tReason = getattr(self.__class__, sAttr);
396 oFailureReason = self.getFailureReason(tReason);
397 if oFailureReason is None:
398 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
399 % (tReason[1], tReason[0],));
400
401 # Check the user account as well.
402 if self.oLogin is None:
403 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
404 if oLogin is None:
405 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
406 return rcExit;
407
408 def sendEmailAlert(self, uidAuthor, sBodyText):
409 """
410 Sends email alert.
411 """
412
413 # Get author email
414 self.oDb.execute('SELECT sEmail FROM Users WHERE uid=%s', (uidAuthor,));
415 sFrom = self.oDb.fetchOne();
416 if sFrom is not None:
417 sFrom = sFrom[0];
418 else:
419 sFrom = g_ksAlertFrom;
420
421 # Gather recipient list.
422 asEmailList = [];
423 for sUser in g_asAlertList:
424 self.oDb.execute('SELECT sEmail FROM Users WHERE sUsername=%s', (sUser,));
425 sEmail = self.oDb.fetchOne();
426 if sEmail:
427 asEmailList.append(sEmail[0]);
428 if not asEmailList:
429 return self.eprint('No email addresses to send alter to!');
430
431 # Compose the message.
432 oMsg = MIMEMultipart();
433 oMsg['From'] = sFrom;
434 oMsg['To'] = COMMASPACE.join(asEmailList);
435 oMsg['Subject'] = g_ksAlertSubject;
436 oMsg.attach(MIMEText(sBodyText, 'plain'))
437
438 # Try send it.
439 try:
440 oSMTP = smtplib.SMTP(g_ksSmtpHost, g_kcSmtpPort);
441 oSMTP.sendmail(sFrom, asEmailList, oMsg.as_string())
442 oSMTP.quit()
443 except smtplib.SMTPException as oXcpt:
444 return self.eprint('Failed to send mail: %s' % (oXcpt,));
445
446 return 0;
447
448 def badTestBoxManagement(self):
449 """
450 Looks for bad test boxes and first tries once to reboot them then disables them.
451 """
452 rcExit = 0;
453
454 #
455 # We skip this entirely if we're running in the past and not in harmless debug mode.
456 #
457 if self.oConfig.cStartHoursAgo != 0 \
458 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
459 return rcExit;
460 tsNow = self.tsNow if self.oConfig.fDebug else None;
461 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
462 oTestBoxLogic = TestBoxLogic(self.oDb);
463
464 #
465 # Generate a list of failures reasons we consider bad-testbox behavior.
466 #
467 aidFailureReasons = [
468 self.getFailureReason(self.ktReason_Host_DriverNotLoaded).idFailureReason,
469 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
470 self.getFailureReason(self.ktReason_Host_DriverNotCompilable).idFailureReason,
471 self.getFailureReason(self.ktReason_Host_InstallationFailed).idFailureReason,
472 ];
473
474 #
475 # Get list of bad test boxes for given period and check them out individually.
476 #
477 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
478 aidFailureReasons = aidFailureReasons);
479 for idTestBox in aidBadTestBoxes:
480 # Skip if the testbox is already disabled or has a pending reboot command.
481 try:
482 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
483 except Exception as oXcpt:
484 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
485 continue;
486 if not oTestBox.fEnabled:
487 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
488 % ( idTestBox, oTestBox.sName, ));
489 continue;
490 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
491 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
492 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
493 continue;
494
495 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
496 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
497 cOkay = 0;
498 cBad = 0;
499 iFirstOkay = len(aoSets);
500 for iSet, oSet in enumerate(aoSets):
501 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
502 cBad += 1;
503 else:
504 # Check for bad failure reasons.
505 oFailure = None;
506 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
507 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oSet.idTestSet)
508 aoFailedResults = oTree.getListOfFailures();
509 for oFailedResult in aoFailedResults:
510 oFailure = self.oTestResultFailureLogic.getById(oFailedResult.idTestResult);
511 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
512 break;
513 oFailure = None;
514 if oFailure is not None:
515 cBad += 1;
516 else:
517 # This is an okay test result then.
518 ## @todo maybe check the elapsed time here, it could still be a bad run?
519 cOkay += 1;
520 if iFirstOkay > iSet:
521 iFirstOkay = iSet;
522 if iSet > 10:
523 break;
524
525 # We react if there are two or more bad-testbox statuses at the head of the
526 # history and at least three in the last 10 results.
527 if iFirstOkay >= 2 and cBad > 2:
528 if oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
529 sComment = u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
530 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
531 self.vprint(sComment);
532 self.sendEmailAlert(self.uidSelf, sComment);
533 if self.oConfig.fRealRun is True:
534 try:
535 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
536 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
537 % (iFirstOkay, cBad, cOkay),);
538 except Exception as oXcpt:
539 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
540 else:
541 sComment = u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
542 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
543 self.vprint(sComment);
544 self.sendEmailAlert(self.uidSelf, sComment);
545 if self.oConfig.fRealRun is True:
546 try:
547 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
548 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
549 % (iFirstOkay, cBad, cOkay),);
550 except Exception as oXcpt:
551 rcExit = self.eprint(u'Error rebooting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
552 else:
553 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
554 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
555
556 ## @todo r=bird: review + rewrite;
557 ## - no selecting here, that belongs in the core/*.py files.
558 ## - preserve existing comments.
559 ## - doing way too much in the try/except block.
560 ## - No password quoting in the sshpass command that always fails (127).
561 ## - Timeout is way to low. testboxmem1 need more than 10 min to take a dump, ages to
562 ## get thru POST and another 5 just to time out in grub. Should be an hour or so.
563 ## Besides, it need to be constant elsewhere in the file, not a variable here.
564 ##
565 ##
566 ## Reset hanged testboxes
567 ##
568 #cStatusTimeoutMins = 10;
569 #
570 #self.oDb.execute('SELECT TestBoxStatuses.idTestBox\n'
571 # ' FROM TestBoxStatuses, TestBoxes\n'
572 # ' WHERE TestBoxStatuses.tsUpdated >= (CURRENT_TIMESTAMP - interval \'%s hours\')\n'
573 # ' AND TestBoxStatuses.tsUpdated < (CURRENT_TIMESTAMP - interval \'%s minutes\')\n'
574 # ' AND TestBoxStatuses.idTestBox = TestBoxes.idTestBox\n'
575 # ' AND Testboxes.tsExpire = \'infinity\'::timestamp', (cHoursBack,cStatusTimeoutMins));
576 #for idTestBox in self.oDb.fetchAll():
577 # idTestBox = idTestBox[0];
578 # try:
579 # oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
580 # except Exception as oXcpt:
581 # rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
582 # continue;
583 # # Skip if the testbox is already disabled, already reset or there's no iLOM
584 # if not oTestBox.fEnabled or oTestBox.ipLom is None or oTestBox.sComment is not None and oTestBox.sComment.find('Automatically reset') >= 0:
585 # self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
586 # % ( idTestBox, oTestBox.sName, ));
587 # continue;
588 # ## @todo get iLOM credentials from a table?
589 # sCmd = 'sshpass -p%s ssh -oStrictHostKeyChecking=no root@%s show /SP && reset /SYS' % (g_ksLomPassword, oTestBox.ipLom,);
590 # try:
591 # oPs = subprocess.Popen(sCmd, stdout=subprocess.PIPE, shell=True);
592 # sStdout = oPs.communicate()[0];
593 # iRC = oPs.wait();
594 #
595 # oTestBox.sComment = 'Automatically reset (iRC=%u sStdout=%s)' % (iRC, sStdout,);
596 # oTestBoxLogic.editEntry(oTestBox, self.uidSelf, fCommit = True);
597 #
598 # sComment = u'Reset testbox #%u (%s) - iRC=%u sStduot=%s' % ( idTestBox, oTestBox.sName, iRC, sStdout);
599 # self.vprint(sComment);
600 # self.sendEmailAlert(self.uidSelf, sComment);
601 #
602 # except Exception as oXcpt:
603 # rcExit = self.eprint(u'Error resetting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
604 #
605 return rcExit;
606
607
608 ## @name Failure reasons we know.
609 ## @{
610
611 ktReason_Add_Installer_Win_Failed = ( 'Additions', 'Win GA install' );
612 ktReason_Add_ShFl_Automount = ( 'Additions', 'Automounting' );
613 ktReason_Add_ShFl_FsPerf = ( 'Additions', 'FsPerf' );
614 ktReason_Add_GstCtl_Preparations = ( 'Additions', 'GstCtl preparations' );
615 ktReason_Add_GstCtl_SessionBasics = ( 'Additions', 'Session basics' );
616 ktReason_Add_GstCtl_SessionProcRefs = ( 'Additions', 'Session process' );
617 ktReason_Add_GstCtl_Session_Reboot = ( 'Additions', 'Session reboot' );
618 ktReason_Add_GstCtl_CopyFromGuest_Timeout = ( 'Additions', 'CopyFromGuest timeout' );
619 ktReason_Add_GstCtl_CopyToGuest_Timeout = ( 'Additions', 'CopyToGuest timeout' );
620 ktReason_Add_FlushViewOfFile = ( 'Additions', 'FlushViewOfFile' );
621 ktReason_Add_Mmap_Coherency = ( 'Additions', 'mmap coherency' );
622 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
623 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
624 ktReason_BSOD_0000007F = ( 'BSOD', '0x0000007F' );
625 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
626 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
627 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
628 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
629 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
630 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
631 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
632 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
633 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
634 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
635 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
636 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
637 ktReason_Host_DriverNotCompilable = ( 'Host', 'Driver not compilable' );
638 ktReason_Host_InstallationFailed = ( 'Host', 'Installation failed' );
639 ktReason_Host_InstallationWantReboot = ( 'Host', 'Installation want reboot' );
640 ktReason_Host_InvalidPackage = ( 'Host', 'ERROR_INSTALL_PACKAGE_INVALID' );
641 ktReason_Host_InstallSourceAbsent = ( 'Host', 'ERROR_INSTALL_SOURCE_ABSENT' );
642 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
643 ktReason_Host_DoubleFreeHeap = ( 'Host', 'Double free or corruption' );
644 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
645 ktReason_Host_win32com_gen_py = ( 'Host', 'win32com.gen_py' );
646 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
647 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
648 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
649 ktReason_Host_NetworkMisconfiguration = ( 'Host', 'Network misconfiguration' );
650 ktReason_Host_TSTInfo_Accuracy_OOR = ( 'Host', 'TSTInfo accuracy out of range' );
651 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
652 ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND = ( 'Networking', 'VERR_INTNET_FLT_IF_NOT_FOUND' );
653 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
654 ktReason_OSInstall_Udev_hang = ( 'O/S Install', 'udev hang' );
655 ktReason_OSInstall_Sata_no_BM = ( 'O/S Install', 'SATA busmaster bit not set' );
656 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
657 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
658 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
659 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
660 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
661 ktReason_API_std_bad_alloc = ( 'API / (XP)COM', 'std::bad_alloc' );
662 ktReason_API_Digest_Mismatch = ( 'API / (XP)COM', 'Digest mismatch' );
663 ktReason_API_MoveVM_SharingViolation = ( 'API / (XP)COM', 'MoveVM sharing violation' );
664 ktReason_API_MoveVM_InvalidParameter = ( 'API / (XP)COM', 'MoveVM invalid parameter' );
665 ktReason_API_Open_Session_Failed = ( 'API / (XP)COM', 'Open session failed' );
666 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
667 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
668 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
669 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
670 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
671 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
672 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
673 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
674 ktReason_Unknown_HalReturnToFirmware = ( 'Unknown', 'HalReturnToFirmware' );
675 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
676 ktReason_Unknown_VM_Start_Error = ( 'Unknown', 'VM Start Error' );
677 ktReason_Unknown_VM_Runtime_Error = ( 'Unknown', 'VM Runtime Error' );
678 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
679 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
680 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
681 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
682 ktReason_GuestBug_CompizVBoxQt = ( 'Guest Bug', 'Compiz + VirtualBox Qt GUI crash' );
683 ## @}
684
685 ## BSOD category.
686 ksBsodCategory = 'BSOD';
687 ## Special reason indicating that the flesh and blood sheriff has work to do.
688 ksBsodAddNew = 'Add new BSOD';
689
690 ## Unit test category.
691 ksUnitTestCategory = 'Unit';
692 ## Special reason indicating that the flesh and blood sheriff has work to do.
693 ksUnitTestAddNew = 'Add new';
694
695 ## Used for indica that we shouldn't report anything for this test result ID and
696 ## consider promoting the previous error to test set level if it's the only one.
697 ktHarmless = ( 'Probably', 'Caused by previous error' );
698
699
700 def caseClosed(self, oCaseFile):
701 """
702 Reports the findings in the case and closes it.
703 """
704 #
705 # Log it and create a dReasonForReasultId we can use below.
706 #
707 dCommentForResultId = oCaseFile.dCommentForResultId;
708 if oCaseFile.dReasonForResultId:
709 # Must weed out ktHarmless.
710 dReasonForResultId = {};
711 for idKey, tReason in oCaseFile.dReasonForResultId.items():
712 if tReason is not self.ktHarmless:
713 dReasonForResultId[idKey] = tReason;
714 if not dReasonForResultId:
715 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
716 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
717 return False;
718
719 # Try promote to single reason.
720 atValues = dReasonForResultId.values();
721 fSingleReason = True;
722 if len(dReasonForResultId) == 1 and next(iter(dReasonForResultId.keys())) != oCaseFile.oTestSet.idTestResult:
723 self.dprint(u'Promoting single reason to whole set: %s' % (next(iter(atValues)),));
724 elif len(dReasonForResultId) > 1 and len(atValues) == list(atValues).count(next(iter(atValues))):
725 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), next(iter(atValues))));
726 else:
727 fSingleReason = False;
728 if fSingleReason:
729 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: next(iter(atValues)), };
730 if dCommentForResultId:
731 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: next(iter(dCommentForResultId.values())), };
732 elif oCaseFile.tReason is not None:
733 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
734 else:
735 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
736 return False;
737
738 self.vprint(u'Closing %s with following reason%s: %s'
739 % ( oCaseFile.sName, 's' if len(dReasonForResultId) > 1 else '', dReasonForResultId, ));
740
741 #
742 # Add the test failure reason record(s).
743 #
744 for idTestResult, tReason in dReasonForResultId.items():
745 oFailureReason = self.getFailureReason(tReason);
746 if oFailureReason is not None:
747 sComment = 'Set by $Revision: 93772 $' # Handy for reverting later.
748 if idTestResult in dCommentForResultId:
749 sComment += ': ' + dCommentForResultId[idTestResult];
750
751 oAdd = TestResultFailureData();
752 oAdd.initFromValues(idTestResult = idTestResult,
753 idFailureReason = oFailureReason.idFailureReason,
754 uidAuthor = self.uidSelf,
755 idTestSet = oCaseFile.oTestSet.idTestSet,
756 sComment = sComment,);
757 if self.oConfig.fRealRun:
758 try:
759 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
760 except Exception as oXcpt:
761 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
762 % (oXcpt, oAdd, oCaseFile.sLongName,));
763 else:
764 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
765 return True;
766
767 #
768 # Tools for assiting log parsing.
769 #
770
771 @staticmethod
772 def matchFollowedByLines(sStr, off, asFollowingLines):
773 """ Worker for isThisFollowedByTheseLines. """
774
775 # Advance off to the end of the line.
776 off = sStr.find('\n', off);
777 if off < 0:
778 return False;
779 off += 1;
780
781 # Match each string with the subsequent lines.
782 for iLine, sLine in enumerate(asFollowingLines):
783 offEnd = sStr.find('\n', off);
784 if offEnd < 0:
785 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
786 if sLine and sStr.find(sLine, off, offEnd) < 0:
787 return False;
788
789 # next line.
790 off = offEnd + 1;
791
792 return True;
793
794 @staticmethod
795 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
796 """
797 Looks for a line contining sFirst which is then followed by lines
798 with the strings in asFollowingLines. (No newline chars anywhere!)
799 Returns True / False.
800 """
801 off = sStr.find(sFirst, 0);
802 while off >= 0:
803 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
804 return True;
805 off = sStr.find(sFirst, off + 1);
806 return False;
807
808 @staticmethod
809 def findAndReturnRestOfLine(sHaystack, sNeedle):
810 """
811 Looks for sNeedle in sHaystack.
812 Returns The text following the needle up to the end of the line.
813 Returns None if not found.
814 """
815 if sHaystack is None:
816 return None;
817 off = sHaystack.find(sNeedle);
818 if off < 0:
819 return None;
820 off += len(sNeedle)
821 offEol = sHaystack.find('\n', off);
822 if offEol < 0:
823 offEol = len(sHaystack);
824 return sHaystack[off:offEol]
825
826 @staticmethod
827 def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
828 """
829 Looks for sNeedle in zeroe or more haystacks (asHaystack).
830 Returns The text following the first needed found up to the end of the line.
831 Returns None if not found.
832 """
833 for sHaystack in asHaystacks:
834 sRet = VirtualTestSheriff.findAndReturnRestOfLine(sHaystack, sNeedle);
835 if sRet is not None:
836 return sRet;
837 return None;
838
839
840 #
841 # The investigative units.
842 #
843
844 katSimpleInstallUninstallMainLogReasons = [
845 # ( Whether to stop on hit, reason tuple, needle text. )
846 ( False, ktReason_Host_LeftoverService,
847 'SERVICE_NAME: vbox' ),
848 ( False, ktReason_Host_LeftoverService,
849 'Seems installation was skipped. Old version lurking behind? Not the fault of this build/test run!'),
850 ];
851
852 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
853 'darwin': [
854 # ( Whether to stop on hit, reason tuple, needle text. )
855 ( True, ktReason_Host_DriverNotUnloading,
856 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
857 ],
858 'linux': [
859 # ( Whether to stop on hit, reason tuple, needle text. )
860 ( True, ktReason_Host_DriverNotCompilable,
861 'This system is not currently set up to build kernel modules' ),
862 ( True, ktReason_Host_DriverNotCompilable,
863 'This system is currently not set up to build kernel modules' ),
864 ( True, ktReason_Host_InstallationFailed,
865 'vboxdrv.sh: failed: Look at /var/log/vbox-install.log to find out what went wrong.' ),
866 ( True, ktReason_Host_DriverNotUnloading,
867 'Cannot unload module vboxdrv'),
868 ],
869 'solaris': [
870 # ( Whether to stop on hit, reason tuple, needle text. )
871 ( True, ktReason_Host_DriverNotUnloading, 'can\'t unload the module: Device busy' ),
872 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: Host module ...FAILED!' ),
873 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: NetFilter (Crossbow) module ...FAILED!' ),
874 ( True, ktReason_Host_InstallationFailed, 'svcadm: Couldn\'t bind to svc.configd.' ),
875 ( True, ktReason_Host_InstallationFailed, 'pkgadd: ERROR: postinstall script did not complete successfully' ),
876 ],
877 'win': [
878 # ( Whether to stop on hit, reason tuple, needle text. )
879 ( True, ktReason_Host_InstallationWantReboot, 'ERROR_SUCCESS_REBOOT_REQUIRED' ),
880 ( False, ktReason_Host_InstallationFailed, 'Installation error.' ),
881 ( True, ktReason_Host_InvalidPackage, 'Uninstaller failed, exit code: 1620' ),
882 ( True, ktReason_Host_InstallSourceAbsent, 'Uninstaller failed, exit code: 1612' ),
883 ],
884 };
885
886
887 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
888 """
889 Investigates an install or uninstall failure.
890
891 We lump the two together since the installation typically also performs
892 an uninstall first and will be seeing similar issues to the uninstall.
893 """
894 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
895
896 if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
897 oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
898 return True;
899
900 atSimple = self.katSimpleInstallUninstallMainLogReasons;
901 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
902 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
903
904 fFoundSomething = False;
905 for fStopOnHit, tReason, sNeedle in atSimple:
906 if sResultLog.find(sNeedle) > 0:
907 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
908 if fStopOnHit:
909 return True;
910 fFoundSomething = True;
911
912 return fFoundSomething if fFoundSomething else None;
913
914
915 def investigateBadTestBox(self, oCaseFile):
916 """
917 Checks out bad-testbox statuses.
918 """
919 _ = oCaseFile;
920 return False;
921
922
923 def investigateVBoxUnitTest(self, oCaseFile):
924 """
925 Checks out a VBox unittest problem.
926 """
927
928 #
929 # Process simple test case failures first, using their name as reason.
930 # We do the reason management just like for BSODs.
931 #
932 cRelevantOnes = 0;
933 sMainLog = oCaseFile.getMainLog();
934 aoFailedResults = oCaseFile.oTree.getListOfFailures();
935 for oFailedResult in aoFailedResults:
936 if oFailedResult is oCaseFile.oTree:
937 self.vprint('TODO: toplevel failure');
938 cRelevantOnes += 1
939
940 elif oFailedResult.sName == 'Installing VirtualBox':
941 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
942 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
943 cRelevantOnes += 1
944
945 elif oFailedResult.sName == 'Uninstalling VirtualBox':
946 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
947 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
948 cRelevantOnes += 1
949
950 elif oFailedResult.oParent is not None:
951 # Get the 2nd level node because that's where we'll find the unit test name.
952 while oFailedResult.oParent.oParent is not None:
953 oFailedResult = oFailedResult.oParent;
954
955 # Only report a failure once.
956 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
957 sKey = oFailedResult.sName;
958 if sKey.startswith('testcase/'):
959 sKey = sKey[9:];
960 if sKey in self.asUnitTestReasons:
961 tReason = ( self.ksUnitTestCategory, sKey );
962 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
963 else:
964 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
965 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
966 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
967 cRelevantOnes += 1
968 else:
969 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
970
971 #
972 # If we've caught all the relevant ones by now, report the result.
973 #
974 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
975 return self.caseClosed(oCaseFile);
976 return False;
977
978 def extractGuestCpuStack(self, sInfoText):
979 """
980 Extracts the guest CPU stacks from the input file.
981
982 Returns a dictionary keyed by the CPU number, value being a list of
983 raw stack lines (no header).
984 Returns empty dictionary if no stacks where found.
985 """
986 dRet = {};
987 off = 0;
988 while True:
989 # Find the stack.
990 offStart = sInfoText.find('=== start guest stack VCPU ', off);
991 if offStart < 0:
992 break;
993 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
994 if offEnd >= 0:
995 offEnd += 3;
996 else:
997 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
998 if offEnd < 0:
999 offEnd = len(sInfoText);
1000
1001 sStack = sInfoText[offStart : offEnd];
1002 sStack = sStack.replace('\r',''); # paranoia
1003 asLines = sStack.split('\n');
1004
1005 # Figure the CPU.
1006 asWords = asLines[0].split();
1007 if len(asWords) < 6 or not asWords[5].isdigit():
1008 break;
1009 iCpu = int(asWords[5]);
1010
1011 # Add it and advance.
1012 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
1013 off = offEnd;
1014 return dRet;
1015
1016 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
1017 """ Investigates kvm_lock_spinning deadlocks """
1018 #
1019 # Extract the stacks. We need more than one CPU to create a deadlock.
1020 #
1021 dStacks = self.extractGuestCpuStack(sInfoText);
1022 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
1023 if len(dStacks) >= 2:
1024 #
1025 # Examin each of the stacks. Each must have kvm_lock_spinning in
1026 # one of the first three entries.
1027 #
1028 cHits = 0;
1029 for iCpu in dStacks:
1030 asBacktrace = dStacks[iCpu];
1031 for iFrame in xrange(min(3, len(asBacktrace))):
1032 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
1033 cHits += 1;
1034 break;
1035 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
1036 if cHits == len(dStacks):
1037 return (True, self.ktReason_VMM_kvm_lock_spinning);
1038
1039 _ = dLogs; _ = oCaseFile;
1040 return (False, None);
1041
1042 def investigateInfoHalReturnToFirmware(self, oCaseFile, sInfoText, dLogs):
1043 """ Investigates HalReturnToFirmware hangs """
1044 del oCaseFile
1045 del sInfoText
1046 del dLogs
1047 # hope that's sufficient
1048 return (True, self.ktReason_Unknown_HalReturnToFirmware);
1049
1050 ## Things we search a main or VM log for to figure out why something went bust.
1051 ## @note DO NOT ADD MORE STUFF HERE!
1052 ## Please use katSimpleMainLogReasons and katSimpleVmLogReasons instead!
1053 katSimpleMainAndVmLogReasonsDeprecated = [
1054 # ( Whether to stop on hit, reason tuple, needle text. )
1055 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
1056 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
1057 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
1058 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
1059 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
1060 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
1061 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
1062 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
1063 ( True, ktReason_Networking_Nonexistent_host_nic,
1064 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1065 ( True, ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND,
1066 'Failed to attach the network LUN (VERR_INTNET_FLT_IF_NOT_FOUND)' ),
1067 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
1068 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1069 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1070 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1071 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
1072 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
1073 ( True, ktReason_Unknown_File_Not_Found,
1074 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
1075 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
1076 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
1077 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
1078 ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' ),
1079 ( True, ktReason_OSInstall_Sata_no_BM, 'PCHS=14128/14134/8224' ),
1080 ( True, ktReason_Host_DoubleFreeHeap, 'double free or corruption' ),
1081 #( False, ktReason_Unknown_VM_Start_Error, 'VMSetError: ' ), - false positives for stuff like:
1082 # "VMSetError: VD: Backend 'VBoxIsoMaker' does not support async I/O"
1083 ( False, ktReason_Unknown_VM_Start_Error, 'error: failed to open session for' ),
1084 ( False, ktReason_Unknown_VM_Runtime_Error, 'Console: VM runtime error: fatal=true' ),
1085 ];
1086
1087 ## This we search a main log for to figure out why something went bust.
1088 katSimpleMainLogReasons = [
1089 # ( Whether to stop on hit, reason tuple, needle text. )
1090 ( False, ktReason_Host_win32com_gen_py, 'ModuleNotFoundError: No module named \'win32com.gen_py' ),
1091
1092 ];
1093
1094 ## This we search a VM log for to figure out why something went bust.
1095 katSimpleVmLogReasons = [
1096 # ( Whether to stop on hit, reason tuple, needle text. )
1097 ];
1098
1099 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
1100 katSimpleVBoxHardeningLogReasons = [
1101 # ( Whether to stop on hit, reason tuple, needle text. )
1102 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
1103 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
1104 ( True, ktReason_Host_TSTInfo_Accuracy_OOR, 'RTCRTSPTSTINFO::Accuracy::Millis: Out of range' ),
1105 ( False, ktReason_Unknown_VM_Crash, 'Quitting: ExitCode=0xc0000005 (rcNtWait=' ),
1106 ];
1107
1108 ## Things we search a kernel.log file for to figure out why something went bust.
1109 katSimpleKernelLogReasons = [
1110 # ( Whether to stop on hit, reason tuple, needle text. )
1111 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
1112 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn\'t work' ),
1113 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
1114 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libQt5CoreVBox' ),
1115 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libgtk-3' ),
1116 ];
1117
1118 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
1119 katSimpleVgaTextReasons = [
1120 # ( Whether to stop on hit, reason tuple, needle text. )
1121 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1122 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
1123 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1124 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
1125 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
1126 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
1127 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
1128 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
1129 "and send a report. Then try booting with the 'noapic' option\n"
1130 "\n" ),
1131 ( True, ktReason_OSInstall_GRUB_hang,
1132 "-----\nGRUB Loading stage2..\n\n\n\n" ),
1133 ( True, ktReason_OSInstall_GRUB_hang,
1134 "-----\nGRUB Loading stage2...\n\n\n\n" ), # the 3 dot hang appears to be less frequent
1135 ( True, ktReason_OSInstall_GRUB_hang,
1136 "-----\nGRUB Loading stage2....\n\n\n\n" ), # the 4 dot hang appears to be very infrequent
1137 ( True, ktReason_OSInstall_GRUB_hang,
1138 "-----\nGRUB Loading stage2.....\n\n\n\n" ), # the 5 dot hang appears to be more frequent again
1139 ( True, ktReason_OSInstall_Udev_hang,
1140 "\nStarting udev:\n\n\n\n" ),
1141 ( True, ktReason_OSInstall_Udev_hang,
1142 "\nStarting udev:\n------" ),
1143 ( True, ktReason_Panic_BootManagerC000000F,
1144 "Windows failed to start. A recent hardware or software change might be the" ),
1145 ( True, ktReason_BootManager_Image_corrupt,
1146 "BOOTMGR image is corrupt. The system cannot boot." ),
1147 ];
1148
1149 ## Things we search for in the info.txt file. Require handlers for now.
1150 katInfoTextHandlers = [
1151 # ( Trigger text, handler method )
1152 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
1153 ( "HalReturnToFirmware", investigateInfoHalReturnToFirmware ),
1154 ];
1155
1156 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
1157 katSimpleScreenshotHashReasons = [
1158 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
1159 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
1160 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
1161 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
1162 ( True, ktReason_BSOD_0000007F, '57e1880619e13042a87100e7a38c8974b85ce3866501be621bea0cc696bb2c63' ),
1163 ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
1164 ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
1165 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
1166 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
1167 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
1168 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
1169 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
1170 ];
1171
1172
1173 def scanLog(self, asLogs, atNeedles, oCaseFile, idTestResult):
1174 """
1175 Scans for atNeedles in sLog.
1176
1177 Returns True if a stop-on-hit neelde was found.
1178 Returns None if a no-stop reason was found.
1179 Returns False if no hit.
1180 """
1181 fRet = False;
1182 for fStopOnHit, tReason, oNeedle in atNeedles:
1183 fMatch = False;
1184 if utils.isString(oNeedle):
1185 for sLog in asLogs:
1186 if sLog:
1187 fMatch |= sLog.find(oNeedle) > 0;
1188 else:
1189 for sLog in asLogs:
1190 if sLog:
1191 fMatch |= oNeedle.search(sLog) is not None;
1192 if fMatch:
1193 oCaseFile.noteReasonForId(tReason, idTestResult);
1194 if fStopOnHit:
1195 return True;
1196 fRet = None;
1197 return fRet;
1198
1199
1200 def investigateGATest(self, oCaseFile, oFailedResult, sResultLog):
1201 """
1202 Investigates a failed VM run.
1203 """
1204 enmReason = None;
1205 sParentName = oFailedResult.oParent.sName if oFailedResult.oParent else '';
1206 if oFailedResult.sName == 'VBoxWindowsAdditions.exe' >= 0:
1207 enmReason = self.ktReason_Add_Installer_Win_Failed;
1208 # guest control:
1209 elif sParentName == 'Guest Control' and oFailedResult.sName == 'Preparations':
1210 enmReason = self.ktReason_Add_GstCtl_Preparations;
1211 elif oFailedResult.sName == 'Session Basics':
1212 enmReason = self.ktReason_Add_GstCtl_SessionBasics;
1213 elif oFailedResult.sName == 'Session Process References':
1214 enmReason = self.ktReason_Add_GstCtl_SessionProcRefs;
1215 elif oFailedResult.sName == 'Copy from guest':
1216 if sResultLog.find('*** abort action ***') >= 0:
1217 enmReason = self.ktReason_Add_GstCtl_CopyFromGuest_Timeout;
1218 elif oFailedResult.sName == 'Copy to guest':
1219 if sResultLog.find('*** abort action ***') >= 0:
1220 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_Timeout;
1221 elif oFailedResult.sName.find('Session w/ Guest Reboot') >= 0:
1222 enmReason = self.ktReason_Add_GstCtl_Session_Reboot;
1223 # shared folders:
1224 elif sParentName == 'Shared Folders' and oFailedResult.sName == 'Automounting':
1225 enmReason = self.ktReason_Add_ShFl_Automount;
1226 elif oFailedResult.sName == 'mmap':
1227 if sResultLog.find('FsPerf: Flush issue at offset ') >= 0:
1228 enmReason = self.ktReason_Add_Mmap_Coherency;
1229 elif sResultLog.find('FlushViewOfFile') >= 0:
1230 enmReason = self.ktReason_Add_FlushViewOfFile;
1231 elif sParentName == 'Shared Folders' and oFailedResult.sName == 'Running FsPerf':
1232 enmReason = self.ktReason_Add_ShFl_FsPerf; ## Maybe it would be better to be more specific...
1233
1234 if enmReason is not None:
1235 return oCaseFile.noteReasonForId(enmReason, oFailedResult.idTestResult);
1236
1237 self.vprint(u'TODO: Cannot place GA failure idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1238 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1239 return False;
1240
1241 def isResultFromGATest(self, oFailedResult):
1242 """
1243 Checks if this result and corresponding log snippet looks like a GA test run.
1244 """
1245 while oFailedResult is not None:
1246 if oFailedResult.sName in [ 'Guest Control', 'Shared Folders', 'FsPerf', ]:
1247 return True;
1248 oFailedResult = oFailedResult.oParent;
1249 return False;
1250
1251
1252 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
1253 """
1254 Investigates a failed VM run.
1255 """
1256
1257 def investigateLogSet():
1258 """
1259 Investigates the current set of VM related logs.
1260 """
1261 self.dprint('investigateLogSet: log lengths: result %u, VM %u, kernel %u, vga text %u, info text %u, hard %u'
1262 % ( len(sResultLog if sResultLog else ''),
1263 len(sVMLog if sVMLog else ''),
1264 len(sKrnlLog if sKrnlLog else ''),
1265 len(sVgaText if sVgaText else ''),
1266 len(sInfoText if sInfoText else ''),
1267 len(sNtHardLog if sNtHardLog else ''),));
1268
1269 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
1270 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
1271 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
1272 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
1273 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
1274 #self.dprint(u'hard.txt<<<\n%s\n<<<\n' % (sNtHardLog,));
1275
1276 # TODO: more
1277
1278 #
1279 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
1280 #
1281 sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
1282 'GIM: HyperV: Guest indicates a fatal condition! P0=');
1283 if sDetails is not None:
1284 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
1285 sKey = sDetails.split(' ', 1)[0];
1286 try: sKey = '0x%08X' % (int(sKey, 16),);
1287 except: pass;
1288 if sKey in self.asBsodReasons:
1289 tReason = ( self.ksBsodCategory, sKey );
1290 elif sKey.lower() in self.asBsodReasons: # just in case.
1291 tReason = ( self.ksBsodCategory, sKey.lower() );
1292 else:
1293 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
1294 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
1295 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
1296
1297 fFoundSomething = False;
1298
1299 #
1300 # Look for linux panic.
1301 #
1302 if sKrnlLog is not None:
1303 fRet = self.scanLog([sKrnlLog,], self.katSimpleKernelLogReasons, oCaseFile, oFailedResult.idTestResult);
1304 if fRet is True:
1305 return fRet;
1306 fFoundSomething |= fRet is None;
1307
1308 #
1309 # Loop thru the simple stuff.
1310 #
1311
1312 # Main log.
1313 fRet = self.scanLog([sResultLog,], self.katSimpleMainLogReasons, oCaseFile, oFailedResult.idTestResult);
1314 if fRet is True:
1315 return fRet;
1316 fFoundSomething |= fRet is None;
1317
1318 # VM log.
1319 fRet = self.scanLog([sVMLog,], self.katSimpleVmLogReasons, oCaseFile, oFailedResult.idTestResult);
1320 if fRet is True:
1321 return fRet;
1322 fFoundSomething |= fRet is None;
1323
1324 # Old main + vm log.
1325 fRet = self.scanLog([sResultLog, sVMLog], self.katSimpleMainAndVmLogReasonsDeprecated,
1326 oCaseFile, oFailedResult.idTestResult);
1327 if fRet is True:
1328 return fRet;
1329 fFoundSomething |= fRet is None;
1330
1331 # Continue with vga text.
1332 if sVgaText:
1333 fRet = self.scanLog([sVgaText,], self.katSimpleVgaTextReasons, oCaseFile, oFailedResult.idTestResult);
1334 if fRet is True:
1335 return fRet;
1336 fFoundSomething |= fRet is None;
1337
1338 # Continue with screen hashes.
1339 if sScreenHash is not None:
1340 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
1341 if sScreenHash == sHash:
1342 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1343 if fStopOnHit:
1344 return True;
1345 fFoundSomething = True;
1346
1347 # Check VBoxHardening.log.
1348 if sNtHardLog is not None:
1349 fRet = self.scanLog([sNtHardLog,], self.katSimpleVBoxHardeningLogReasons, oCaseFile, oFailedResult.idTestResult);
1350 if fRet is True:
1351 return fRet;
1352 fFoundSomething |= fRet is None;
1353
1354 #
1355 # Complicated stuff.
1356 #
1357 dLogs = {
1358 'sVMLog': sVMLog,
1359 'sNtHardLog': sNtHardLog,
1360 'sScreenHash': sScreenHash,
1361 'sKrnlLog': sKrnlLog,
1362 'sVgaText': sVgaText,
1363 'sInfoText': sInfoText,
1364 };
1365
1366 # info.txt.
1367 if sInfoText:
1368 for sNeedle, fnHandler in self.katInfoTextHandlers:
1369 if sInfoText.find(sNeedle) > 0:
1370 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1371 if tReason is not None:
1372 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1373 if fStop:
1374 return True;
1375 fFoundSomething = True;
1376
1377 #
1378 # Check for repeated reboots...
1379 #
1380 if sVMLog is not None:
1381 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1382 if cResets > 10:
1383 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1384 sComment = 'Counted %s reboots' % (cResets,));
1385
1386 return fFoundSomething;
1387
1388 #
1389 # Check if we got any VM or/and kernel logs. Treat them as sets in
1390 # case we run multiple VMs here (this is of course ASSUMING they
1391 # appear in the order that terminateVmBySession uploads them).
1392 #
1393 cTimes = 0;
1394 sVMLog = None;
1395 sNtHardLog = None;
1396 sScreenHash = None;
1397 sKrnlLog = None;
1398 sVgaText = None;
1399 sInfoText = None;
1400 for oFile in oFailedResult.aoFiles:
1401 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1402 if 'VBoxHardening.log' not in oFile.sFile:
1403 if sVMLog is not None:
1404 if investigateLogSet() is True:
1405 return True;
1406 cTimes += 1;
1407 sInfoText = None;
1408 sVgaText = None;
1409 sKrnlLog = None;
1410 sScreenHash = None;
1411 sNtHardLog = None;
1412 sVMLog = oCaseFile.getLogFile(oFile);
1413 else:
1414 sNtHardLog = oCaseFile.getLogFile(oFile);
1415 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1416 sKrnlLog = oCaseFile.getLogFile(oFile);
1417 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1418 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1419 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1420 sInfoText = oCaseFile.getLogFile(oFile);
1421 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1422 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1423 if sScreenHash is not None:
1424 sScreenHash = sScreenHash.lower();
1425 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1426
1427 if ( sVMLog is not None \
1428 or sNtHardLog is not None \
1429 or cTimes == 0) \
1430 and investigateLogSet() is True:
1431 return True;
1432
1433 return None;
1434
1435 def isResultFromVMRun(self, oFailedResult, sResultLog):
1436 """
1437 Checks if this result and corresponding log snippet looks like a VM run.
1438 """
1439
1440 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1441 if sResultLog.find(' startVm') > 0:
1442 return True;
1443
1444 # Any other indicators? No?
1445 _ = oFailedResult;
1446 return False;
1447
1448
1449 ## Things we search a VBoxSVC log for to figure out why something went bust.
1450 katSimpleSvcLogReasons = [
1451 # ( Whether to stop on hit, reason tuple, needle text. )
1452 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* exited normally: -1073741819 \(0xc0000005\)') ),
1453 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* was signalled: 11 \(0xb\)') ),
1454 ];
1455
1456 def investigateSvcLogForVMRun(self, oCaseFile, sSvcLog):
1457 """
1458 Check the VBoxSVC log for a single VM run.
1459 """
1460 if sSvcLog:
1461 fRet = self.scanLog([sSvcLog,], self.katSimpleSvcLogReasons, oCaseFile, oCaseFile.oTree.idTestResult);
1462 if fRet is True or fRet is None:
1463 return True;
1464 return False;
1465
1466 def investigateNtHardLogForVMRun(self, oCaseFile):
1467 """
1468 Check if the hardening log for a single VM run contains VM crash indications.
1469 """
1470 aoLogFiles = oCaseFile.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseVm);
1471 for oLogFile in aoLogFiles:
1472 if oLogFile.sFile.find('VBoxHardening.log') >= 0:
1473 sLog = oCaseFile.getLogFile(oLogFile);
1474 if sLog.find('Quitting: ExitCode=0xc0000005') >= 0:
1475 return oCaseFile.noteReasonForId(self.ktReason_Unknown_VM_Crash, oCaseFile.oTree.idTestResult);
1476 return False;
1477
1478
1479 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1480 """
1481 Checks out a VBox VM test.
1482
1483 This is generic investigation of a test running one or more VMs, like
1484 for example a smoke test or a guest installation test.
1485
1486 The fSingleVM parameter is a hint, which probably won't come in useful.
1487 """
1488 _ = fSingleVM;
1489
1490 #
1491 # Get a list of test result failures we should be looking into and the main log.
1492 #
1493 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1494 sMainLog = oCaseFile.getMainLog();
1495
1496 #
1497 # There are a set of errors ending up on the top level result record.
1498 # Should deal with these first.
1499 #
1500 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1501 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1502 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1503 # ignore other failures in the test if we're not a little bit careful here.
1504 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1505 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1506 return self.caseClosed(oCaseFile);
1507
1508 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1509 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1510 (' Timeout: ', ' Attempting to abort child...',) ):
1511 if sMainLog.find('*** glibc detected *** /') > 0:
1512 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1513 else:
1514 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1515 return self.caseClosed(oCaseFile);
1516
1517 # Look for heap corruption without visible hang.
1518 if sMainLog.find('*** glibc detected *** /') > 0 \
1519 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1520 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1521 return self.caseClosed(oCaseFile);
1522
1523 # Out of memory w/ timeout.
1524 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1525 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1526 return self.caseClosed(oCaseFile);
1527
1528 # Stale files like vts_rm.exe (windows).
1529 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1530 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1531 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1532 return self.caseClosed(oCaseFile);
1533
1534 #
1535 # XPCOM screwup
1536 #
1537 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1538 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1539 return self.caseClosed(oCaseFile);
1540
1541 #
1542 # Go thru each failed result.
1543 #
1544 for oFailedResult in aoFailedResults:
1545 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1546 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1547 if oFailedResult.sName == 'Installing VirtualBox':
1548 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1549
1550 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1551 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1552
1553 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1554 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1555
1556 elif self.isResultFromGATest(oFailedResult):
1557 self.investigateGATest(oCaseFile, oFailedResult, sResultLog);
1558
1559 elif sResultLog.find('most likely not unique') > 0:
1560 oCaseFile.noteReasonForId(self.ktReason_Host_NetworkMisconfiguration, oFailedResult.idTestResult)
1561 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1562 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1563
1564 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1565 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1566 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1567
1568 elif sResultLog.find('** error: no action was specified') > 0 \
1569 or sResultLog.find('(len(self._asXml, asText))') > 0:
1570 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1571
1572 else:
1573 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1574 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1575
1576 #
1577 # Windows python/com screwup.
1578 #
1579 if sMainLog.find('ModuleNotFoundError: No module named \'win32com.gen_py') > 0:
1580 oCaseFile.noteReason(self.ktReason_Host_win32com_gen_py);
1581 return self.caseClosed(oCaseFile);
1582
1583 #
1584 # Check VBoxSVC.log and VBoxHardening.log for VM crashes if inconclusive on single VM runs.
1585 #
1586 if fSingleVM and len(oCaseFile.dReasonForResultId) < len(aoFailedResults):
1587 self.dprint(u'Got %u out of %u - checking VBoxSVC.log...'
1588 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1589 if self.investigateSvcLogForVMRun(oCaseFile, oCaseFile.getSvcLog()):
1590 return self.caseClosed(oCaseFile);
1591 if self.investigateNtHardLogForVMRun(oCaseFile):
1592 return self.caseClosed(oCaseFile);
1593
1594 #
1595 # Report home and close the case if we got them all, otherwise log it.
1596 #
1597 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1598 return self.caseClosed(oCaseFile);
1599
1600 if oCaseFile.dReasonForResultId:
1601 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1602 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1603 else:
1604 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1605 return False;
1606
1607
1608 ## Things we search a main log for to figure out why something in the API test went bust.
1609 katSimpleApiMainLogReasons = [
1610 # ( Whether to stop on hit, reason tuple, needle text. )
1611 ( True, ktReason_Networking_Nonexistent_host_nic,
1612 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1613 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1614 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1615 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1616 ( True, ktReason_API_Digest_Mismatch, 'Digest mismatch (VERR_NOT_EQUAL)' ),
1617 ( True, ktReason_API_MoveVM_SharingViolation, 'rc=VBOX_E_IPRT_ERROR text="Could not copy the log file ' ),
1618 ( True, ktReason_API_MoveVM_InvalidParameter,
1619 'rc=VBOX_E_IPRT_ERROR text="Could not copy the setting file ' ),
1620 ( True, ktReason_API_Open_Session_Failed, 'error: failed to open session for' ),
1621 ];
1622
1623 def investigateVBoxApiTest(self, oCaseFile):
1624 """
1625 Checks out a VBox API test.
1626 """
1627
1628 #
1629 # Get a list of test result failures we should be looking into and the main log.
1630 #
1631 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1632 sMainLog = oCaseFile.getMainLog();
1633
1634 #
1635 # Go thru each failed result.
1636 #
1637 for oFailedResult in aoFailedResults:
1638 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1639 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1640 if oFailedResult.sName == 'Installing VirtualBox':
1641 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1642
1643 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1644 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1645
1646 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1647 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1648
1649 else:
1650 fFoundSomething = False;
1651 for fStopOnHit, tReason, sNeedle in self.katSimpleApiMainLogReasons:
1652 if sResultLog.find(sNeedle) > 0:
1653 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1654 fFoundSomething = True;
1655 if fStopOnHit:
1656 break;
1657 if fFoundSomething:
1658 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1659 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1660
1661 #
1662 # Report home and close the case if we got them all, otherwise log it.
1663 #
1664 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1665 return self.caseClosed(oCaseFile);
1666
1667 if oCaseFile.dReasonForResultId:
1668 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1669 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1670 else:
1671 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1672 return False;
1673
1674
1675 def reasoningFailures(self):
1676 """
1677 Guess the reason for failures.
1678 """
1679 #
1680 # Get a list of failed test sets without any assigned failure reason.
1681 #
1682 cGot = 0;
1683 if not self.oConfig.aidTestSets:
1684 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack,
1685 tsNow = self.tsNow);
1686 else:
1687 aoTestSets = [self.oTestSetLogic.getById(idTestSet) for idTestSet in self.oConfig.aidTestSets];
1688 for oTestSet in aoTestSets:
1689 self.dprint(u'----------------------------------- #%u, status %s -----------------------------------'
1690 % ( oTestSet.idTestSet, oTestSet.enmStatus,));
1691
1692 #
1693 # Open a case file and assign it to the right investigator.
1694 #
1695 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1696 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1697 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1698 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1699 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1700
1701 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1702
1703 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1704 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1705 fRc = self.investigateBadTestBox(oCaseFile);
1706
1707 elif oCaseFile.isVBoxUnitTest():
1708 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1709 fRc = self.investigateVBoxUnitTest(oCaseFile);
1710
1711 elif oCaseFile.isVBoxInstallTest() or oCaseFile.isVBoxUnattendedInstallTest():
1712 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1713 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1714
1715 elif oCaseFile.isVBoxUSBTest():
1716 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1717 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1718
1719 elif oCaseFile.isVBoxStorageTest():
1720 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1721 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1722
1723 elif oCaseFile.isVBoxGAsTest():
1724 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1725 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1726
1727 elif oCaseFile.isVBoxAPITest():
1728 self.dprint(u'investigateVBoxApiTest is taking over %s.' % (oCaseFile.sLongName,));
1729 fRc = self.investigateVBoxApiTest(oCaseFile);
1730
1731 elif oCaseFile.isVBoxBenchmarkTest():
1732 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1733 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1734
1735 elif oCaseFile.isVBoxSmokeTest():
1736 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1737 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1738
1739 elif oCaseFile.isVBoxSerialTest():
1740 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1741 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1742
1743 else:
1744 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1745 fRc = False;
1746 cGot += fRc is True;
1747
1748 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1749 return 0;
1750
1751
1752 def main(self):
1753 """
1754 The 'main' function.
1755 Return exit code (0, 1, etc).
1756 """
1757 # Database stuff.
1758 self.oDb = TMDatabaseConnection()
1759 self.oTestResultLogic = TestResultLogic(self.oDb);
1760 self.oTestSetLogic = TestSetLogic(self.oDb);
1761 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1762 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1763 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1764 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1765
1766 # Get a fix on our 'now' before we do anything..
1767 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1768 self.tsNow = self.oDb.fetchOne();
1769
1770 # If we're suppost to commit anything we need to get our user ID.
1771 rcExit = 0;
1772 if self.oConfig.fRealRun:
1773 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1774 if self.oLogin is None:
1775 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1776 else:
1777 self.uidSelf = self.oLogin.uid;
1778
1779 #
1780 # Do the stuff.
1781 #
1782 if rcExit == 0:
1783 rcExit = self.selfCheck();
1784 if rcExit == 0:
1785 rcExit = self.badTestBoxManagement();
1786 rcExit2 = self.reasoningFailures();
1787 if rcExit == 0:
1788 rcExit = rcExit2;
1789 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1790 if rcExit == 0:
1791 rcExit = self.badTestBoxManagement();
1792
1793 # Cleanup.
1794 self.oFailureReasonLogic = None;
1795 self.oTestResultFailureLogic = None;
1796 self.oTestSetLogic = None;
1797 self.oTestResultLogic = None;
1798 self.oDb.close();
1799 self.oDb = None;
1800 if self.oLogFile is not None:
1801 self.oLogFile.close();
1802 self.oLogFile = None;
1803 return rcExit;
1804
1805if __name__ == '__main__':
1806 sys.exit(VirtualTestSheriff().main());
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette