VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 94129

Last change on this file since 94129 was 94129, checked in by vboxsync, 3 years ago

testmanager: pylint 2.9.6 adjustments (mostly about using sub-optimal looping and 'with' statements).

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 89.2 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 94129 2022-03-08 14:57:25Z vboxsync $
4# pylint: disable=line-too-long
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15from __future__ import print_function;
16
17__copyright__ = \
18"""
19Copyright (C) 2012-2022 Oracle Corporation
20
21This file is part of VirtualBox Open Source Edition (OSE), as
22available from http://www.virtualbox.org. This file is free software;
23you can redistribute it and/or modify it under the terms of the GNU
24General Public License (GPL) as published by the Free Software
25Foundation, in version 2 as it comes in the "COPYING" file of the
26VirtualBox OSE distribution. VirtualBox OSE is distributed in the
27hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
28
29The contents of this file may alternatively be used under the terms
30of the Common Development and Distribution License Version 1.0
31(CDDL) only, as it comes in the "COPYING.CDDL" file of the
32VirtualBox OSE distribution, in which case the provisions of the
33CDDL are applicable instead of those of the GPL.
34
35You may elect to license modified versions of this file under the
36terms and conditions of either the GPL or the CDDL or both.
37"""
38__version__ = "$Revision: 94129 $"
39
40
41# Standard python imports
42import hashlib;
43import os;
44import re;
45import smtplib;
46#import subprocess;
47import sys;
48from email.mime.multipart import MIMEMultipart;
49from email.mime.text import MIMEText;
50from email.utils import COMMASPACE;
51
52if sys.version_info[0] >= 3:
53 from io import BytesIO as BytesIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
54else:
55 from StringIO import StringIO as BytesIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
56from optparse import OptionParser; # pylint: disable=deprecated-module
57from PIL import Image; # pylint: disable=import-error
58
59# Add Test Manager's modules path
60g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
61sys.path.append(g_ksTestManagerDir);
62
63# Test Manager imports
64from common import utils;
65from testmanager.core.db import TMDatabaseConnection;
66from testmanager.core.build import BuildDataEx;
67from testmanager.core.failurereason import FailureReasonLogic;
68from testmanager.core.testbox import TestBoxLogic, TestBoxData;
69from testmanager.core.testcase import TestCaseDataEx;
70from testmanager.core.testgroup import TestGroupData;
71from testmanager.core.testset import TestSetLogic, TestSetData;
72from testmanager.core.testresults import TestResultLogic, TestResultFileData;
73from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
74from testmanager.core.useraccount import UserAccountLogic;
75from testmanager.config import g_ksSmtpHost, g_kcSmtpPort, g_ksAlertFrom, \
76 g_ksAlertSubject, g_asAlertList #, g_ksLomPassword;
77
78# Python 3 hacks:
79if sys.version_info[0] >= 3:
80 xrange = range; # pylint: disable=redefined-builtin,invalid-name
81
82
83class VirtualTestSheriffCaseFile(object):
84 """
85 A failure investigation case file.
86
87 """
88
89
90 ## Max log file we'll read into memory. (256 MB)
91 kcbMaxLogRead = 0x10000000;
92
93 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
94 self.oSheriff = oSheriff;
95 self.oTestSet = oTestSet; # TestSetData
96 self.oTree = oTree; # TestResultDataEx
97 self.oBuild = oBuild; # BuildDataEx
98 self.oTestBox = oTestBox; # TestBoxData
99 self.oTestGroup = oTestGroup; # TestGroupData
100 self.oTestCase = oTestCase; # TestCaseDataEx
101 self.sMainLog = ''; # The main log file. Empty string if not accessible.
102 self.sSvcLog = ''; # The VBoxSVC log file. Empty string if not accessible.
103
104 # Generate a case file name.
105 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
106 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
107 % ( self.oTestSet.idTestSet,
108 self.oTestCase.sName,
109 self.oTestBox.sName,
110 self.oTestBox.sOs,
111 self.oTestBox.sOsVersion,
112 self.oTestBox.sCpuArch,
113 self.oTestBox.sCpuName,
114 self.oTestBox.sCpuVendor,
115 self.oBuild.oCat.sProduct,
116 self.oBuild.oCat.sBranch,
117 self.oBuild.oCat.sType,
118 self.oBuild.iRevision, );
119
120 # Investigation notes.
121 self.tReason = None; # None or one of the ktReason_XXX constants.
122 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
123 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
124
125 #
126 # Reason.
127 #
128
129 def noteReason(self, tReason):
130 """ Notes down a possible reason. """
131 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
132 self.tReason = tReason;
133 return True;
134
135 def noteReasonForId(self, tReason, idTestResult, sComment = None):
136 """ Notes down a possible reason for a specific test result. """
137 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
138 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
139 (u' (%s)' % (sComment,)) if sComment is not None else ''));
140 self.dReasonForResultId[idTestResult] = tReason;
141 if sComment is not None:
142 self.dCommentForResultId[idTestResult] = sComment;
143 return True;
144
145
146 #
147 # Test classification.
148 #
149
150 def isVBoxTest(self):
151 """ Test classification: VirtualBox (using the build) """
152 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
153
154 def isVBoxUnitTest(self):
155 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
156 return self.isVBoxTest() \
157 and (self.oTestCase.sName.lower() == 'unit tests' or self.oTestCase.sName.lower().startswith('misc: unit tests'));
158
159 def isVBoxInstallTest(self):
160 """ Test case classification: VirtualBox Guest installation test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('install:');
163
164 def isVBoxUnattendedInstallTest(self):
165 """ Test case classification: VirtualBox Guest installation test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('uinstall:');
168
169 def isVBoxUSBTest(self):
170 """ Test case classification: VirtualBox USB test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('usb:');
173
174 def isVBoxStorageTest(self):
175 """ Test case classification: VirtualBox Storage test. """
176 return self.isVBoxTest() \
177 and self.oTestCase.sName.lower().startswith('storage:');
178
179 def isVBoxGAsTest(self):
180 """ Test case classification: VirtualBox Guest Additions test. """
181 return self.isVBoxTest() \
182 and ( self.oTestCase.sName.lower().startswith('guest additions')
183 or self.oTestCase.sName.lower().startswith('ga\'s tests'));
184
185 def isVBoxAPITest(self):
186 """ Test case classification: VirtualBox API test. """
187 return self.isVBoxTest() \
188 and self.oTestCase.sName.lower().startswith('api:');
189
190 def isVBoxBenchmarkTest(self):
191 """ Test case classification: VirtualBox Benchmark test. """
192 return self.isVBoxTest() \
193 and self.oTestCase.sName.lower().startswith('benchmark:');
194
195 def isVBoxSmokeTest(self):
196 """ Test case classification: Smoke test. """
197 return self.isVBoxTest() \
198 and self.oTestCase.sName.lower().startswith('smoketest');
199
200 def isVBoxSerialTest(self):
201 """ Test case classification: Smoke test. """
202 return self.isVBoxTest() \
203 and self.oTestCase.sName.lower().startswith('serial:');
204
205
206 #
207 # Utility methods.
208 #
209
210 def getMainLog(self):
211 """
212 Tries to read the main log file since this will be the first source of information.
213 """
214 if self.sMainLog:
215 return self.sMainLog;
216 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
217 if oFile is not None:
218 try:
219 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
220 except Exception as oXcpt:
221 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
222 self.sMainLog = '';
223 else:
224 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
225 return self.sMainLog;
226
227 def getLogFile(self, oFile):
228 """
229 Tries to read the given file as a utf-8 log file.
230 oFile is a TestFileDataEx instance.
231 Returns empty string if problems opening or reading the file.
232 """
233 sContent = '';
234 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
235 if oFile is not None:
236 try:
237 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
238 except Exception as oXcpt:
239 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
240 else:
241 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
242 return sContent;
243
244 def getSvcLog(self):
245 """
246 Tries to read the VBoxSVC log file as it typically not associated with a failing test result.
247 Note! Returns the first VBoxSVC log file we find.
248 """
249 if not self.sSvcLog:
250 aoSvcLogFiles = self.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseSvc);
251 if aoSvcLogFiles:
252 self.sSvcLog = self.getLogFile(aoSvcLogFiles[0]);
253 return self.sSvcLog;
254
255 def getScreenshotSha256(self, oFile):
256 """
257 Tries to read the given screenshot file, uncompress it, and do SHA-2
258 on the raw pixels.
259 Returns SHA-2 digest string on success, None on failure.
260 """
261 (oImgFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
262 try:
263 abImageFile = oImgFile.read();
264 except Exception as oXcpt:
265 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
266 else:
267 try:
268 oImage = Image.open(BytesIO(abImageFile));
269 except Exception as oXcpt:
270 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
271 else:
272 try:
273 oHash = hashlib.sha256();
274 if hasattr(oImage, 'tobytes'):
275 oHash.update(oImage.tobytes());
276 else:
277 oHash.update(oImage.tostring()); # pylint: disable=no-member
278 except Exception as oXcpt:
279 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
280 else:
281 return oHash.hexdigest();
282 return None;
283
284
285
286 def isSingleTestFailure(self):
287 """
288 Figure out if this is a single test failing or if it's one of the
289 more complicated ones.
290 """
291 if self.oTree.cErrors == 1:
292 return True;
293 if self.oTree.deepCountErrorContributers() <= 1:
294 return True;
295 return False;
296
297
298
299class VirtualTestSheriff(object): # pylint: disable=too-few-public-methods
300 """
301 Add build info into Test Manager database.
302 """
303
304 ## The user account for the virtual sheriff.
305 ksLoginName = 'vsheriff';
306
307 def __init__(self):
308 """
309 Parse command line.
310 """
311 self.oDb = None;
312 self.tsNow = None;
313 self.oTestResultLogic = None;
314 self.oTestSetLogic = None;
315 self.oFailureReasonLogic = None; # FailureReasonLogic;
316 self.oTestResultFailureLogic = None; # TestResultFailureLogic
317 self.oLogin = None;
318 self.uidSelf = -1;
319 self.oLogFile = None;
320 self.asBsodReasons = [];
321 self.asUnitTestReasons = [];
322
323 oParser = OptionParser();
324 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
325 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
326 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
327 help = 'Work period specified in hours. Defauls is 2 hours.');
328 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
329 help = 'Whether to commit the findings to the database. Default is a dry run.');
330 oParser.add_option('--testset', dest = 'aidTestSets', metavar = '<id>', default = [], type = 'int', action = 'append',
331 help = 'Only investigate this one. Accumulates IDs when repeated.');
332 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
333 help = 'Quiet execution');
334 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
335 help = 'Where to log messages.');
336 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
337 help = 'Enables debug mode.');
338
339 (self.oConfig, _) = oParser.parse_args();
340
341 if self.oConfig.sLogFile:
342 self.oLogFile = open(self.oConfig.sLogFile, "a"); # pylint: disable=consider-using-with
343 self.oLogFile.write('VirtualTestSheriff: $Revision: 94129 $ \n');
344
345
346 def eprint(self, sText):
347 """
348 Prints error messages.
349 Returns 1 (for exit code usage.)
350 """
351 print('error: %s' % (sText,));
352 if self.oLogFile is not None:
353 if sys.version_info[0] >= 3:
354 self.oLogFile.write(u'error: %s\n' % (sText,));
355 else:
356 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
357 return 1;
358
359 def dprint(self, sText):
360 """
361 Prints debug info.
362 """
363 if self.oConfig.fDebug:
364 if not self.oConfig.fQuiet:
365 print('debug: %s' % (sText, ));
366 if self.oLogFile is not None:
367 if sys.version_info[0] >= 3:
368 self.oLogFile.write(u'debug: %s\n' % (sText,));
369 else:
370 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
371 return 0;
372
373 def vprint(self, sText):
374 """
375 Prints verbose info.
376 """
377 if not self.oConfig.fQuiet:
378 print('info: %s' % (sText,));
379 if self.oLogFile is not None:
380 if sys.version_info[0] >= 3:
381 self.oLogFile.write(u'info: %s\n' % (sText,));
382 else:
383 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
384 return 0;
385
386 def getFailureReason(self, tReason):
387 """ Gets the failure reason object for tReason. """
388 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
389
390 def selfCheck(self):
391 """ Does some self checks, looking up things we expect to be in the database and such. """
392 rcExit = 0;
393 for sAttr in dir(self.__class__):
394 if sAttr.startswith('ktReason_'):
395 tReason = getattr(self.__class__, sAttr);
396 oFailureReason = self.getFailureReason(tReason);
397 if oFailureReason is None:
398 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
399 % (tReason[1], tReason[0],));
400
401 # Check the user account as well.
402 if self.oLogin is None:
403 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
404 if oLogin is None:
405 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
406 return rcExit;
407
408 def sendEmailAlert(self, uidAuthor, sBodyText):
409 """
410 Sends email alert.
411 """
412
413 # Get author email
414 self.oDb.execute('SELECT sEmail FROM Users WHERE uid=%s', (uidAuthor,));
415 sFrom = self.oDb.fetchOne();
416 if sFrom is not None:
417 sFrom = sFrom[0];
418 else:
419 sFrom = g_ksAlertFrom;
420
421 # Gather recipient list.
422 asEmailList = [];
423 for sUser in g_asAlertList:
424 self.oDb.execute('SELECT sEmail FROM Users WHERE sUsername=%s', (sUser,));
425 sEmail = self.oDb.fetchOne();
426 if sEmail:
427 asEmailList.append(sEmail[0]);
428 if not asEmailList:
429 return self.eprint('No email addresses to send alter to!');
430
431 # Compose the message.
432 oMsg = MIMEMultipart();
433 oMsg['From'] = sFrom;
434 oMsg['To'] = COMMASPACE.join(asEmailList);
435 oMsg['Subject'] = g_ksAlertSubject;
436 oMsg.attach(MIMEText(sBodyText, 'plain'))
437
438 # Try send it.
439 try:
440 oSMTP = smtplib.SMTP(g_ksSmtpHost, g_kcSmtpPort);
441 oSMTP.sendmail(sFrom, asEmailList, oMsg.as_string())
442 oSMTP.quit()
443 except smtplib.SMTPException as oXcpt:
444 return self.eprint('Failed to send mail: %s' % (oXcpt,));
445
446 return 0;
447
448 def badTestBoxManagement(self):
449 """
450 Looks for bad test boxes and first tries once to reboot them then disables them.
451 """
452 rcExit = 0;
453
454 #
455 # We skip this entirely if we're running in the past and not in harmless debug mode.
456 #
457 if self.oConfig.cStartHoursAgo != 0 \
458 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
459 return rcExit;
460 tsNow = self.tsNow if self.oConfig.fDebug else None;
461 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
462 oTestBoxLogic = TestBoxLogic(self.oDb);
463
464 #
465 # Generate a list of failures reasons we consider bad-testbox behavior.
466 #
467 aidFailureReasons = [
468 self.getFailureReason(self.ktReason_Host_DriverNotLoaded).idFailureReason,
469 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
470 self.getFailureReason(self.ktReason_Host_DriverNotCompilable).idFailureReason,
471 self.getFailureReason(self.ktReason_Host_InstallationFailed).idFailureReason,
472 ];
473
474 #
475 # Get list of bad test boxes for given period and check them out individually.
476 #
477 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
478 aidFailureReasons = aidFailureReasons);
479 for idTestBox in aidBadTestBoxes:
480 # Skip if the testbox is already disabled or has a pending reboot command.
481 try:
482 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
483 except Exception as oXcpt:
484 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
485 continue;
486 if not oTestBox.fEnabled:
487 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
488 % ( idTestBox, oTestBox.sName, ));
489 continue;
490 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
491 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
492 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
493 continue;
494
495 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
496 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
497 cOkay = 0;
498 cBad = 0;
499 iFirstOkay = len(aoSets);
500 for iSet, oSet in enumerate(aoSets):
501 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
502 cBad += 1;
503 else:
504 # Check for bad failure reasons.
505 oFailure = None;
506 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
507 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oSet.idTestSet)
508 aoFailedResults = oTree.getListOfFailures();
509 for oFailedResult in aoFailedResults:
510 oFailure = self.oTestResultFailureLogic.getById(oFailedResult.idTestResult);
511 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
512 break;
513 oFailure = None;
514 if oFailure is not None:
515 cBad += 1;
516 else:
517 # This is an okay test result then.
518 ## @todo maybe check the elapsed time here, it could still be a bad run?
519 cOkay += 1;
520 iFirstOkay = min(iFirstOkay, iSet);
521 if iSet > 10:
522 break;
523
524 # We react if there are two or more bad-testbox statuses at the head of the
525 # history and at least three in the last 10 results.
526 if iFirstOkay >= 2 and cBad > 2:
527 if oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
528 sComment = u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
529 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
530 self.vprint(sComment);
531 self.sendEmailAlert(self.uidSelf, sComment);
532 if self.oConfig.fRealRun is True:
533 try:
534 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
535 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
536 % (iFirstOkay, cBad, cOkay),);
537 except Exception as oXcpt:
538 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
539 else:
540 sComment = u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
541 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
542 self.vprint(sComment);
543 self.sendEmailAlert(self.uidSelf, sComment);
544 if self.oConfig.fRealRun is True:
545 try:
546 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
547 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
548 % (iFirstOkay, cBad, cOkay),);
549 except Exception as oXcpt:
550 rcExit = self.eprint(u'Error rebooting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
551 else:
552 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
553 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
554
555 ## @todo r=bird: review + rewrite;
556 ## - no selecting here, that belongs in the core/*.py files.
557 ## - preserve existing comments.
558 ## - doing way too much in the try/except block.
559 ## - No password quoting in the sshpass command that always fails (127).
560 ## - Timeout is way to low. testboxmem1 need more than 10 min to take a dump, ages to
561 ## get thru POST and another 5 just to time out in grub. Should be an hour or so.
562 ## Besides, it need to be constant elsewhere in the file, not a variable here.
563 ##
564 ##
565 ## Reset hanged testboxes
566 ##
567 #cStatusTimeoutMins = 10;
568 #
569 #self.oDb.execute('SELECT TestBoxStatuses.idTestBox\n'
570 # ' FROM TestBoxStatuses, TestBoxes\n'
571 # ' WHERE TestBoxStatuses.tsUpdated >= (CURRENT_TIMESTAMP - interval \'%s hours\')\n'
572 # ' AND TestBoxStatuses.tsUpdated < (CURRENT_TIMESTAMP - interval \'%s minutes\')\n'
573 # ' AND TestBoxStatuses.idTestBox = TestBoxes.idTestBox\n'
574 # ' AND Testboxes.tsExpire = \'infinity\'::timestamp', (cHoursBack,cStatusTimeoutMins));
575 #for idTestBox in self.oDb.fetchAll():
576 # idTestBox = idTestBox[0];
577 # try:
578 # oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
579 # except Exception as oXcpt:
580 # rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
581 # continue;
582 # # Skip if the testbox is already disabled, already reset or there's no iLOM
583 # if not oTestBox.fEnabled or oTestBox.ipLom is None or oTestBox.sComment is not None and oTestBox.sComment.find('Automatically reset') >= 0:
584 # self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
585 # % ( idTestBox, oTestBox.sName, ));
586 # continue;
587 # ## @todo get iLOM credentials from a table?
588 # sCmd = 'sshpass -p%s ssh -oStrictHostKeyChecking=no root@%s show /SP && reset /SYS' % (g_ksLomPassword, oTestBox.ipLom,);
589 # try:
590 # oPs = subprocess.Popen(sCmd, stdout=subprocess.PIPE, shell=True);
591 # sStdout = oPs.communicate()[0];
592 # iRC = oPs.wait();
593 #
594 # oTestBox.sComment = 'Automatically reset (iRC=%u sStdout=%s)' % (iRC, sStdout,);
595 # oTestBoxLogic.editEntry(oTestBox, self.uidSelf, fCommit = True);
596 #
597 # sComment = u'Reset testbox #%u (%s) - iRC=%u sStduot=%s' % ( idTestBox, oTestBox.sName, iRC, sStdout);
598 # self.vprint(sComment);
599 # self.sendEmailAlert(self.uidSelf, sComment);
600 #
601 # except Exception as oXcpt:
602 # rcExit = self.eprint(u'Error resetting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
603 #
604 return rcExit;
605
606
607 ## @name Failure reasons we know.
608 ## @{
609
610 ktReason_Add_Installer_Win_Failed = ( 'Additions', 'Win GA install' );
611 ktReason_Add_ShFl_Automount = ( 'Additions', 'Automounting' );
612 ktReason_Add_ShFl_FsPerf = ( 'Additions', 'FsPerf' );
613 ktReason_Add_ShFl_FsPerf_Abend = ( 'Additions', 'FsPerf abend' );
614 ktReason_Add_GstCtl_Preparations = ( 'Additions', 'GstCtl preparations' );
615 ktReason_Add_GstCtl_SessionBasics = ( 'Additions', 'Session basics' );
616 ktReason_Add_GstCtl_SessionProcRefs = ( 'Additions', 'Session process' );
617 ktReason_Add_GstCtl_Session_Reboot = ( 'Additions', 'Session reboot' );
618 ktReason_Add_GstCtl_CopyFromGuest_Timeout = ( 'Additions', 'CopyFromGuest timeout' );
619 ktReason_Add_GstCtl_CopyToGuest_Timeout = ( 'Additions', 'CopyToGuest timeout' );
620 ktReason_Add_GstCtl_CopyToGuest_DstEmpty = ( 'Additions', 'CopyToGuest dst empty' );
621 ktReason_Add_GstCtl_CopyToGuest_DstExists = ( 'Additions', 'CopyToGuest dst exists' );
622 ktReason_Add_FlushViewOfFile = ( 'Additions', 'FlushViewOfFile' );
623 ktReason_Add_Mmap_Coherency = ( 'Additions', 'mmap coherency' );
624 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
625 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
626 ktReason_BSOD_0000007F = ( 'BSOD', '0x0000007F' );
627 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
628 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
629 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
630 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
631 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
632 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
633 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
634 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
635 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
636 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
637 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
638 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
639 ktReason_Host_DriverNotCompilable = ( 'Host', 'Driver not compilable' );
640 ktReason_Host_InstallationFailed = ( 'Host', 'Installation failed' );
641 ktReason_Host_InstallationWantReboot = ( 'Host', 'Installation want reboot' );
642 ktReason_Host_InvalidPackage = ( 'Host', 'ERROR_INSTALL_PACKAGE_INVALID' );
643 ktReason_Host_InstallSourceAbsent = ( 'Host', 'ERROR_INSTALL_SOURCE_ABSENT' );
644 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
645 ktReason_Host_DoubleFreeHeap = ( 'Host', 'Double free or corruption' );
646 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
647 ktReason_Host_win32com_gen_py = ( 'Host', 'win32com.gen_py' );
648 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
649 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
650 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
651 ktReason_Host_NetworkMisconfiguration = ( 'Host', 'Network misconfiguration' );
652 ktReason_Host_TSTInfo_Accuracy_OOR = ( 'Host', 'TSTInfo accuracy out of range' );
653 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
654 ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND = ( 'Networking', 'VERR_INTNET_FLT_IF_NOT_FOUND' );
655 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
656 ktReason_OSInstall_Udev_hang = ( 'O/S Install', 'udev hang' );
657 ktReason_OSInstall_Sata_no_BM = ( 'O/S Install', 'SATA busmaster bit not set' );
658 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
659 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
660 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
661 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
662 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
663 ktReason_API_std_bad_alloc = ( 'API / (XP)COM', 'std::bad_alloc' );
664 ktReason_API_Digest_Mismatch = ( 'API / (XP)COM', 'Digest mismatch' );
665 ktReason_API_MoveVM_SharingViolation = ( 'API / (XP)COM', 'MoveVM sharing violation' );
666 ktReason_API_MoveVM_InvalidParameter = ( 'API / (XP)COM', 'MoveVM invalid parameter' );
667 ktReason_API_Open_Session_Failed = ( 'API / (XP)COM', 'Open session failed' );
668 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
669 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
670 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
671 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
672 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
673 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
674 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
675 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
676 ktReason_Unknown_HalReturnToFirmware = ( 'Unknown', 'HalReturnToFirmware' );
677 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
678 ktReason_Unknown_VM_Start_Error = ( 'Unknown', 'VM Start Error' );
679 ktReason_Unknown_VM_Runtime_Error = ( 'Unknown', 'VM Runtime Error' );
680 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
681 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
682 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
683 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
684 ktReason_GuestBug_CompizVBoxQt = ( 'Guest Bug', 'Compiz + VirtualBox Qt GUI crash' );
685 ## @}
686
687 ## BSOD category.
688 ksBsodCategory = 'BSOD';
689 ## Special reason indicating that the flesh and blood sheriff has work to do.
690 ksBsodAddNew = 'Add new BSOD';
691
692 ## Unit test category.
693 ksUnitTestCategory = 'Unit';
694 ## Special reason indicating that the flesh and blood sheriff has work to do.
695 ksUnitTestAddNew = 'Add new';
696
697 ## Used for indica that we shouldn't report anything for this test result ID and
698 ## consider promoting the previous error to test set level if it's the only one.
699 ktHarmless = ( 'Probably', 'Caused by previous error' );
700
701
702 def caseClosed(self, oCaseFile):
703 """
704 Reports the findings in the case and closes it.
705 """
706 #
707 # Log it and create a dReasonForReasultId we can use below.
708 #
709 dCommentForResultId = oCaseFile.dCommentForResultId;
710 if oCaseFile.dReasonForResultId:
711 # Must weed out ktHarmless.
712 dReasonForResultId = {};
713 for idKey, tReason in oCaseFile.dReasonForResultId.items():
714 if tReason is not self.ktHarmless:
715 dReasonForResultId[idKey] = tReason;
716 if not dReasonForResultId:
717 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
718 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
719 return False;
720
721 # Try promote to single reason.
722 atValues = dReasonForResultId.values();
723 fSingleReason = True;
724 if len(dReasonForResultId) == 1 and next(iter(dReasonForResultId.keys())) != oCaseFile.oTestSet.idTestResult:
725 self.dprint(u'Promoting single reason to whole set: %s' % (next(iter(atValues)),));
726 elif len(dReasonForResultId) > 1 and len(atValues) == list(atValues).count(next(iter(atValues))):
727 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), next(iter(atValues))));
728 else:
729 fSingleReason = False;
730 if fSingleReason:
731 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: next(iter(atValues)), };
732 if dCommentForResultId:
733 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: next(iter(dCommentForResultId.values())), };
734 elif oCaseFile.tReason is not None:
735 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
736 else:
737 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
738 return False;
739
740 self.vprint(u'Closing %s with following reason%s: %s'
741 % ( oCaseFile.sName, 's' if len(dReasonForResultId) > 1 else '', dReasonForResultId, ));
742
743 #
744 # Add the test failure reason record(s).
745 #
746 for idTestResult, tReason in dReasonForResultId.items():
747 oFailureReason = self.getFailureReason(tReason);
748 if oFailureReason is not None:
749 sComment = 'Set by $Revision: 94129 $' # Handy for reverting later.
750 if idTestResult in dCommentForResultId:
751 sComment += ': ' + dCommentForResultId[idTestResult];
752
753 oAdd = TestResultFailureData();
754 oAdd.initFromValues(idTestResult = idTestResult,
755 idFailureReason = oFailureReason.idFailureReason,
756 uidAuthor = self.uidSelf,
757 idTestSet = oCaseFile.oTestSet.idTestSet,
758 sComment = sComment,);
759 if self.oConfig.fRealRun:
760 try:
761 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
762 except Exception as oXcpt:
763 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
764 % (oXcpt, oAdd, oCaseFile.sLongName,));
765 else:
766 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
767 return True;
768
769 #
770 # Tools for assiting log parsing.
771 #
772
773 @staticmethod
774 def matchFollowedByLines(sStr, off, asFollowingLines):
775 """ Worker for isThisFollowedByTheseLines. """
776
777 # Advance off to the end of the line.
778 off = sStr.find('\n', off);
779 if off < 0:
780 return False;
781 off += 1;
782
783 # Match each string with the subsequent lines.
784 for iLine, sLine in enumerate(asFollowingLines):
785 offEnd = sStr.find('\n', off);
786 if offEnd < 0:
787 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
788 if sLine and sStr.find(sLine, off, offEnd) < 0:
789 return False;
790
791 # next line.
792 off = offEnd + 1;
793
794 return True;
795
796 @staticmethod
797 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
798 """
799 Looks for a line contining sFirst which is then followed by lines
800 with the strings in asFollowingLines. (No newline chars anywhere!)
801 Returns True / False.
802 """
803 off = sStr.find(sFirst, 0);
804 while off >= 0:
805 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
806 return True;
807 off = sStr.find(sFirst, off + 1);
808 return False;
809
810 @staticmethod
811 def findAndReturnRestOfLine(sHaystack, sNeedle):
812 """
813 Looks for sNeedle in sHaystack.
814 Returns The text following the needle up to the end of the line.
815 Returns None if not found.
816 """
817 if sHaystack is None:
818 return None;
819 off = sHaystack.find(sNeedle);
820 if off < 0:
821 return None;
822 off += len(sNeedle)
823 offEol = sHaystack.find('\n', off);
824 if offEol < 0:
825 offEol = len(sHaystack);
826 return sHaystack[off:offEol]
827
828 @staticmethod
829 def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
830 """
831 Looks for sNeedle in zeroe or more haystacks (asHaystack).
832 Returns The text following the first needed found up to the end of the line.
833 Returns None if not found.
834 """
835 for sHaystack in asHaystacks:
836 sRet = VirtualTestSheriff.findAndReturnRestOfLine(sHaystack, sNeedle);
837 if sRet is not None:
838 return sRet;
839 return None;
840
841
842 #
843 # The investigative units.
844 #
845
846 katSimpleInstallUninstallMainLogReasons = [
847 # ( Whether to stop on hit, reason tuple, needle text. )
848 ( False, ktReason_Host_LeftoverService,
849 'SERVICE_NAME: vbox' ),
850 ( False, ktReason_Host_LeftoverService,
851 'Seems installation was skipped. Old version lurking behind? Not the fault of this build/test run!'),
852 ];
853
854 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
855 'darwin': [
856 # ( Whether to stop on hit, reason tuple, needle text. )
857 ( True, ktReason_Host_DriverNotUnloading,
858 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
859 ],
860 'linux': [
861 # ( Whether to stop on hit, reason tuple, needle text. )
862 ( True, ktReason_Host_DriverNotCompilable,
863 'This system is not currently set up to build kernel modules' ),
864 ( True, ktReason_Host_DriverNotCompilable,
865 'This system is currently not set up to build kernel modules' ),
866 ( True, ktReason_Host_InstallationFailed,
867 'vboxdrv.sh: failed: Look at /var/log/vbox-install.log to find out what went wrong.' ),
868 ( True, ktReason_Host_DriverNotUnloading,
869 'Cannot unload module vboxdrv'),
870 ],
871 'solaris': [
872 # ( Whether to stop on hit, reason tuple, needle text. )
873 ( True, ktReason_Host_DriverNotUnloading, 'can\'t unload the module: Device busy' ),
874 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: Host module ...FAILED!' ),
875 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: NetFilter (Crossbow) module ...FAILED!' ),
876 ( True, ktReason_Host_InstallationFailed, 'svcadm: Couldn\'t bind to svc.configd.' ),
877 ( True, ktReason_Host_InstallationFailed, 'pkgadd: ERROR: postinstall script did not complete successfully' ),
878 ],
879 'win': [
880 # ( Whether to stop on hit, reason tuple, needle text. )
881 ( True, ktReason_Host_InstallationWantReboot, 'ERROR_SUCCESS_REBOOT_REQUIRED' ),
882 ( False, ktReason_Host_InstallationFailed, 'Installation error.' ),
883 ( True, ktReason_Host_InvalidPackage, 'Uninstaller failed, exit code: 1620' ),
884 ( True, ktReason_Host_InstallSourceAbsent, 'Uninstaller failed, exit code: 1612' ),
885 ],
886 };
887
888
889 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
890 """
891 Investigates an install or uninstall failure.
892
893 We lump the two together since the installation typically also performs
894 an uninstall first and will be seeing similar issues to the uninstall.
895 """
896 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
897
898 if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
899 oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
900 return True;
901
902 atSimple = self.katSimpleInstallUninstallMainLogReasons;
903 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
904 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
905
906 fFoundSomething = False;
907 for fStopOnHit, tReason, sNeedle in atSimple:
908 if sResultLog.find(sNeedle) > 0:
909 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
910 if fStopOnHit:
911 return True;
912 fFoundSomething = True;
913
914 return fFoundSomething if fFoundSomething else None;
915
916
917 def investigateBadTestBox(self, oCaseFile):
918 """
919 Checks out bad-testbox statuses.
920 """
921 _ = oCaseFile;
922 return False;
923
924
925 def investigateVBoxUnitTest(self, oCaseFile):
926 """
927 Checks out a VBox unittest problem.
928 """
929
930 #
931 # Process simple test case failures first, using their name as reason.
932 # We do the reason management just like for BSODs.
933 #
934 cRelevantOnes = 0;
935 sMainLog = oCaseFile.getMainLog();
936 aoFailedResults = oCaseFile.oTree.getListOfFailures();
937 for oFailedResult in aoFailedResults:
938 if oFailedResult is oCaseFile.oTree:
939 self.vprint('TODO: toplevel failure');
940 cRelevantOnes += 1
941
942 elif oFailedResult.sName == 'Installing VirtualBox':
943 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
944 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
945 cRelevantOnes += 1
946
947 elif oFailedResult.sName == 'Uninstalling VirtualBox':
948 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
949 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
950 cRelevantOnes += 1
951
952 elif oFailedResult.oParent is not None:
953 # Get the 2nd level node because that's where we'll find the unit test name.
954 while oFailedResult.oParent.oParent is not None:
955 oFailedResult = oFailedResult.oParent;
956
957 # Only report a failure once.
958 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
959 sKey = oFailedResult.sName;
960 if sKey.startswith('testcase/'):
961 sKey = sKey[9:];
962 if sKey in self.asUnitTestReasons:
963 tReason = ( self.ksUnitTestCategory, sKey );
964 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
965 else:
966 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
967 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
968 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
969 cRelevantOnes += 1
970 else:
971 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
972
973 #
974 # If we've caught all the relevant ones by now, report the result.
975 #
976 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
977 return self.caseClosed(oCaseFile);
978 return False;
979
980 def extractGuestCpuStack(self, sInfoText):
981 """
982 Extracts the guest CPU stacks from the input file.
983
984 Returns a dictionary keyed by the CPU number, value being a list of
985 raw stack lines (no header).
986 Returns empty dictionary if no stacks where found.
987 """
988 dRet = {};
989 off = 0;
990 while True:
991 # Find the stack.
992 offStart = sInfoText.find('=== start guest stack VCPU ', off);
993 if offStart < 0:
994 break;
995 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
996 if offEnd >= 0:
997 offEnd += 3;
998 else:
999 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
1000 if offEnd < 0:
1001 offEnd = len(sInfoText);
1002
1003 sStack = sInfoText[offStart : offEnd];
1004 sStack = sStack.replace('\r',''); # paranoia
1005 asLines = sStack.split('\n');
1006
1007 # Figure the CPU.
1008 asWords = asLines[0].split();
1009 if len(asWords) < 6 or not asWords[5].isdigit():
1010 break;
1011 iCpu = int(asWords[5]);
1012
1013 # Add it and advance.
1014 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
1015 off = offEnd;
1016 return dRet;
1017
1018 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
1019 """ Investigates kvm_lock_spinning deadlocks """
1020 #
1021 # Extract the stacks. We need more than one CPU to create a deadlock.
1022 #
1023 dStacks = self.extractGuestCpuStack(sInfoText);
1024 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
1025 if len(dStacks) >= 2:
1026 #
1027 # Examin each of the stacks. Each must have kvm_lock_spinning in
1028 # one of the first three entries.
1029 #
1030 cHits = 0;
1031 for asBacktrace in dStacks.values():
1032 for iFrame in xrange(min(3, len(asBacktrace))):
1033 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
1034 cHits += 1;
1035 break;
1036 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
1037 if cHits == len(dStacks):
1038 return (True, self.ktReason_VMM_kvm_lock_spinning);
1039
1040 _ = dLogs; _ = oCaseFile;
1041 return (False, None);
1042
1043 def investigateInfoHalReturnToFirmware(self, oCaseFile, sInfoText, dLogs):
1044 """ Investigates HalReturnToFirmware hangs """
1045 del oCaseFile
1046 del sInfoText
1047 del dLogs
1048 # hope that's sufficient
1049 return (True, self.ktReason_Unknown_HalReturnToFirmware);
1050
1051 ## Things we search a main or VM log for to figure out why something went bust.
1052 ## @note DO NOT ADD MORE STUFF HERE!
1053 ## Please use katSimpleMainLogReasons and katSimpleVmLogReasons instead!
1054 katSimpleMainAndVmLogReasonsDeprecated = [
1055 # ( Whether to stop on hit, reason tuple, needle text. )
1056 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
1057 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
1058 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
1059 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
1060 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
1061 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
1062 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
1063 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
1064 ( True, ktReason_Networking_Nonexistent_host_nic,
1065 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1066 ( True, ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND,
1067 'Failed to attach the network LUN (VERR_INTNET_FLT_IF_NOT_FOUND)' ),
1068 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
1069 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1070 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1071 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1072 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
1073 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
1074 ( True, ktReason_Unknown_File_Not_Found,
1075 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
1076 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
1077 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
1078 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
1079 ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' ),
1080 ( True, ktReason_OSInstall_Sata_no_BM, 'PCHS=14128/14134/8224' ),
1081 ( True, ktReason_Host_DoubleFreeHeap, 'double free or corruption' ),
1082 #( False, ktReason_Unknown_VM_Start_Error, 'VMSetError: ' ), - false positives for stuff like:
1083 # "VMSetError: VD: Backend 'VBoxIsoMaker' does not support async I/O"
1084 ( False, ktReason_Unknown_VM_Start_Error, 'error: failed to open session for' ),
1085 ( False, ktReason_Unknown_VM_Runtime_Error, 'Console: VM runtime error: fatal=true' ),
1086 ];
1087
1088 ## This we search a main log for to figure out why something went bust.
1089 katSimpleMainLogReasons = [
1090 # ( Whether to stop on hit, reason tuple, needle text. )
1091 ( False, ktReason_Host_win32com_gen_py, 'ModuleNotFoundError: No module named \'win32com.gen_py' ),
1092
1093 ];
1094
1095 ## This we search a VM log for to figure out why something went bust.
1096 katSimpleVmLogReasons = [
1097 # ( Whether to stop on hit, reason tuple, needle text. )
1098 ];
1099
1100 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
1101 katSimpleVBoxHardeningLogReasons = [
1102 # ( Whether to stop on hit, reason tuple, needle text. )
1103 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
1104 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
1105 ( True, ktReason_Host_TSTInfo_Accuracy_OOR, 'RTCRTSPTSTINFO::Accuracy::Millis: Out of range' ),
1106 ( False, ktReason_Unknown_VM_Crash, 'Quitting: ExitCode=0xc0000005 (rcNtWait=' ),
1107 ];
1108
1109 ## Things we search a kernel.log file for to figure out why something went bust.
1110 katSimpleKernelLogReasons = [
1111 # ( Whether to stop on hit, reason tuple, needle text. )
1112 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
1113 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn\'t work' ),
1114 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
1115 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libQt5CoreVBox' ),
1116 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libgtk-3' ),
1117 ];
1118
1119 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
1120 katSimpleVgaTextReasons = [
1121 # ( Whether to stop on hit, reason tuple, needle text. )
1122 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1123 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
1124 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1125 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
1126 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
1127 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
1128 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
1129 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
1130 "and send a report. Then try booting with the 'noapic' option\n"
1131 "\n" ),
1132 ( True, ktReason_OSInstall_GRUB_hang,
1133 "-----\nGRUB Loading stage2..\n\n\n\n" ),
1134 ( True, ktReason_OSInstall_GRUB_hang,
1135 "-----\nGRUB Loading stage2...\n\n\n\n" ), # the 3 dot hang appears to be less frequent
1136 ( True, ktReason_OSInstall_GRUB_hang,
1137 "-----\nGRUB Loading stage2....\n\n\n\n" ), # the 4 dot hang appears to be very infrequent
1138 ( True, ktReason_OSInstall_GRUB_hang,
1139 "-----\nGRUB Loading stage2.....\n\n\n\n" ), # the 5 dot hang appears to be more frequent again
1140 ( True, ktReason_OSInstall_Udev_hang,
1141 "\nStarting udev:\n\n\n\n" ),
1142 ( True, ktReason_OSInstall_Udev_hang,
1143 "\nStarting udev:\n------" ),
1144 ( True, ktReason_Panic_BootManagerC000000F,
1145 "Windows failed to start. A recent hardware or software change might be the" ),
1146 ( True, ktReason_BootManager_Image_corrupt,
1147 "BOOTMGR image is corrupt. The system cannot boot." ),
1148 ];
1149
1150 ## Things we search for in the info.txt file. Require handlers for now.
1151 katInfoTextHandlers = [
1152 # ( Trigger text, handler method )
1153 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
1154 ( "HalReturnToFirmware", investigateInfoHalReturnToFirmware ),
1155 ];
1156
1157 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
1158 katSimpleScreenshotHashReasons = [
1159 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
1160 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
1161 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
1162 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
1163 ( True, ktReason_BSOD_0000007F, '57e1880619e13042a87100e7a38c8974b85ce3866501be621bea0cc696bb2c63' ),
1164 ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
1165 ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
1166 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
1167 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
1168 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
1169 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
1170 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
1171 ];
1172
1173
1174 def scanLog(self, asLogs, atNeedles, oCaseFile, idTestResult):
1175 """
1176 Scans for atNeedles in sLog.
1177
1178 Returns True if a stop-on-hit neelde was found.
1179 Returns None if a no-stop reason was found.
1180 Returns False if no hit.
1181 """
1182 fRet = False;
1183 for fStopOnHit, tReason, oNeedle in atNeedles:
1184 fMatch = False;
1185 if utils.isString(oNeedle):
1186 for sLog in asLogs:
1187 if sLog:
1188 fMatch |= sLog.find(oNeedle) > 0;
1189 else:
1190 for sLog in asLogs:
1191 if sLog:
1192 fMatch |= oNeedle.search(sLog) is not None;
1193 if fMatch:
1194 oCaseFile.noteReasonForId(tReason, idTestResult);
1195 if fStopOnHit:
1196 return True;
1197 fRet = None;
1198 return fRet;
1199
1200
1201 def investigateGATest(self, oCaseFile, oFailedResult, sResultLog):
1202 """
1203 Investigates a failed VM run.
1204 """
1205 enmReason = None;
1206 sParentName = oFailedResult.oParent.sName if oFailedResult.oParent else '';
1207 if oFailedResult.sName == 'VBoxWindowsAdditions.exe' or sResultLog.find('VBoxWindowsAdditions.exe" failed with') > 0:
1208 enmReason = self.ktReason_Add_Installer_Win_Failed;
1209 # guest control:
1210 elif sParentName == 'Guest Control' and oFailedResult.sName == 'Preparations':
1211 enmReason = self.ktReason_Add_GstCtl_Preparations;
1212 elif oFailedResult.sName == 'Session Basics':
1213 enmReason = self.ktReason_Add_GstCtl_SessionBasics;
1214 elif oFailedResult.sName == 'Session Process References':
1215 enmReason = self.ktReason_Add_GstCtl_SessionProcRefs;
1216 elif oFailedResult.sName == 'Copy from guest':
1217 if sResultLog.find('*** abort action ***') >= 0:
1218 enmReason = self.ktReason_Add_GstCtl_CopyFromGuest_Timeout;
1219 elif oFailedResult.sName == 'Copy to guest':
1220 off = sResultLog.find('"Guest directory "');
1221 if off > 0 and sResultLog.find('" already exists"', off, off + 80):
1222 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_DstExists;
1223 elif sResultLog.find('Guest destination must not be empty') >= 0:
1224 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_DstEmpty;
1225 elif sResultLog.find('*** abort action ***') >= 0:
1226 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_Timeout;
1227 elif oFailedResult.sName.find('Session w/ Guest Reboot') >= 0:
1228 enmReason = self.ktReason_Add_GstCtl_Session_Reboot;
1229 # shared folders:
1230 elif sParentName == 'Shared Folders' and oFailedResult.sName == 'Automounting':
1231 enmReason = self.ktReason_Add_ShFl_Automount;
1232 elif oFailedResult.sName == 'mmap':
1233 if sResultLog.find('FsPerf: Flush issue at offset ') >= 0:
1234 enmReason = self.ktReason_Add_Mmap_Coherency;
1235 elif sResultLog.find('FlushViewOfFile') >= 0:
1236 enmReason = self.ktReason_Add_FlushViewOfFile;
1237 elif sParentName == 'Shared Folders' and oFailedResult.sName == 'Running FsPerf':
1238 enmReason = self.ktReason_Add_ShFl_FsPerf; ## Maybe it would be better to be more specific...
1239
1240 if enmReason is not None:
1241 return oCaseFile.noteReasonForId(enmReason, oFailedResult.idTestResult);
1242
1243 self.vprint(u'TODO: Cannot place GA failure idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1244 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1245 return False;
1246
1247 def isResultFromGATest(self, oCaseFile, oFailedResult):
1248 """
1249 Checks if this result and corresponding log snippet looks like a GA test run.
1250 """
1251 while oFailedResult is not None:
1252 if oFailedResult.sName in [ 'Guest Control', 'Shared Folders', 'FsPerf', 'VBoxWindowsAdditions.exe' ]:
1253 return True;
1254 if oCaseFile.oTestCase.sName == 'Guest Additions' and oFailedResult.sName in [ 'Install', ]:
1255 return True;
1256 oFailedResult = oFailedResult.oParent;
1257 return False;
1258
1259
1260 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
1261 """
1262 Investigates a failed VM run.
1263 """
1264
1265 def investigateLogSet():
1266 """
1267 Investigates the current set of VM related logs.
1268 """
1269 self.dprint('investigateLogSet: log lengths: result %u, VM %u, kernel %u, vga text %u, info text %u, hard %u'
1270 % ( len(sResultLog if sResultLog else ''),
1271 len(sVMLog if sVMLog else ''),
1272 len(sKrnlLog if sKrnlLog else ''),
1273 len(sVgaText if sVgaText else ''),
1274 len(sInfoText if sInfoText else ''),
1275 len(sNtHardLog if sNtHardLog else ''),));
1276
1277 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
1278 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
1279 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
1280 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
1281 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
1282 #self.dprint(u'hard.txt<<<\n%s\n<<<\n' % (sNtHardLog,));
1283
1284 # TODO: more
1285
1286 #
1287 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
1288 #
1289 sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
1290 'GIM: HyperV: Guest indicates a fatal condition! P0=');
1291 if sDetails is not None:
1292 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
1293 sKey = sDetails.split(' ', 1)[0];
1294 try: sKey = '0x%08X' % (int(sKey, 16),);
1295 except: pass;
1296 if sKey in self.asBsodReasons:
1297 tReason = ( self.ksBsodCategory, sKey );
1298 elif sKey.lower() in self.asBsodReasons: # just in case.
1299 tReason = ( self.ksBsodCategory, sKey.lower() );
1300 else:
1301 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
1302 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
1303 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
1304
1305 fFoundSomething = False;
1306
1307 #
1308 # Look for linux panic.
1309 #
1310 if sKrnlLog is not None:
1311 fRet = self.scanLog([sKrnlLog,], self.katSimpleKernelLogReasons, oCaseFile, oFailedResult.idTestResult);
1312 if fRet is True:
1313 return fRet;
1314 fFoundSomething |= fRet is None;
1315
1316 #
1317 # Loop thru the simple stuff.
1318 #
1319
1320 # Main log.
1321 fRet = self.scanLog([sResultLog,], self.katSimpleMainLogReasons, oCaseFile, oFailedResult.idTestResult);
1322 if fRet is True:
1323 return fRet;
1324 fFoundSomething |= fRet is None;
1325
1326 # VM log.
1327 fRet = self.scanLog([sVMLog,], self.katSimpleVmLogReasons, oCaseFile, oFailedResult.idTestResult);
1328 if fRet is True:
1329 return fRet;
1330 fFoundSomething |= fRet is None;
1331
1332 # Old main + vm log.
1333 fRet = self.scanLog([sResultLog, sVMLog], self.katSimpleMainAndVmLogReasonsDeprecated,
1334 oCaseFile, oFailedResult.idTestResult);
1335 if fRet is True:
1336 return fRet;
1337 fFoundSomething |= fRet is None;
1338
1339 # Continue with vga text.
1340 if sVgaText:
1341 fRet = self.scanLog([sVgaText,], self.katSimpleVgaTextReasons, oCaseFile, oFailedResult.idTestResult);
1342 if fRet is True:
1343 return fRet;
1344 fFoundSomething |= fRet is None;
1345
1346 # Continue with screen hashes.
1347 if sScreenHash is not None:
1348 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
1349 if sScreenHash == sHash:
1350 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1351 if fStopOnHit:
1352 return True;
1353 fFoundSomething = True;
1354
1355 # Check VBoxHardening.log.
1356 if sNtHardLog is not None:
1357 fRet = self.scanLog([sNtHardLog,], self.katSimpleVBoxHardeningLogReasons, oCaseFile, oFailedResult.idTestResult);
1358 if fRet is True:
1359 return fRet;
1360 fFoundSomething |= fRet is None;
1361
1362 #
1363 # Complicated stuff.
1364 #
1365 dLogs = {
1366 'sVMLog': sVMLog,
1367 'sNtHardLog': sNtHardLog,
1368 'sScreenHash': sScreenHash,
1369 'sKrnlLog': sKrnlLog,
1370 'sVgaText': sVgaText,
1371 'sInfoText': sInfoText,
1372 };
1373
1374 # info.txt.
1375 if sInfoText:
1376 for sNeedle, fnHandler in self.katInfoTextHandlers:
1377 if sInfoText.find(sNeedle) > 0:
1378 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1379 if tReason is not None:
1380 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1381 if fStop:
1382 return True;
1383 fFoundSomething = True;
1384
1385 #
1386 # Check for repeated reboots...
1387 #
1388 if sVMLog is not None:
1389 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1390 if cResets > 10:
1391 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1392 sComment = 'Counted %s reboots' % (cResets,));
1393
1394 return fFoundSomething;
1395
1396 #
1397 # Check if we got any VM or/and kernel logs. Treat them as sets in
1398 # case we run multiple VMs here (this is of course ASSUMING they
1399 # appear in the order that terminateVmBySession uploads them).
1400 #
1401 cTimes = 0;
1402 sVMLog = None;
1403 sNtHardLog = None;
1404 sScreenHash = None;
1405 sKrnlLog = None;
1406 sVgaText = None;
1407 sInfoText = None;
1408 for oFile in oFailedResult.aoFiles:
1409 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1410 if 'VBoxHardening.log' not in oFile.sFile:
1411 if sVMLog is not None:
1412 if investigateLogSet() is True:
1413 return True;
1414 cTimes += 1;
1415 sInfoText = None;
1416 sVgaText = None;
1417 sKrnlLog = None;
1418 sScreenHash = None;
1419 sNtHardLog = None;
1420 sVMLog = oCaseFile.getLogFile(oFile);
1421 else:
1422 sNtHardLog = oCaseFile.getLogFile(oFile);
1423 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1424 sKrnlLog = oCaseFile.getLogFile(oFile);
1425 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1426 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1427 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1428 sInfoText = oCaseFile.getLogFile(oFile);
1429 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1430 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1431 if sScreenHash is not None:
1432 sScreenHash = sScreenHash.lower();
1433 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1434
1435 if ( sVMLog is not None \
1436 or sNtHardLog is not None \
1437 or cTimes == 0) \
1438 and investigateLogSet() is True:
1439 return True;
1440
1441 return None;
1442
1443 def isResultFromVMRun(self, oFailedResult, sResultLog):
1444 """
1445 Checks if this result and corresponding log snippet looks like a VM run.
1446 """
1447
1448 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1449 if sResultLog.find(' startVm') > 0:
1450 return True;
1451
1452 # Any other indicators? No?
1453 _ = oFailedResult;
1454 return False;
1455
1456
1457 ## Things we search a VBoxSVC log for to figure out why something went bust.
1458 katSimpleSvcLogReasons = [
1459 # ( Whether to stop on hit, reason tuple, needle text. )
1460 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* exited normally: -1073741819 \(0xc0000005\)') ),
1461 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* was signalled: 11 \(0xb\)') ),
1462 ];
1463
1464 def investigateSvcLogForVMRun(self, oCaseFile, sSvcLog):
1465 """
1466 Check the VBoxSVC log for a single VM run.
1467 """
1468 if sSvcLog:
1469 fRet = self.scanLog([sSvcLog,], self.katSimpleSvcLogReasons, oCaseFile, oCaseFile.oTree.idTestResult);
1470 if fRet is True or fRet is None:
1471 return True;
1472 return False;
1473
1474 def investigateNtHardLogForVMRun(self, oCaseFile):
1475 """
1476 Check if the hardening log for a single VM run contains VM crash indications.
1477 """
1478 aoLogFiles = oCaseFile.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseVm);
1479 for oLogFile in aoLogFiles:
1480 if oLogFile.sFile.find('VBoxHardening.log') >= 0:
1481 sLog = oCaseFile.getLogFile(oLogFile);
1482 if sLog.find('Quitting: ExitCode=0xc0000005') >= 0:
1483 return oCaseFile.noteReasonForId(self.ktReason_Unknown_VM_Crash, oCaseFile.oTree.idTestResult);
1484 return False;
1485
1486
1487 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1488 """
1489 Checks out a VBox VM test.
1490
1491 This is generic investigation of a test running one or more VMs, like
1492 for example a smoke test or a guest installation test.
1493
1494 The fSingleVM parameter is a hint, which probably won't come in useful.
1495 """
1496 _ = fSingleVM;
1497
1498 #
1499 # Get a list of test result failures we should be looking into and the main log.
1500 #
1501 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1502 sMainLog = oCaseFile.getMainLog();
1503
1504 #
1505 # There are a set of errors ending up on the top level result record.
1506 # Should deal with these first.
1507 #
1508 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1509 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1510 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1511 # ignore other failures in the test if we're not a little bit careful here.
1512 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1513 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1514 return self.caseClosed(oCaseFile);
1515
1516 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1517 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1518 (' Timeout: ', ' Attempting to abort child...',) ):
1519 if sMainLog.find('*** glibc detected *** /') > 0:
1520 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1521 else:
1522 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1523 return self.caseClosed(oCaseFile);
1524
1525 # Look for heap corruption without visible hang.
1526 if sMainLog.find('*** glibc detected *** /') > 0 \
1527 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1528 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1529 return self.caseClosed(oCaseFile);
1530
1531 # Out of memory w/ timeout.
1532 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1533 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1534 return self.caseClosed(oCaseFile);
1535
1536 # Stale files like vts_rm.exe (windows).
1537 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1538 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1539 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1540 return self.caseClosed(oCaseFile);
1541
1542 #
1543 # XPCOM screwup
1544 #
1545 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1546 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1547 return self.caseClosed(oCaseFile);
1548
1549 #
1550 # Go thru each failed result.
1551 #
1552 for oFailedResult in aoFailedResults:
1553 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1554 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1555 if oFailedResult.sName == 'Installing VirtualBox':
1556 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1557
1558 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1559 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1560
1561 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1562 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1563
1564 elif self.isResultFromGATest(oCaseFile, oFailedResult):
1565 self.investigateGATest(oCaseFile, oFailedResult, sResultLog);
1566
1567 elif sResultLog.find('most likely not unique') > 0:
1568 oCaseFile.noteReasonForId(self.ktReason_Host_NetworkMisconfiguration, oFailedResult.idTestResult)
1569 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1570 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1571
1572 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1573 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1574 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1575
1576 elif sResultLog.find('** error: no action was specified') > 0 \
1577 or sResultLog.find('(len(self._asXml, asText))') > 0:
1578 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1579
1580 else:
1581 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1582 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1583
1584 #
1585 # Windows python/com screwup.
1586 #
1587 if sMainLog.find('ModuleNotFoundError: No module named \'win32com.gen_py') > 0:
1588 oCaseFile.noteReason(self.ktReason_Host_win32com_gen_py);
1589 return self.caseClosed(oCaseFile);
1590
1591 #
1592 # Check VBoxSVC.log and VBoxHardening.log for VM crashes if inconclusive on single VM runs.
1593 #
1594 if fSingleVM and len(oCaseFile.dReasonForResultId) < len(aoFailedResults):
1595 self.dprint(u'Got %u out of %u - checking VBoxSVC.log...'
1596 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1597 if self.investigateSvcLogForVMRun(oCaseFile, oCaseFile.getSvcLog()):
1598 return self.caseClosed(oCaseFile);
1599 if self.investigateNtHardLogForVMRun(oCaseFile):
1600 return self.caseClosed(oCaseFile);
1601
1602 #
1603 # Report home and close the case if we got them all, otherwise log it.
1604 #
1605 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1606 return self.caseClosed(oCaseFile);
1607
1608 if oCaseFile.dReasonForResultId:
1609 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1610 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1611 else:
1612 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1613 return False;
1614
1615
1616 ## Things we search a main log for to figure out why something in the API test went bust.
1617 katSimpleApiMainLogReasons = [
1618 # ( Whether to stop on hit, reason tuple, needle text. )
1619 ( True, ktReason_Networking_Nonexistent_host_nic,
1620 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1621 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1622 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1623 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1624 ( True, ktReason_API_Digest_Mismatch, 'Digest mismatch (VERR_NOT_EQUAL)' ),
1625 ( True, ktReason_API_MoveVM_SharingViolation, 'rc=VBOX_E_IPRT_ERROR text="Could not copy the log file ' ),
1626 ( True, ktReason_API_MoveVM_InvalidParameter,
1627 'rc=VBOX_E_IPRT_ERROR text="Could not copy the setting file ' ),
1628 ( True, ktReason_API_Open_Session_Failed, 'error: failed to open session for' ),
1629 ];
1630
1631 def investigateVBoxApiTest(self, oCaseFile):
1632 """
1633 Checks out a VBox API test.
1634 """
1635
1636 #
1637 # Get a list of test result failures we should be looking into and the main log.
1638 #
1639 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1640 sMainLog = oCaseFile.getMainLog();
1641
1642 #
1643 # Go thru each failed result.
1644 #
1645 for oFailedResult in aoFailedResults:
1646 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1647 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1648 if oFailedResult.sName == 'Installing VirtualBox':
1649 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1650
1651 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1652 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1653
1654 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1655 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1656
1657 else:
1658 fFoundSomething = False;
1659 for fStopOnHit, tReason, sNeedle in self.katSimpleApiMainLogReasons:
1660 if sResultLog.find(sNeedle) > 0:
1661 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1662 fFoundSomething = True;
1663 if fStopOnHit:
1664 break;
1665 if fFoundSomething:
1666 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1667 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1668
1669 #
1670 # Report home and close the case if we got them all, otherwise log it.
1671 #
1672 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1673 return self.caseClosed(oCaseFile);
1674
1675 if oCaseFile.dReasonForResultId:
1676 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1677 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1678 else:
1679 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1680 return False;
1681
1682
1683 def reasoningFailures(self):
1684 """
1685 Guess the reason for failures.
1686 """
1687 #
1688 # Get a list of failed test sets without any assigned failure reason.
1689 #
1690 cGot = 0;
1691 if not self.oConfig.aidTestSets:
1692 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack,
1693 tsNow = self.tsNow);
1694 else:
1695 aoTestSets = [self.oTestSetLogic.getById(idTestSet) for idTestSet in self.oConfig.aidTestSets];
1696 for oTestSet in aoTestSets:
1697 self.dprint(u'----------------------------------- #%u, status %s -----------------------------------'
1698 % ( oTestSet.idTestSet, oTestSet.enmStatus,));
1699
1700 #
1701 # Open a case file and assign it to the right investigator.
1702 #
1703 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1704 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1705 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1706 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1707 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1708
1709 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1710
1711 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1712 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1713 fRc = self.investigateBadTestBox(oCaseFile);
1714
1715 elif oCaseFile.isVBoxUnitTest():
1716 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1717 fRc = self.investigateVBoxUnitTest(oCaseFile);
1718
1719 elif oCaseFile.isVBoxInstallTest() or oCaseFile.isVBoxUnattendedInstallTest():
1720 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1721 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1722
1723 elif oCaseFile.isVBoxUSBTest():
1724 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1725 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1726
1727 elif oCaseFile.isVBoxStorageTest():
1728 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1729 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1730
1731 elif oCaseFile.isVBoxGAsTest():
1732 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1733 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1734
1735 elif oCaseFile.isVBoxAPITest():
1736 self.dprint(u'investigateVBoxApiTest is taking over %s.' % (oCaseFile.sLongName,));
1737 fRc = self.investigateVBoxApiTest(oCaseFile);
1738
1739 elif oCaseFile.isVBoxBenchmarkTest():
1740 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1741 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1742
1743 elif oCaseFile.isVBoxSmokeTest():
1744 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1745 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1746
1747 elif oCaseFile.isVBoxSerialTest():
1748 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1749 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1750
1751 else:
1752 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1753 fRc = False;
1754 cGot += fRc is True;
1755
1756 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1757 return 0;
1758
1759
1760 def main(self):
1761 """
1762 The 'main' function.
1763 Return exit code (0, 1, etc).
1764 """
1765 # Database stuff.
1766 self.oDb = TMDatabaseConnection()
1767 self.oTestResultLogic = TestResultLogic(self.oDb);
1768 self.oTestSetLogic = TestSetLogic(self.oDb);
1769 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1770 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1771 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1772 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1773
1774 # Get a fix on our 'now' before we do anything..
1775 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1776 self.tsNow = self.oDb.fetchOne();
1777
1778 # If we're suppost to commit anything we need to get our user ID.
1779 rcExit = 0;
1780 if self.oConfig.fRealRun:
1781 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1782 if self.oLogin is None:
1783 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1784 else:
1785 self.uidSelf = self.oLogin.uid;
1786
1787 #
1788 # Do the stuff.
1789 #
1790 if rcExit == 0:
1791 rcExit = self.selfCheck();
1792 if rcExit == 0:
1793 rcExit = self.badTestBoxManagement();
1794 rcExit2 = self.reasoningFailures();
1795 if rcExit == 0:
1796 rcExit = rcExit2;
1797 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1798 if rcExit == 0:
1799 rcExit = self.badTestBoxManagement();
1800
1801 # Cleanup.
1802 self.oFailureReasonLogic = None;
1803 self.oTestResultFailureLogic = None;
1804 self.oTestSetLogic = None;
1805 self.oTestResultLogic = None;
1806 self.oDb.close();
1807 self.oDb = None;
1808 if self.oLogFile is not None:
1809 self.oLogFile.close();
1810 self.oLogFile = None;
1811 return rcExit;
1812
1813if __name__ == '__main__':
1814 sys.exit(VirtualTestSheriff().main());
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette