VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 97171

Last change on this file since 97171 was 97171, checked in by vboxsync, 2 years ago

Validation Kit/vsheriff: Tried teaching it that the host disk is full.

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 89.7 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 97171 2022-10-17 16:45:11Z vboxsync $
4# pylint: disable=line-too-long
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15from __future__ import print_function;
16
17__copyright__ = \
18"""
19Copyright (C) 2012-2022 Oracle and/or its affiliates.
20
21This file is part of VirtualBox base platform packages, as
22available from https://www.virtualbox.org.
23
24This program is free software; you can redistribute it and/or
25modify it under the terms of the GNU General Public License
26as published by the Free Software Foundation, in version 3 of the
27License.
28
29This program is distributed in the hope that it will be useful, but
30WITHOUT ANY WARRANTY; without even the implied warranty of
31MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
32General Public License for more details.
33
34You should have received a copy of the GNU General Public License
35along with this program; if not, see <https://www.gnu.org/licenses>.
36
37The contents of this file may alternatively be used under the terms
38of the Common Development and Distribution License Version 1.0
39(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
40in the VirtualBox distribution, in which case the provisions of the
41CDDL are applicable instead of those of the GPL.
42
43You may elect to license modified versions of this file under the
44terms and conditions of either the GPL or the CDDL or both.
45
46SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
47"""
48__version__ = "$Revision: 97171 $"
49
50
51# Standard python imports
52import hashlib;
53import os;
54import re;
55import smtplib;
56#import subprocess;
57import sys;
58from email.mime.multipart import MIMEMultipart;
59from email.mime.text import MIMEText;
60from email.utils import COMMASPACE;
61
62if sys.version_info[0] >= 3:
63 from io import BytesIO as BytesIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
64else:
65 from StringIO import StringIO as BytesIO; # pylint: disable=import-error,no-name-in-module,useless-import-alias
66from optparse import OptionParser; # pylint: disable=deprecated-module
67from PIL import Image; # pylint: disable=import-error
68
69# Add Test Manager's modules path
70g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
71sys.path.append(g_ksTestManagerDir);
72
73# Test Manager imports
74from common import utils;
75from testmanager.core.db import TMDatabaseConnection;
76from testmanager.core.build import BuildDataEx;
77from testmanager.core.failurereason import FailureReasonLogic;
78from testmanager.core.testbox import TestBoxLogic, TestBoxData;
79from testmanager.core.testcase import TestCaseDataEx;
80from testmanager.core.testgroup import TestGroupData;
81from testmanager.core.testset import TestSetLogic, TestSetData;
82from testmanager.core.testresults import TestResultLogic, TestResultFileData;
83from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
84from testmanager.core.useraccount import UserAccountLogic;
85from testmanager.config import g_ksSmtpHost, g_kcSmtpPort, g_ksAlertFrom, \
86 g_ksAlertSubject, g_asAlertList #, g_ksLomPassword;
87
88# Python 3 hacks:
89if sys.version_info[0] >= 3:
90 xrange = range; # pylint: disable=redefined-builtin,invalid-name
91
92
93class VirtualTestSheriffCaseFile(object):
94 """
95 A failure investigation case file.
96
97 """
98
99
100 ## Max log file we'll read into memory. (256 MB)
101 kcbMaxLogRead = 0x10000000;
102
103 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
104 self.oSheriff = oSheriff;
105 self.oTestSet = oTestSet; # TestSetData
106 self.oTree = oTree; # TestResultDataEx
107 self.oBuild = oBuild; # BuildDataEx
108 self.oTestBox = oTestBox; # TestBoxData
109 self.oTestGroup = oTestGroup; # TestGroupData
110 self.oTestCase = oTestCase; # TestCaseDataEx
111 self.sMainLog = ''; # The main log file. Empty string if not accessible.
112 self.sSvcLog = ''; # The VBoxSVC log file. Empty string if not accessible.
113
114 # Generate a case file name.
115 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
116 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
117 % ( self.oTestSet.idTestSet,
118 self.oTestCase.sName,
119 self.oTestBox.sName,
120 self.oTestBox.sOs,
121 self.oTestBox.sOsVersion,
122 self.oTestBox.sCpuArch,
123 self.oTestBox.sCpuName,
124 self.oTestBox.sCpuVendor,
125 self.oBuild.oCat.sProduct,
126 self.oBuild.oCat.sBranch,
127 self.oBuild.oCat.sType,
128 self.oBuild.iRevision, );
129
130 # Investigation notes.
131 self.tReason = None; # None or one of the ktReason_XXX constants.
132 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
133 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
134
135 #
136 # Reason.
137 #
138
139 def noteReason(self, tReason):
140 """ Notes down a possible reason. """
141 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
142 self.tReason = tReason;
143 return True;
144
145 def noteReasonForId(self, tReason, idTestResult, sComment = None):
146 """ Notes down a possible reason for a specific test result. """
147 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
148 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
149 (u' (%s)' % (sComment,)) if sComment is not None else ''));
150 self.dReasonForResultId[idTestResult] = tReason;
151 if sComment is not None:
152 self.dCommentForResultId[idTestResult] = sComment;
153 return True;
154
155
156 #
157 # Test classification.
158 #
159
160 def isVBoxTest(self):
161 """ Test classification: VirtualBox (using the build) """
162 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
163
164 def isVBoxUnitTest(self):
165 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
166 return self.isVBoxTest() \
167 and (self.oTestCase.sName.lower() == 'unit tests' or self.oTestCase.sName.lower().startswith('misc: unit tests'));
168
169 def isVBoxInstallTest(self):
170 """ Test case classification: VirtualBox Guest installation test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('install:');
173
174 def isVBoxUnattendedInstallTest(self):
175 """ Test case classification: VirtualBox Guest installation test. """
176 return self.isVBoxTest() \
177 and self.oTestCase.sName.lower().startswith('uinstall:');
178
179 def isVBoxUSBTest(self):
180 """ Test case classification: VirtualBox USB test. """
181 return self.isVBoxTest() \
182 and self.oTestCase.sName.lower().startswith('usb:');
183
184 def isVBoxStorageTest(self):
185 """ Test case classification: VirtualBox Storage test. """
186 return self.isVBoxTest() \
187 and self.oTestCase.sName.lower().startswith('storage:');
188
189 def isVBoxGAsTest(self):
190 """ Test case classification: VirtualBox Guest Additions test. """
191 return self.isVBoxTest() \
192 and ( self.oTestCase.sName.lower().startswith('guest additions')
193 or self.oTestCase.sName.lower().startswith('ga\'s tests'));
194
195 def isVBoxAPITest(self):
196 """ Test case classification: VirtualBox API test. """
197 return self.isVBoxTest() \
198 and self.oTestCase.sName.lower().startswith('api:');
199
200 def isVBoxBenchmarkTest(self):
201 """ Test case classification: VirtualBox Benchmark test. """
202 return self.isVBoxTest() \
203 and self.oTestCase.sName.lower().startswith('benchmark:');
204
205 def isVBoxSmokeTest(self):
206 """ Test case classification: Smoke test. """
207 return self.isVBoxTest() \
208 and self.oTestCase.sName.lower().startswith('smoketest');
209
210 def isVBoxSerialTest(self):
211 """ Test case classification: Smoke test. """
212 return self.isVBoxTest() \
213 and self.oTestCase.sName.lower().startswith('serial:');
214
215
216 #
217 # Utility methods.
218 #
219
220 def getMainLog(self):
221 """
222 Tries to read the main log file since this will be the first source of information.
223 """
224 if self.sMainLog:
225 return self.sMainLog;
226 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
227 if oFile is not None:
228 try:
229 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
230 except Exception as oXcpt:
231 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
232 self.sMainLog = '';
233 else:
234 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
235 return self.sMainLog;
236
237 def getLogFile(self, oFile):
238 """
239 Tries to read the given file as a utf-8 log file.
240 oFile is a TestFileDataEx instance.
241 Returns empty string if problems opening or reading the file.
242 """
243 sContent = '';
244 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
245 if oFile is not None:
246 try:
247 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
248 except Exception as oXcpt:
249 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
250 else:
251 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
252 return sContent;
253
254 def getSvcLog(self):
255 """
256 Tries to read the VBoxSVC log file as it typically not associated with a failing test result.
257 Note! Returns the first VBoxSVC log file we find.
258 """
259 if not self.sSvcLog:
260 aoSvcLogFiles = self.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseSvc);
261 if aoSvcLogFiles:
262 self.sSvcLog = self.getLogFile(aoSvcLogFiles[0]);
263 return self.sSvcLog;
264
265 def getScreenshotSha256(self, oFile):
266 """
267 Tries to read the given screenshot file, uncompress it, and do SHA-2
268 on the raw pixels.
269 Returns SHA-2 digest string on success, None on failure.
270 """
271 (oImgFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
272 try:
273 abImageFile = oImgFile.read();
274 except Exception as oXcpt:
275 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
276 else:
277 try:
278 oImage = Image.open(BytesIO(abImageFile));
279 except Exception as oXcpt:
280 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
281 else:
282 try:
283 oHash = hashlib.sha256();
284 if hasattr(oImage, 'tobytes'):
285 oHash.update(oImage.tobytes());
286 else:
287 oHash.update(oImage.tostring()); # pylint: disable=no-member
288 except Exception as oXcpt:
289 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
290 else:
291 return oHash.hexdigest();
292 return None;
293
294
295
296 def isSingleTestFailure(self):
297 """
298 Figure out if this is a single test failing or if it's one of the
299 more complicated ones.
300 """
301 if self.oTree.cErrors == 1:
302 return True;
303 if self.oTree.deepCountErrorContributers() <= 1:
304 return True;
305 return False;
306
307
308
309class VirtualTestSheriff(object): # pylint: disable=too-few-public-methods
310 """
311 Add build info into Test Manager database.
312 """
313
314 ## The user account for the virtual sheriff.
315 ksLoginName = 'vsheriff';
316
317 def __init__(self):
318 """
319 Parse command line.
320 """
321 self.oDb = None;
322 self.tsNow = None;
323 self.oTestResultLogic = None;
324 self.oTestSetLogic = None;
325 self.oFailureReasonLogic = None; # FailureReasonLogic;
326 self.oTestResultFailureLogic = None; # TestResultFailureLogic
327 self.oLogin = None;
328 self.uidSelf = -1;
329 self.oLogFile = None;
330 self.asBsodReasons = [];
331 self.asUnitTestReasons = [];
332
333 oParser = OptionParser();
334 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
335 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
336 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
337 help = 'Work period specified in hours. Defauls is 2 hours.');
338 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
339 help = 'Whether to commit the findings to the database. Default is a dry run.');
340 oParser.add_option('--testset', dest = 'aidTestSets', metavar = '<id>', default = [], type = 'int', action = 'append',
341 help = 'Only investigate this one. Accumulates IDs when repeated.');
342 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
343 help = 'Quiet execution');
344 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
345 help = 'Where to log messages.');
346 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
347 help = 'Enables debug mode.');
348
349 (self.oConfig, _) = oParser.parse_args();
350
351 if self.oConfig.sLogFile:
352 self.oLogFile = open(self.oConfig.sLogFile, "a"); # pylint: disable=consider-using-with
353 self.oLogFile.write('VirtualTestSheriff: $Revision: 97171 $ \n');
354
355
356 def eprint(self, sText):
357 """
358 Prints error messages.
359 Returns 1 (for exit code usage.)
360 """
361 print('error: %s' % (sText,));
362 if self.oLogFile is not None:
363 if sys.version_info[0] >= 3:
364 self.oLogFile.write(u'error: %s\n' % (sText,));
365 else:
366 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
367 return 1;
368
369 def dprint(self, sText):
370 """
371 Prints debug info.
372 """
373 if self.oConfig.fDebug:
374 if not self.oConfig.fQuiet:
375 print('debug: %s' % (sText, ));
376 if self.oLogFile is not None:
377 if sys.version_info[0] >= 3:
378 self.oLogFile.write(u'debug: %s\n' % (sText,));
379 else:
380 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
381 return 0;
382
383 def vprint(self, sText):
384 """
385 Prints verbose info.
386 """
387 if not self.oConfig.fQuiet:
388 print('info: %s' % (sText,));
389 if self.oLogFile is not None:
390 if sys.version_info[0] >= 3:
391 self.oLogFile.write(u'info: %s\n' % (sText,));
392 else:
393 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
394 return 0;
395
396 def getFailureReason(self, tReason):
397 """ Gets the failure reason object for tReason. """
398 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
399
400 def selfCheck(self):
401 """ Does some self checks, looking up things we expect to be in the database and such. """
402 rcExit = 0;
403 for sAttr in dir(self.__class__):
404 if sAttr.startswith('ktReason_'):
405 tReason = getattr(self.__class__, sAttr);
406 oFailureReason = self.getFailureReason(tReason);
407 if oFailureReason is None:
408 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
409 % (tReason[1], tReason[0],));
410
411 # Check the user account as well.
412 if self.oLogin is None:
413 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
414 if oLogin is None:
415 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
416 return rcExit;
417
418 def sendEmailAlert(self, uidAuthor, sBodyText):
419 """
420 Sends email alert.
421 """
422
423 # Get author email
424 self.oDb.execute('SELECT sEmail FROM Users WHERE uid=%s', (uidAuthor,));
425 sFrom = self.oDb.fetchOne();
426 if sFrom is not None:
427 sFrom = sFrom[0];
428 else:
429 sFrom = g_ksAlertFrom;
430
431 # Gather recipient list.
432 asEmailList = [];
433 for sUser in g_asAlertList:
434 self.oDb.execute('SELECT sEmail FROM Users WHERE sUsername=%s', (sUser,));
435 sEmail = self.oDb.fetchOne();
436 if sEmail:
437 asEmailList.append(sEmail[0]);
438 if not asEmailList:
439 return self.eprint('No email addresses to send alter to!');
440
441 # Compose the message.
442 oMsg = MIMEMultipart();
443 oMsg['From'] = sFrom;
444 oMsg['To'] = COMMASPACE.join(asEmailList);
445 oMsg['Subject'] = g_ksAlertSubject;
446 oMsg.attach(MIMEText(sBodyText, 'plain'))
447
448 # Try send it.
449 try:
450 oSMTP = smtplib.SMTP(g_ksSmtpHost, g_kcSmtpPort);
451 oSMTP.sendmail(sFrom, asEmailList, oMsg.as_string())
452 oSMTP.quit()
453 except smtplib.SMTPException as oXcpt:
454 return self.eprint('Failed to send mail: %s' % (oXcpt,));
455
456 return 0;
457
458 def badTestBoxManagement(self):
459 """
460 Looks for bad test boxes and first tries once to reboot them then disables them.
461 """
462 rcExit = 0;
463
464 #
465 # We skip this entirely if we're running in the past and not in harmless debug mode.
466 #
467 if self.oConfig.cStartHoursAgo != 0 \
468 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
469 return rcExit;
470 tsNow = self.tsNow if self.oConfig.fDebug else None;
471 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
472 oTestBoxLogic = TestBoxLogic(self.oDb);
473
474 #
475 # Generate a list of failures reasons we consider bad-testbox behavior.
476 #
477 aidFailureReasons = [
478 self.getFailureReason(self.ktReason_Host_DriverNotLoaded).idFailureReason,
479 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
480 self.getFailureReason(self.ktReason_Host_DriverNotCompilable).idFailureReason,
481 self.getFailureReason(self.ktReason_Host_InstallationFailed).idFailureReason,
482 ];
483
484 #
485 # Get list of bad test boxes for given period and check them out individually.
486 #
487 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
488 aidFailureReasons = aidFailureReasons);
489 for idTestBox in aidBadTestBoxes:
490 # Skip if the testbox is already disabled or has a pending reboot command.
491 try:
492 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
493 except Exception as oXcpt:
494 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
495 continue;
496 if not oTestBox.fEnabled:
497 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
498 % ( idTestBox, oTestBox.sName, ));
499 continue;
500 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
501 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
502 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
503 continue;
504
505 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
506 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
507 cOkay = 0;
508 cBad = 0;
509 iFirstOkay = len(aoSets);
510 for iSet, oSet in enumerate(aoSets):
511 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
512 cBad += 1;
513 else:
514 # Check for bad failure reasons.
515 oFailure = None;
516 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
517 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oSet.idTestSet)
518 aoFailedResults = oTree.getListOfFailures();
519 for oFailedResult in aoFailedResults:
520 oFailure = self.oTestResultFailureLogic.getById(oFailedResult.idTestResult);
521 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
522 break;
523 oFailure = None;
524 if oFailure is not None:
525 cBad += 1;
526 else:
527 # This is an okay test result then.
528 ## @todo maybe check the elapsed time here, it could still be a bad run?
529 cOkay += 1;
530 iFirstOkay = min(iFirstOkay, iSet);
531 if iSet > 10:
532 break;
533
534 # We react if there are two or more bad-testbox statuses at the head of the
535 # history and at least three in the last 10 results.
536 if iFirstOkay >= 2 and cBad > 2:
537 if oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
538 sComment = u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
539 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
540 self.vprint(sComment);
541 self.sendEmailAlert(self.uidSelf, sComment);
542 if self.oConfig.fRealRun is True:
543 try:
544 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
545 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
546 % (iFirstOkay, cBad, cOkay),);
547 except Exception as oXcpt:
548 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
549 else:
550 sComment = u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u' \
551 % (idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay);
552 self.vprint(sComment);
553 self.sendEmailAlert(self.uidSelf, sComment);
554 if self.oConfig.fRealRun is True:
555 try:
556 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
557 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
558 % (iFirstOkay, cBad, cOkay),);
559 except Exception as oXcpt:
560 rcExit = self.eprint(u'Error rebooting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
561 else:
562 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
563 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
564
565 ## @todo r=bird: review + rewrite;
566 ## - no selecting here, that belongs in the core/*.py files.
567 ## - preserve existing comments.
568 ## - doing way too much in the try/except block.
569 ## - No password quoting in the sshpass command that always fails (127).
570 ## - Timeout is way to low. testboxmem1 need more than 10 min to take a dump, ages to
571 ## get thru POST and another 5 just to time out in grub. Should be an hour or so.
572 ## Besides, it need to be constant elsewhere in the file, not a variable here.
573 ##
574 ##
575 ## Reset hanged testboxes
576 ##
577 #cStatusTimeoutMins = 10;
578 #
579 #self.oDb.execute('SELECT TestBoxStatuses.idTestBox\n'
580 # ' FROM TestBoxStatuses, TestBoxes\n'
581 # ' WHERE TestBoxStatuses.tsUpdated >= (CURRENT_TIMESTAMP - interval \'%s hours\')\n'
582 # ' AND TestBoxStatuses.tsUpdated < (CURRENT_TIMESTAMP - interval \'%s minutes\')\n'
583 # ' AND TestBoxStatuses.idTestBox = TestBoxes.idTestBox\n'
584 # ' AND Testboxes.tsExpire = \'infinity\'::timestamp', (cHoursBack,cStatusTimeoutMins));
585 #for idTestBox in self.oDb.fetchAll():
586 # idTestBox = idTestBox[0];
587 # try:
588 # oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
589 # except Exception as oXcpt:
590 # rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
591 # continue;
592 # # Skip if the testbox is already disabled, already reset or there's no iLOM
593 # if not oTestBox.fEnabled or oTestBox.ipLom is None or oTestBox.sComment is not None and oTestBox.sComment.find('Automatically reset') >= 0:
594 # self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
595 # % ( idTestBox, oTestBox.sName, ));
596 # continue;
597 # ## @todo get iLOM credentials from a table?
598 # sCmd = 'sshpass -p%s ssh -oStrictHostKeyChecking=no root@%s show /SP && reset /SYS' % (g_ksLomPassword, oTestBox.ipLom,);
599 # try:
600 # oPs = subprocess.Popen(sCmd, stdout=subprocess.PIPE, shell=True);
601 # sStdout = oPs.communicate()[0];
602 # iRC = oPs.wait();
603 #
604 # oTestBox.sComment = 'Automatically reset (iRC=%u sStdout=%s)' % (iRC, sStdout,);
605 # oTestBoxLogic.editEntry(oTestBox, self.uidSelf, fCommit = True);
606 #
607 # sComment = u'Reset testbox #%u (%s) - iRC=%u sStduot=%s' % ( idTestBox, oTestBox.sName, iRC, sStdout);
608 # self.vprint(sComment);
609 # self.sendEmailAlert(self.uidSelf, sComment);
610 #
611 # except Exception as oXcpt:
612 # rcExit = self.eprint(u'Error resetting testbox #%u (%s): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
613 #
614 return rcExit;
615
616
617 ## @name Failure reasons we know.
618 ## @{
619
620 ktReason_Add_Installer_Win_Failed = ( 'Additions', 'Win GA install' );
621 ktReason_Add_ShFl_Automount = ( 'Additions', 'Automounting' );
622 ktReason_Add_ShFl_FsPerf = ( 'Additions', 'FsPerf' );
623 ktReason_Add_ShFl_FsPerf_Abend = ( 'Additions', 'FsPerf abend' );
624 ktReason_Add_GstCtl_Preparations = ( 'Additions', 'GstCtl preparations' );
625 ktReason_Add_GstCtl_SessionBasics = ( 'Additions', 'Session basics' );
626 ktReason_Add_GstCtl_SessionProcRefs = ( 'Additions', 'Session process' );
627 ktReason_Add_GstCtl_Session_Reboot = ( 'Additions', 'Session reboot' );
628 ktReason_Add_GstCtl_CopyFromGuest_Timeout = ( 'Additions', 'CopyFromGuest timeout' );
629 ktReason_Add_GstCtl_CopyToGuest_Timeout = ( 'Additions', 'CopyToGuest timeout' );
630 ktReason_Add_GstCtl_CopyToGuest_DstEmpty = ( 'Additions', 'CopyToGuest dst empty' );
631 ktReason_Add_GstCtl_CopyToGuest_DstExists = ( 'Additions', 'CopyToGuest dst exists' );
632 ktReason_Add_FlushViewOfFile = ( 'Additions', 'FlushViewOfFile' );
633 ktReason_Add_Mmap_Coherency = ( 'Additions', 'mmap coherency' );
634 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
635 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
636 ktReason_BSOD_0000007F = ( 'BSOD', '0x0000007F' );
637 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
638 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
639 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
640 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
641 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
642 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
643 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
644 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
645 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
646 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
647 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
648 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
649 ktReason_Host_DriverNotCompilable = ( 'Host', 'Driver not compilable' );
650 ktReason_Host_InstallationFailed = ( 'Host', 'Installation failed' );
651 ktReason_Host_InstallationWantReboot = ( 'Host', 'Installation want reboot' );
652 ktReason_Host_InvalidPackage = ( 'Host', 'ERROR_INSTALL_PACKAGE_INVALID' );
653 ktReason_Host_InstallSourceAbsent = ( 'Host', 'ERROR_INSTALL_SOURCE_ABSENT' );
654 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
655 ktReason_Host_DiskFull = ( 'Host', 'Host disk full' );
656 ktReason_Host_DoubleFreeHeap = ( 'Host', 'Double free or corruption' );
657 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
658 ktReason_Host_win32com_gen_py = ( 'Host', 'win32com.gen_py' );
659 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
660 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
661 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
662 ktReason_Host_NetworkMisconfiguration = ( 'Host', 'Network misconfiguration' );
663 ktReason_Host_TSTInfo_Accuracy_OOR = ( 'Host', 'TSTInfo accuracy out of range' );
664 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
665 ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND = ( 'Networking', 'VERR_INTNET_FLT_IF_NOT_FOUND' );
666 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
667 ktReason_OSInstall_Udev_hang = ( 'O/S Install', 'udev hang' );
668 ktReason_OSInstall_Sata_no_BM = ( 'O/S Install', 'SATA busmaster bit not set' );
669 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
670 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
671 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
672 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
673 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
674 ktReason_API_std_bad_alloc = ( 'API / (XP)COM', 'std::bad_alloc' );
675 ktReason_API_Digest_Mismatch = ( 'API / (XP)COM', 'Digest mismatch' );
676 ktReason_API_MoveVM_SharingViolation = ( 'API / (XP)COM', 'MoveVM sharing violation' );
677 ktReason_API_MoveVM_InvalidParameter = ( 'API / (XP)COM', 'MoveVM invalid parameter' );
678 ktReason_API_Open_Session_Failed = ( 'API / (XP)COM', 'Open session failed' );
679 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
680 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
681 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
682 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
683 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
684 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
685 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
686 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
687 ktReason_Unknown_HalReturnToFirmware = ( 'Unknown', 'HalReturnToFirmware' );
688 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
689 ktReason_Unknown_VM_Start_Error = ( 'Unknown', 'VM Start Error' );
690 ktReason_Unknown_VM_Runtime_Error = ( 'Unknown', 'VM Runtime Error' );
691 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
692 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
693 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
694 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
695 ktReason_GuestBug_CompizVBoxQt = ( 'Guest Bug', 'Compiz + VirtualBox Qt GUI crash' );
696 ## @}
697
698 ## BSOD category.
699 ksBsodCategory = 'BSOD';
700 ## Special reason indicating that the flesh and blood sheriff has work to do.
701 ksBsodAddNew = 'Add new BSOD';
702
703 ## Unit test category.
704 ksUnitTestCategory = 'Unit';
705 ## Special reason indicating that the flesh and blood sheriff has work to do.
706 ksUnitTestAddNew = 'Add new';
707
708 ## Used for indica that we shouldn't report anything for this test result ID and
709 ## consider promoting the previous error to test set level if it's the only one.
710 ktHarmless = ( 'Probably', 'Caused by previous error' );
711
712
713 def caseClosed(self, oCaseFile):
714 """
715 Reports the findings in the case and closes it.
716 """
717 #
718 # Log it and create a dReasonForReasultId we can use below.
719 #
720 dCommentForResultId = oCaseFile.dCommentForResultId;
721 if oCaseFile.dReasonForResultId:
722 # Must weed out ktHarmless.
723 dReasonForResultId = {};
724 for idKey, tReason in oCaseFile.dReasonForResultId.items():
725 if tReason is not self.ktHarmless:
726 dReasonForResultId[idKey] = tReason;
727 if not dReasonForResultId:
728 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
729 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
730 return False;
731
732 # Try promote to single reason.
733 atValues = dReasonForResultId.values();
734 fSingleReason = True;
735 if len(dReasonForResultId) == 1 and next(iter(dReasonForResultId.keys())) != oCaseFile.oTestSet.idTestResult:
736 self.dprint(u'Promoting single reason to whole set: %s' % (next(iter(atValues)),));
737 elif len(dReasonForResultId) > 1 and len(atValues) == list(atValues).count(next(iter(atValues))):
738 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), next(iter(atValues))));
739 else:
740 fSingleReason = False;
741 if fSingleReason:
742 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: next(iter(atValues)), };
743 if dCommentForResultId:
744 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: next(iter(dCommentForResultId.values())), };
745 elif oCaseFile.tReason is not None:
746 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
747 else:
748 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
749 return False;
750
751 self.vprint(u'Closing %s with following reason%s: %s'
752 % ( oCaseFile.sName, 's' if len(dReasonForResultId) > 1 else '', dReasonForResultId, ));
753
754 #
755 # Add the test failure reason record(s).
756 #
757 for idTestResult, tReason in dReasonForResultId.items():
758 oFailureReason = self.getFailureReason(tReason);
759 if oFailureReason is not None:
760 sComment = 'Set by $Revision: 97171 $' # Handy for reverting later.
761 if idTestResult in dCommentForResultId:
762 sComment += ': ' + dCommentForResultId[idTestResult];
763
764 oAdd = TestResultFailureData();
765 oAdd.initFromValues(idTestResult = idTestResult,
766 idFailureReason = oFailureReason.idFailureReason,
767 uidAuthor = self.uidSelf,
768 idTestSet = oCaseFile.oTestSet.idTestSet,
769 sComment = sComment,);
770 if self.oConfig.fRealRun:
771 try:
772 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
773 except Exception as oXcpt:
774 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
775 % (oXcpt, oAdd, oCaseFile.sLongName,));
776 else:
777 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
778 return True;
779
780 #
781 # Tools for assiting log parsing.
782 #
783
784 @staticmethod
785 def matchFollowedByLines(sStr, off, asFollowingLines):
786 """ Worker for isThisFollowedByTheseLines. """
787
788 # Advance off to the end of the line.
789 off = sStr.find('\n', off);
790 if off < 0:
791 return False;
792 off += 1;
793
794 # Match each string with the subsequent lines.
795 for iLine, sLine in enumerate(asFollowingLines):
796 offEnd = sStr.find('\n', off);
797 if offEnd < 0:
798 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
799 if sLine and sStr.find(sLine, off, offEnd) < 0:
800 return False;
801
802 # next line.
803 off = offEnd + 1;
804
805 return True;
806
807 @staticmethod
808 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
809 """
810 Looks for a line contining sFirst which is then followed by lines
811 with the strings in asFollowingLines. (No newline chars anywhere!)
812 Returns True / False.
813 """
814 off = sStr.find(sFirst, 0);
815 while off >= 0:
816 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
817 return True;
818 off = sStr.find(sFirst, off + 1);
819 return False;
820
821 @staticmethod
822 def findAndReturnRestOfLine(sHaystack, sNeedle):
823 """
824 Looks for sNeedle in sHaystack.
825 Returns The text following the needle up to the end of the line.
826 Returns None if not found.
827 """
828 if sHaystack is None:
829 return None;
830 off = sHaystack.find(sNeedle);
831 if off < 0:
832 return None;
833 off += len(sNeedle)
834 offEol = sHaystack.find('\n', off);
835 if offEol < 0:
836 offEol = len(sHaystack);
837 return sHaystack[off:offEol]
838
839 @staticmethod
840 def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
841 """
842 Looks for sNeedle in zeroe or more haystacks (asHaystack).
843 Returns The text following the first needed found up to the end of the line.
844 Returns None if not found.
845 """
846 for sHaystack in asHaystacks:
847 sRet = VirtualTestSheriff.findAndReturnRestOfLine(sHaystack, sNeedle);
848 if sRet is not None:
849 return sRet;
850 return None;
851
852
853 #
854 # The investigative units.
855 #
856
857 katSimpleInstallUninstallMainLogReasons = [
858 # ( Whether to stop on hit, reason tuple, needle text. )
859 ( False, ktReason_Host_LeftoverService,
860 'SERVICE_NAME: vbox' ),
861 ( False, ktReason_Host_LeftoverService,
862 'Seems installation was skipped. Old version lurking behind? Not the fault of this build/test run!'),
863 ];
864
865 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
866 'darwin': [
867 # ( Whether to stop on hit, reason tuple, needle text. )
868 ( True, ktReason_Host_DriverNotUnloading,
869 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
870 ],
871 'linux': [
872 # ( Whether to stop on hit, reason tuple, needle text. )
873 ( True, ktReason_Host_DriverNotCompilable,
874 'This system is not currently set up to build kernel modules' ),
875 ( True, ktReason_Host_DriverNotCompilable,
876 'This system is currently not set up to build kernel modules' ),
877 ( True, ktReason_Host_InstallationFailed,
878 'vboxdrv.sh: failed: Look at /var/log/vbox-install.log to find out what went wrong.' ),
879 ( True, ktReason_Host_DriverNotUnloading,
880 'Cannot unload module vboxdrv'),
881 ],
882 'solaris': [
883 # ( Whether to stop on hit, reason tuple, needle text. )
884 ( True, ktReason_Host_DriverNotUnloading, 'can\'t unload the module: Device busy' ),
885 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: Host module ...FAILED!' ),
886 ( True, ktReason_Host_DriverNotUnloading, 'Unloading: NetFilter (Crossbow) module ...FAILED!' ),
887 ( True, ktReason_Host_InstallationFailed, 'svcadm: Couldn\'t bind to svc.configd.' ),
888 ( True, ktReason_Host_InstallationFailed, 'pkgadd: ERROR: postinstall script did not complete successfully' ),
889 ],
890 'win': [
891 # ( Whether to stop on hit, reason tuple, needle text. )
892 ( True, ktReason_Host_InstallationWantReboot, 'ERROR_SUCCESS_REBOOT_REQUIRED' ),
893 ( False, ktReason_Host_InstallationFailed, 'Installation error.' ),
894 ( True, ktReason_Host_InvalidPackage, 'Uninstaller failed, exit code: 1620' ),
895 ( True, ktReason_Host_InstallSourceAbsent, 'Uninstaller failed, exit code: 1612' ),
896 ],
897 };
898
899
900 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
901 """
902 Investigates an install or uninstall failure.
903
904 We lump the two together since the installation typically also performs
905 an uninstall first and will be seeing similar issues to the uninstall.
906 """
907 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
908
909 if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
910 oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
911 return True;
912
913 atSimple = self.katSimpleInstallUninstallMainLogReasons;
914 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
915 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
916
917 fFoundSomething = False;
918 for fStopOnHit, tReason, sNeedle in atSimple:
919 if sResultLog.find(sNeedle) > 0:
920 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
921 if fStopOnHit:
922 return True;
923 fFoundSomething = True;
924
925 return fFoundSomething if fFoundSomething else None;
926
927
928 def investigateBadTestBox(self, oCaseFile):
929 """
930 Checks out bad-testbox statuses.
931 """
932 _ = oCaseFile;
933 return False;
934
935
936 def investigateVBoxUnitTest(self, oCaseFile):
937 """
938 Checks out a VBox unittest problem.
939 """
940
941 #
942 # Process simple test case failures first, using their name as reason.
943 # We do the reason management just like for BSODs.
944 #
945 cRelevantOnes = 0;
946 sMainLog = oCaseFile.getMainLog();
947 aoFailedResults = oCaseFile.oTree.getListOfFailures();
948 for oFailedResult in aoFailedResults:
949 if oFailedResult is oCaseFile.oTree:
950 self.vprint('TODO: toplevel failure');
951 cRelevantOnes += 1
952
953 elif oFailedResult.sName == 'Installing VirtualBox':
954 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
955 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
956 cRelevantOnes += 1
957
958 elif oFailedResult.sName == 'Uninstalling VirtualBox':
959 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
960 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
961 cRelevantOnes += 1
962
963 elif oFailedResult.oParent is not None:
964 # Get the 2nd level node because that's where we'll find the unit test name.
965 while oFailedResult.oParent.oParent is not None:
966 oFailedResult = oFailedResult.oParent;
967
968 # Only report a failure once.
969 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
970 sKey = oFailedResult.sName;
971 if sKey.startswith('testcase/'):
972 sKey = sKey[9:];
973 if sKey in self.asUnitTestReasons:
974 tReason = ( self.ksUnitTestCategory, sKey );
975 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
976 else:
977 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
978 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
979 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
980 cRelevantOnes += 1
981 else:
982 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
983
984 #
985 # If we've caught all the relevant ones by now, report the result.
986 #
987 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
988 return self.caseClosed(oCaseFile);
989 return False;
990
991 def extractGuestCpuStack(self, sInfoText):
992 """
993 Extracts the guest CPU stacks from the input file.
994
995 Returns a dictionary keyed by the CPU number, value being a list of
996 raw stack lines (no header).
997 Returns empty dictionary if no stacks where found.
998 """
999 dRet = {};
1000 off = 0;
1001 while True:
1002 # Find the stack.
1003 offStart = sInfoText.find('=== start guest stack VCPU ', off);
1004 if offStart < 0:
1005 break;
1006 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
1007 if offEnd >= 0:
1008 offEnd += 3;
1009 else:
1010 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
1011 if offEnd < 0:
1012 offEnd = len(sInfoText);
1013
1014 sStack = sInfoText[offStart : offEnd];
1015 sStack = sStack.replace('\r',''); # paranoia
1016 asLines = sStack.split('\n');
1017
1018 # Figure the CPU.
1019 asWords = asLines[0].split();
1020 if len(asWords) < 6 or not asWords[5].isdigit():
1021 break;
1022 iCpu = int(asWords[5]);
1023
1024 # Add it and advance.
1025 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
1026 off = offEnd;
1027 return dRet;
1028
1029 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
1030 """ Investigates kvm_lock_spinning deadlocks """
1031 #
1032 # Extract the stacks. We need more than one CPU to create a deadlock.
1033 #
1034 dStacks = self.extractGuestCpuStack(sInfoText);
1035 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
1036 if len(dStacks) >= 2:
1037 #
1038 # Examin each of the stacks. Each must have kvm_lock_spinning in
1039 # one of the first three entries.
1040 #
1041 cHits = 0;
1042 for asBacktrace in dStacks.values():
1043 for iFrame in xrange(min(3, len(asBacktrace))):
1044 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
1045 cHits += 1;
1046 break;
1047 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
1048 if cHits == len(dStacks):
1049 return (True, self.ktReason_VMM_kvm_lock_spinning);
1050
1051 _ = dLogs; _ = oCaseFile;
1052 return (False, None);
1053
1054 def investigateInfoHalReturnToFirmware(self, oCaseFile, sInfoText, dLogs):
1055 """ Investigates HalReturnToFirmware hangs """
1056 del oCaseFile
1057 del sInfoText
1058 del dLogs
1059 # hope that's sufficient
1060 return (True, self.ktReason_Unknown_HalReturnToFirmware);
1061
1062 ## Things we search a main or VM log for to figure out why something went bust.
1063 ## @note DO NOT ADD MORE STUFF HERE!
1064 ## Please use katSimpleMainLogReasons and katSimpleVmLogReasons instead!
1065 katSimpleMainAndVmLogReasonsDeprecated = [
1066 # ( Whether to stop on hit, reason tuple, needle text. )
1067 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
1068 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
1069 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
1070 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
1071 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
1072 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
1073 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
1074 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
1075 ( True, ktReason_Networking_Nonexistent_host_nic,
1076 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1077 ( True, ktReason_Networking_VERR_INTNET_FLT_IF_NOT_FOUND,
1078 'Failed to attach the network LUN (VERR_INTNET_FLT_IF_NOT_FOUND)' ),
1079 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
1080 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1081 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1082 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1083 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
1084 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
1085 ( True, ktReason_Unknown_File_Not_Found,
1086 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
1087 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
1088 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
1089 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
1090 ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' ),
1091 ( True, ktReason_OSInstall_Sata_no_BM, 'PCHS=14128/14134/8224' ),
1092 ( True, ktReason_Host_DoubleFreeHeap, 'double free or corruption' ),
1093 #( False, ktReason_Unknown_VM_Start_Error, 'VMSetError: ' ), - false positives for stuff like:
1094 # "VMSetError: VD: Backend 'VBoxIsoMaker' does not support async I/O"
1095 ( False, ktReason_Unknown_VM_Start_Error, 'error: failed to open session for' ),
1096 ( False, ktReason_Unknown_VM_Runtime_Error, 'Console: VM runtime error: fatal=true' ),
1097 ];
1098
1099 ## This we search a main log for to figure out why something went bust.
1100 katSimpleMainLogReasons = [
1101 # ( Whether to stop on hit, reason tuple, needle text. )
1102 ( False, ktReason_Host_win32com_gen_py, 'ModuleNotFoundError: No module named \'win32com.gen_py' ),
1103
1104 ];
1105
1106 ## This we search a VM log for to figure out why something went bust.
1107 katSimpleVmLogReasons = [
1108 # ( Whether to stop on hit, reason tuple, needle text. )
1109 # Note: Works for ATA and VD drivers.
1110 ( False, ktReason_Host_DiskFull, '_DISKFULL' ),
1111 ];
1112
1113 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
1114 katSimpleVBoxHardeningLogReasons = [
1115 # ( Whether to stop on hit, reason tuple, needle text. )
1116 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
1117 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
1118 ( True, ktReason_Host_TSTInfo_Accuracy_OOR, 'RTCRTSPTSTINFO::Accuracy::Millis: Out of range' ),
1119 ( False, ktReason_Unknown_VM_Crash, 'Quitting: ExitCode=0xc0000005 (rcNtWait=' ),
1120 ];
1121
1122 ## Things we search a kernel.log file for to figure out why something went bust.
1123 katSimpleKernelLogReasons = [
1124 # ( Whether to stop on hit, reason tuple, needle text. )
1125 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
1126 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn\'t work' ),
1127 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
1128 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libQt5CoreVBox' ),
1129 ( True, ktReason_GuestBug_CompizVBoxQt, 'error 4 in libgtk-3' ),
1130 ];
1131
1132 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
1133 katSimpleVgaTextReasons = [
1134 # ( Whether to stop on hit, reason tuple, needle text. )
1135 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1136 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
1137 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
1138 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
1139 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
1140 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
1141 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
1142 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
1143 "and send a report. Then try booting with the 'noapic' option\n"
1144 "\n" ),
1145 ( True, ktReason_OSInstall_GRUB_hang,
1146 "-----\nGRUB Loading stage2..\n\n\n\n" ),
1147 ( True, ktReason_OSInstall_GRUB_hang,
1148 "-----\nGRUB Loading stage2...\n\n\n\n" ), # the 3 dot hang appears to be less frequent
1149 ( True, ktReason_OSInstall_GRUB_hang,
1150 "-----\nGRUB Loading stage2....\n\n\n\n" ), # the 4 dot hang appears to be very infrequent
1151 ( True, ktReason_OSInstall_GRUB_hang,
1152 "-----\nGRUB Loading stage2.....\n\n\n\n" ), # the 5 dot hang appears to be more frequent again
1153 ( True, ktReason_OSInstall_Udev_hang,
1154 "\nStarting udev:\n\n\n\n" ),
1155 ( True, ktReason_OSInstall_Udev_hang,
1156 "\nStarting udev:\n------" ),
1157 ( True, ktReason_Panic_BootManagerC000000F,
1158 "Windows failed to start. A recent hardware or software change might be the" ),
1159 ( True, ktReason_BootManager_Image_corrupt,
1160 "BOOTMGR image is corrupt. The system cannot boot." ),
1161 ];
1162
1163 ## Things we search for in the info.txt file. Require handlers for now.
1164 katInfoTextHandlers = [
1165 # ( Trigger text, handler method )
1166 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
1167 ( "HalReturnToFirmware", investigateInfoHalReturnToFirmware ),
1168 ];
1169
1170 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
1171 katSimpleScreenshotHashReasons = [
1172 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
1173 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
1174 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
1175 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
1176 ( True, ktReason_BSOD_0000007F, '57e1880619e13042a87100e7a38c8974b85ce3866501be621bea0cc696bb2c63' ),
1177 ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
1178 ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
1179 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
1180 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
1181 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
1182 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
1183 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
1184 ];
1185
1186
1187 def scanLog(self, asLogs, atNeedles, oCaseFile, idTestResult):
1188 """
1189 Scans for atNeedles in sLog.
1190
1191 Returns True if a stop-on-hit neelde was found.
1192 Returns None if a no-stop reason was found.
1193 Returns False if no hit.
1194 """
1195 fRet = False;
1196 for fStopOnHit, tReason, oNeedle in atNeedles:
1197 fMatch = False;
1198 if utils.isString(oNeedle):
1199 for sLog in asLogs:
1200 if sLog:
1201 fMatch |= sLog.find(oNeedle) > 0;
1202 else:
1203 for sLog in asLogs:
1204 if sLog:
1205 fMatch |= oNeedle.search(sLog) is not None;
1206 if fMatch:
1207 oCaseFile.noteReasonForId(tReason, idTestResult);
1208 if fStopOnHit:
1209 return True;
1210 fRet = None;
1211 return fRet;
1212
1213
1214 def investigateGATest(self, oCaseFile, oFailedResult, sResultLog):
1215 """
1216 Investigates a failed VM run.
1217 """
1218 enmReason = None;
1219 sParentName = oFailedResult.oParent.sName if oFailedResult.oParent else '';
1220 if oFailedResult.sName == 'VBoxWindowsAdditions.exe' or sResultLog.find('VBoxWindowsAdditions.exe" failed with') > 0:
1221 enmReason = self.ktReason_Add_Installer_Win_Failed;
1222 # guest control:
1223 elif sParentName == 'Guest Control' and oFailedResult.sName == 'Preparations':
1224 enmReason = self.ktReason_Add_GstCtl_Preparations;
1225 elif oFailedResult.sName == 'Session Basics':
1226 enmReason = self.ktReason_Add_GstCtl_SessionBasics;
1227 elif oFailedResult.sName == 'Session Process References':
1228 enmReason = self.ktReason_Add_GstCtl_SessionProcRefs;
1229 elif oFailedResult.sName == 'Copy from guest':
1230 if sResultLog.find('*** abort action ***') >= 0:
1231 enmReason = self.ktReason_Add_GstCtl_CopyFromGuest_Timeout;
1232 elif oFailedResult.sName == 'Copy to guest':
1233 off = sResultLog.find('"Guest directory "');
1234 if off > 0 and sResultLog.find('" already exists"', off, off + 80):
1235 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_DstExists;
1236 elif sResultLog.find('Guest destination must not be empty') >= 0:
1237 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_DstEmpty;
1238 elif sResultLog.find('*** abort action ***') >= 0:
1239 enmReason = self.ktReason_Add_GstCtl_CopyToGuest_Timeout;
1240 elif oFailedResult.sName.find('Session w/ Guest Reboot') >= 0:
1241 enmReason = self.ktReason_Add_GstCtl_Session_Reboot;
1242 # shared folders:
1243 elif sParentName == 'Shared Folders' and oFailedResult.sName == 'Automounting':
1244 enmReason = self.ktReason_Add_ShFl_Automount;
1245 elif oFailedResult.sName == 'mmap':
1246 if sResultLog.find('FsPerf: Flush issue at offset ') >= 0:
1247 enmReason = self.ktReason_Add_Mmap_Coherency;
1248 elif sResultLog.find('FlushViewOfFile') >= 0:
1249 enmReason = self.ktReason_Add_FlushViewOfFile;
1250 elif sParentName == 'Shared Folders' and oFailedResult.sName == 'Running FsPerf':
1251 enmReason = self.ktReason_Add_ShFl_FsPerf; ## Maybe it would be better to be more specific...
1252
1253 if enmReason is not None:
1254 return oCaseFile.noteReasonForId(enmReason, oFailedResult.idTestResult);
1255
1256 self.vprint(u'TODO: Cannot place GA failure idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1257 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1258 return False;
1259
1260 def isResultFromGATest(self, oCaseFile, oFailedResult):
1261 """
1262 Checks if this result and corresponding log snippet looks like a GA test run.
1263 """
1264 while oFailedResult is not None:
1265 if oFailedResult.sName in [ 'Guest Control', 'Shared Folders', 'FsPerf', 'VBoxWindowsAdditions.exe' ]:
1266 return True;
1267 if oCaseFile.oTestCase.sName == 'Guest Additions' and oFailedResult.sName in [ 'Install', ]:
1268 return True;
1269 oFailedResult = oFailedResult.oParent;
1270 return False;
1271
1272
1273 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
1274 """
1275 Investigates a failed VM run.
1276 """
1277
1278 def investigateLogSet():
1279 """
1280 Investigates the current set of VM related logs.
1281 """
1282 self.dprint('investigateLogSet: log lengths: result %u, VM %u, kernel %u, vga text %u, info text %u, hard %u'
1283 % ( len(sResultLog if sResultLog else ''),
1284 len(sVMLog if sVMLog else ''),
1285 len(sKrnlLog if sKrnlLog else ''),
1286 len(sVgaText if sVgaText else ''),
1287 len(sInfoText if sInfoText else ''),
1288 len(sNtHardLog if sNtHardLog else ''),));
1289
1290 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
1291 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
1292 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
1293 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
1294 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
1295 #self.dprint(u'hard.txt<<<\n%s\n<<<\n' % (sNtHardLog,));
1296
1297 # TODO: more
1298
1299 #
1300 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
1301 #
1302 sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
1303 'GIM: HyperV: Guest indicates a fatal condition! P0=');
1304 if sDetails is not None:
1305 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
1306 sKey = sDetails.split(' ', 1)[0];
1307 try: sKey = '0x%08X' % (int(sKey, 16),);
1308 except: pass;
1309 if sKey in self.asBsodReasons:
1310 tReason = ( self.ksBsodCategory, sKey );
1311 elif sKey.lower() in self.asBsodReasons: # just in case.
1312 tReason = ( self.ksBsodCategory, sKey.lower() );
1313 else:
1314 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
1315 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
1316 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
1317
1318 fFoundSomething = False;
1319
1320 #
1321 # Look for linux panic.
1322 #
1323 if sKrnlLog is not None:
1324 fRet = self.scanLog([sKrnlLog,], self.katSimpleKernelLogReasons, oCaseFile, oFailedResult.idTestResult);
1325 if fRet is True:
1326 return fRet;
1327 fFoundSomething |= fRet is None;
1328
1329 #
1330 # Loop thru the simple stuff.
1331 #
1332
1333 # Main log.
1334 fRet = self.scanLog([sResultLog,], self.katSimpleMainLogReasons, oCaseFile, oFailedResult.idTestResult);
1335 if fRet is True:
1336 return fRet;
1337 fFoundSomething |= fRet is None;
1338
1339 # VM log.
1340 fRet = self.scanLog([sVMLog,], self.katSimpleVmLogReasons, oCaseFile, oFailedResult.idTestResult);
1341 if fRet is True:
1342 return fRet;
1343 fFoundSomething |= fRet is None;
1344
1345 # Old main + vm log.
1346 fRet = self.scanLog([sResultLog, sVMLog], self.katSimpleMainAndVmLogReasonsDeprecated,
1347 oCaseFile, oFailedResult.idTestResult);
1348 if fRet is True:
1349 return fRet;
1350 fFoundSomething |= fRet is None;
1351
1352 # Continue with vga text.
1353 if sVgaText:
1354 fRet = self.scanLog([sVgaText,], self.katSimpleVgaTextReasons, oCaseFile, oFailedResult.idTestResult);
1355 if fRet is True:
1356 return fRet;
1357 fFoundSomething |= fRet is None;
1358
1359 # Continue with screen hashes.
1360 if sScreenHash is not None:
1361 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
1362 if sScreenHash == sHash:
1363 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1364 if fStopOnHit:
1365 return True;
1366 fFoundSomething = True;
1367
1368 # Check VBoxHardening.log.
1369 if sNtHardLog is not None:
1370 fRet = self.scanLog([sNtHardLog,], self.katSimpleVBoxHardeningLogReasons, oCaseFile, oFailedResult.idTestResult);
1371 if fRet is True:
1372 return fRet;
1373 fFoundSomething |= fRet is None;
1374
1375 #
1376 # Complicated stuff.
1377 #
1378 dLogs = {
1379 'sVMLog': sVMLog,
1380 'sNtHardLog': sNtHardLog,
1381 'sScreenHash': sScreenHash,
1382 'sKrnlLog': sKrnlLog,
1383 'sVgaText': sVgaText,
1384 'sInfoText': sInfoText,
1385 };
1386
1387 # info.txt.
1388 if sInfoText:
1389 for sNeedle, fnHandler in self.katInfoTextHandlers:
1390 if sInfoText.find(sNeedle) > 0:
1391 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1392 if tReason is not None:
1393 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1394 if fStop:
1395 return True;
1396 fFoundSomething = True;
1397
1398 #
1399 # Check for repeated reboots...
1400 #
1401 if sVMLog is not None:
1402 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1403 if cResets > 10:
1404 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1405 sComment = 'Counted %s reboots' % (cResets,));
1406
1407 return fFoundSomething;
1408
1409 #
1410 # Check if we got any VM or/and kernel logs. Treat them as sets in
1411 # case we run multiple VMs here (this is of course ASSUMING they
1412 # appear in the order that terminateVmBySession uploads them).
1413 #
1414 cTimes = 0;
1415 sVMLog = None;
1416 sNtHardLog = None;
1417 sScreenHash = None;
1418 sKrnlLog = None;
1419 sVgaText = None;
1420 sInfoText = None;
1421 for oFile in oFailedResult.aoFiles:
1422 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1423 if 'VBoxHardening.log' not in oFile.sFile:
1424 if sVMLog is not None:
1425 if investigateLogSet() is True:
1426 return True;
1427 cTimes += 1;
1428 sInfoText = None;
1429 sVgaText = None;
1430 sKrnlLog = None;
1431 sScreenHash = None;
1432 sNtHardLog = None;
1433 sVMLog = oCaseFile.getLogFile(oFile);
1434 else:
1435 sNtHardLog = oCaseFile.getLogFile(oFile);
1436 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1437 sKrnlLog = oCaseFile.getLogFile(oFile);
1438 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1439 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1440 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1441 sInfoText = oCaseFile.getLogFile(oFile);
1442 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1443 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1444 if sScreenHash is not None:
1445 sScreenHash = sScreenHash.lower();
1446 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1447
1448 if ( sVMLog is not None \
1449 or sNtHardLog is not None \
1450 or cTimes == 0) \
1451 and investigateLogSet() is True:
1452 return True;
1453
1454 return None;
1455
1456 def isResultFromVMRun(self, oFailedResult, sResultLog):
1457 """
1458 Checks if this result and corresponding log snippet looks like a VM run.
1459 """
1460
1461 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1462 if sResultLog.find(' startVm') > 0:
1463 return True;
1464
1465 # Any other indicators? No?
1466 _ = oFailedResult;
1467 return False;
1468
1469
1470 ## Things we search a VBoxSVC log for to figure out why something went bust.
1471 katSimpleSvcLogReasons = [
1472 # ( Whether to stop on hit, reason tuple, needle text. )
1473 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* exited normally: -1073741819 \(0xc0000005\)') ),
1474 ( False, ktReason_Unknown_VM_Crash, re.compile(r'Reaper.* was signalled: 11 \(0xb\)') ),
1475 ];
1476
1477 def investigateSvcLogForVMRun(self, oCaseFile, sSvcLog):
1478 """
1479 Check the VBoxSVC log for a single VM run.
1480 """
1481 if sSvcLog:
1482 fRet = self.scanLog([sSvcLog,], self.katSimpleSvcLogReasons, oCaseFile, oCaseFile.oTree.idTestResult);
1483 if fRet is True or fRet is None:
1484 return True;
1485 return False;
1486
1487 def investigateNtHardLogForVMRun(self, oCaseFile):
1488 """
1489 Check if the hardening log for a single VM run contains VM crash indications.
1490 """
1491 aoLogFiles = oCaseFile.oTree.getListOfLogFilesByKind(TestResultFileData.ksKind_LogReleaseVm);
1492 for oLogFile in aoLogFiles:
1493 if oLogFile.sFile.find('VBoxHardening.log') >= 0:
1494 sLog = oCaseFile.getLogFile(oLogFile);
1495 if sLog.find('Quitting: ExitCode=0xc0000005') >= 0:
1496 return oCaseFile.noteReasonForId(self.ktReason_Unknown_VM_Crash, oCaseFile.oTree.idTestResult);
1497 return False;
1498
1499
1500 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1501 """
1502 Checks out a VBox VM test.
1503
1504 This is generic investigation of a test running one or more VMs, like
1505 for example a smoke test or a guest installation test.
1506
1507 The fSingleVM parameter is a hint, which probably won't come in useful.
1508 """
1509 _ = fSingleVM;
1510
1511 #
1512 # Get a list of test result failures we should be looking into and the main log.
1513 #
1514 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1515 sMainLog = oCaseFile.getMainLog();
1516
1517 #
1518 # There are a set of errors ending up on the top level result record.
1519 # Should deal with these first.
1520 #
1521 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1522 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1523 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1524 # ignore other failures in the test if we're not a little bit careful here.
1525 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1526 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1527 return self.caseClosed(oCaseFile);
1528
1529 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1530 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1531 (' Timeout: ', ' Attempting to abort child...',) ):
1532 if sMainLog.find('*** glibc detected *** /') > 0:
1533 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1534 else:
1535 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1536 return self.caseClosed(oCaseFile);
1537
1538 # Look for heap corruption without visible hang.
1539 if sMainLog.find('*** glibc detected *** /') > 0 \
1540 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1541 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1542 return self.caseClosed(oCaseFile);
1543
1544 # Out of memory w/ timeout.
1545 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1546 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1547 return self.caseClosed(oCaseFile);
1548
1549 # Stale files like vts_rm.exe (windows).
1550 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1551 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1552 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1553 return self.caseClosed(oCaseFile);
1554
1555 #
1556 # XPCOM screwup
1557 #
1558 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1559 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1560 return self.caseClosed(oCaseFile);
1561
1562 #
1563 # Go thru each failed result.
1564 #
1565 for oFailedResult in aoFailedResults:
1566 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1567 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1568 if oFailedResult.sName == 'Installing VirtualBox':
1569 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1570
1571 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1572 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1573
1574 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1575 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1576
1577 elif self.isResultFromGATest(oCaseFile, oFailedResult):
1578 self.investigateGATest(oCaseFile, oFailedResult, sResultLog);
1579
1580 elif sResultLog.find('most likely not unique') > 0:
1581 oCaseFile.noteReasonForId(self.ktReason_Host_NetworkMisconfiguration, oFailedResult.idTestResult)
1582 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1583 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1584
1585 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1586 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1587 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1588
1589 elif sResultLog.find('** error: no action was specified') > 0 \
1590 or sResultLog.find('(len(self._asXml, asText))') > 0:
1591 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1592
1593 else:
1594 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1595 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1596
1597 #
1598 # Windows python/com screwup.
1599 #
1600 if sMainLog.find('ModuleNotFoundError: No module named \'win32com.gen_py') > 0:
1601 oCaseFile.noteReason(self.ktReason_Host_win32com_gen_py);
1602 return self.caseClosed(oCaseFile);
1603
1604 #
1605 # Check VBoxSVC.log and VBoxHardening.log for VM crashes if inconclusive on single VM runs.
1606 #
1607 if fSingleVM and len(oCaseFile.dReasonForResultId) < len(aoFailedResults):
1608 self.dprint(u'Got %u out of %u - checking VBoxSVC.log...'
1609 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1610 if self.investigateSvcLogForVMRun(oCaseFile, oCaseFile.getSvcLog()):
1611 return self.caseClosed(oCaseFile);
1612 if self.investigateNtHardLogForVMRun(oCaseFile):
1613 return self.caseClosed(oCaseFile);
1614
1615 #
1616 # Report home and close the case if we got them all, otherwise log it.
1617 #
1618 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1619 return self.caseClosed(oCaseFile);
1620
1621 if oCaseFile.dReasonForResultId:
1622 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1623 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1624 else:
1625 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1626 return False;
1627
1628
1629 ## Things we search a main log for to figure out why something in the API test went bust.
1630 katSimpleApiMainLogReasons = [
1631 # ( Whether to stop on hit, reason tuple, needle text. )
1632 ( True, ktReason_Networking_Nonexistent_host_nic,
1633 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
1634 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
1635 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
1636 ( True, ktReason_API_std_bad_alloc, 'Unexpected exception: std::bad_alloc' ),
1637 ( True, ktReason_API_Digest_Mismatch, 'Digest mismatch (VERR_NOT_EQUAL)' ),
1638 ( True, ktReason_API_MoveVM_SharingViolation, 'rc=VBOX_E_IPRT_ERROR text="Could not copy the log file ' ),
1639 ( True, ktReason_API_MoveVM_InvalidParameter,
1640 'rc=VBOX_E_IPRT_ERROR text="Could not copy the setting file ' ),
1641 ( True, ktReason_API_Open_Session_Failed, 'error: failed to open session for' ),
1642 ];
1643
1644 def investigateVBoxApiTest(self, oCaseFile):
1645 """
1646 Checks out a VBox API test.
1647 """
1648
1649 #
1650 # Get a list of test result failures we should be looking into and the main log.
1651 #
1652 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1653 sMainLog = oCaseFile.getMainLog();
1654
1655 #
1656 # Go thru each failed result.
1657 #
1658 for oFailedResult in aoFailedResults:
1659 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1660 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1661 if oFailedResult.sName == 'Installing VirtualBox':
1662 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1663
1664 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1665 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1666
1667 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1668 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1669
1670 else:
1671 fFoundSomething = False;
1672 for fStopOnHit, tReason, sNeedle in self.katSimpleApiMainLogReasons:
1673 if sResultLog.find(sNeedle) > 0:
1674 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1675 fFoundSomething = True;
1676 if fStopOnHit:
1677 break;
1678 if fFoundSomething:
1679 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1680 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1681
1682 #
1683 # Report home and close the case if we got them all, otherwise log it.
1684 #
1685 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1686 return self.caseClosed(oCaseFile);
1687
1688 if oCaseFile.dReasonForResultId:
1689 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1690 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1691 else:
1692 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1693 return False;
1694
1695
1696 def reasoningFailures(self):
1697 """
1698 Guess the reason for failures.
1699 """
1700 #
1701 # Get a list of failed test sets without any assigned failure reason.
1702 #
1703 cGot = 0;
1704 if not self.oConfig.aidTestSets:
1705 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack,
1706 tsNow = self.tsNow);
1707 else:
1708 aoTestSets = [self.oTestSetLogic.getById(idTestSet) for idTestSet in self.oConfig.aidTestSets];
1709 for oTestSet in aoTestSets:
1710 self.dprint(u'----------------------------------- #%u, status %s -----------------------------------'
1711 % ( oTestSet.idTestSet, oTestSet.enmStatus,));
1712
1713 #
1714 # Open a case file and assign it to the right investigator.
1715 #
1716 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1717 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1718 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1719 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1720 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1721
1722 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1723
1724 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1725 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1726 fRc = self.investigateBadTestBox(oCaseFile);
1727
1728 elif oCaseFile.isVBoxUnitTest():
1729 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1730 fRc = self.investigateVBoxUnitTest(oCaseFile);
1731
1732 elif oCaseFile.isVBoxInstallTest() or oCaseFile.isVBoxUnattendedInstallTest():
1733 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1734 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1735
1736 elif oCaseFile.isVBoxUSBTest():
1737 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1738 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1739
1740 elif oCaseFile.isVBoxStorageTest():
1741 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1742 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1743
1744 elif oCaseFile.isVBoxGAsTest():
1745 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1746 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1747
1748 elif oCaseFile.isVBoxAPITest():
1749 self.dprint(u'investigateVBoxApiTest is taking over %s.' % (oCaseFile.sLongName,));
1750 fRc = self.investigateVBoxApiTest(oCaseFile);
1751
1752 elif oCaseFile.isVBoxBenchmarkTest():
1753 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1754 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1755
1756 elif oCaseFile.isVBoxSmokeTest():
1757 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1758 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1759
1760 elif oCaseFile.isVBoxSerialTest():
1761 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1762 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1763
1764 else:
1765 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1766 fRc = False;
1767 cGot += fRc is True;
1768
1769 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1770 return 0;
1771
1772
1773 def main(self):
1774 """
1775 The 'main' function.
1776 Return exit code (0, 1, etc).
1777 """
1778 # Database stuff.
1779 self.oDb = TMDatabaseConnection()
1780 self.oTestResultLogic = TestResultLogic(self.oDb);
1781 self.oTestSetLogic = TestSetLogic(self.oDb);
1782 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1783 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1784 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1785 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1786
1787 # Get a fix on our 'now' before we do anything..
1788 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1789 self.tsNow = self.oDb.fetchOne();
1790
1791 # If we're suppost to commit anything we need to get our user ID.
1792 rcExit = 0;
1793 if self.oConfig.fRealRun:
1794 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1795 if self.oLogin is None:
1796 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1797 else:
1798 self.uidSelf = self.oLogin.uid;
1799
1800 #
1801 # Do the stuff.
1802 #
1803 if rcExit == 0:
1804 rcExit = self.selfCheck();
1805 if rcExit == 0:
1806 rcExit = self.badTestBoxManagement();
1807 rcExit2 = self.reasoningFailures();
1808 if rcExit == 0:
1809 rcExit = rcExit2;
1810 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1811 if rcExit == 0:
1812 rcExit = self.badTestBoxManagement();
1813
1814 # Cleanup.
1815 self.oFailureReasonLogic = None;
1816 self.oTestResultFailureLogic = None;
1817 self.oTestSetLogic = None;
1818 self.oTestResultLogic = None;
1819 self.oDb.close();
1820 self.oDb = None;
1821 if self.oLogFile is not None:
1822 self.oLogFile.close();
1823 self.oLogFile = None;
1824 return rcExit;
1825
1826if __name__ == '__main__':
1827 sys.exit(VirtualTestSheriff().main());
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette