VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 68141

Last change on this file since 68141 was 68141, checked in by vboxsync, 8 years ago

vsheriff: look for HalReturnToFirmware

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 63.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 68141 2017-07-27 16:13:40Z vboxsync $
4# pylint: disable=C0301
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15__copyright__ = \
16"""
17Copyright (C) 2012-2016 Oracle Corporation
18
19This file is part of VirtualBox Open Source Edition (OSE), as
20available from http://www.virtualbox.org. This file is free software;
21you can redistribute it and/or modify it under the terms of the GNU
22General Public License (GPL) as published by the Free Software
23Foundation, in version 2 as it comes in the "COPYING" file of the
24VirtualBox OSE distribution. VirtualBox OSE is distributed in the
25hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
26
27The contents of this file may alternatively be used under the terms
28of the Common Development and Distribution License Version 1.0
29(CDDL) only, as it comes in the "COPYING.CDDL" file of the
30VirtualBox OSE distribution, in which case the provisions of the
31CDDL are applicable instead of those of the GPL.
32
33You may elect to license modified versions of this file under the
34terms and conditions of either the GPL or the CDDL or both.
35"""
36__version__ = "$Revision: 68141 $"
37
38
39# Standard python imports
40import sys;
41import os;
42import hashlib;
43import StringIO;
44from optparse import OptionParser;
45from PIL import Image; # pylint: disable=import-error
46
47# Add Test Manager's modules path
48g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
49sys.path.append(g_ksTestManagerDir);
50
51# Test Manager imports
52from testmanager.core.db import TMDatabaseConnection;
53from testmanager.core.build import BuildDataEx;
54from testmanager.core.failurereason import FailureReasonLogic;
55from testmanager.core.testbox import TestBoxLogic, TestBoxData;
56from testmanager.core.testcase import TestCaseDataEx;
57from testmanager.core.testgroup import TestGroupData;
58from testmanager.core.testset import TestSetLogic, TestSetData;
59from testmanager.core.testresults import TestResultLogic, TestResultFileData;
60from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
61from testmanager.core.useraccount import UserAccountLogic;
62
63
64class VirtualTestSheriffCaseFile(object):
65 """
66 A failure investigation case file.
67
68 """
69
70
71 ## Max log file we'll read into memory. (256 MB)
72 kcbMaxLogRead = 0x10000000;
73
74 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
75 self.oSheriff = oSheriff;
76 self.oTestSet = oTestSet; # TestSetData
77 self.oTree = oTree; # TestResultDataEx
78 self.oBuild = oBuild; # BuildDataEx
79 self.oTestBox = oTestBox; # TestBoxData
80 self.oTestGroup = oTestGroup; # TestGroupData
81 self.oTestCase = oTestCase; # TestCaseDataEx
82 self.sMainLog = ''; # The main log file. Empty string if not accessible.
83
84 # Generate a case file name.
85 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
86 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
87 % ( self.oTestSet.idTestSet,
88 self.oTestCase.sName,
89 self.oTestBox.sName,
90 self.oTestBox.sOs,
91 self.oTestBox.sOsVersion,
92 self.oTestBox.sCpuArch,
93 self.oTestBox.sCpuName,
94 self.oTestBox.sCpuVendor,
95 self.oBuild.oCat.sProduct,
96 self.oBuild.oCat.sBranch,
97 self.oBuild.oCat.sType,
98 self.oBuild.iRevision, );
99
100 # Investigation notes.
101 self.tReason = None; # None or one of the ktReason_XXX constants.
102 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
103 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
104
105 #
106 # Reason.
107 #
108
109 def noteReason(self, tReason):
110 """ Notes down a possible reason. """
111 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
112 self.tReason = tReason;
113 return True;
114
115 def noteReasonForId(self, tReason, idTestResult, sComment = None):
116 """ Notes down a possible reason for a specific test result. """
117 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
118 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
119 (u' (%s)' % (sComment,)) if sComment is not None else ''));
120 self.dReasonForResultId[idTestResult] = tReason;
121 if sComment is not None:
122 self.dCommentForResultId[idTestResult] = sComment;
123 return True;
124
125
126 #
127 # Test classification.
128 #
129
130 def isVBoxTest(self):
131 """ Test classification: VirtualBox (using the build) """
132 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
133
134 def isVBoxUnitTest(self):
135 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
136 return self.isVBoxTest() \
137 and self.oTestCase.sName.lower() == 'unit tests';
138
139 def isVBoxInstallTest(self):
140 """ Test case classification: VirtualBox Guest installation test. """
141 return self.isVBoxTest() \
142 and self.oTestCase.sName.lower().startswith('install:');
143
144 def isVBoxUSBTest(self):
145 """ Test case classification: VirtualBox USB test. """
146 return self.isVBoxTest() \
147 and self.oTestCase.sName.lower().startswith('usb:');
148
149 def isVBoxStorageTest(self):
150 """ Test case classification: VirtualBox Storage test. """
151 return self.isVBoxTest() \
152 and self.oTestCase.sName.lower().startswith('storage:');
153
154 def isVBoxGAsTest(self):
155 """ Test case classification: VirtualBox Guest Additions test. """
156 return self.isVBoxTest() \
157 and self.oTestCase.sName.lower().startswith('ga\'s tests');
158
159 def isVBoxAPITest(self):
160 """ Test case classification: VirtualBox API test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('api:');
163
164 def isVBoxBenchmarkTest(self):
165 """ Test case classification: VirtualBox Benchmark test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('benchmark:');
168
169 def isVBoxSmokeTest(self):
170 """ Test case classification: Smoke test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('smoketest');
173
174
175 #
176 # Utility methods.
177 #
178
179 def getMainLog(self):
180 """
181 Tries to reads the main log file since this will be the first source of information.
182 """
183 if self.sMainLog:
184 return self.sMainLog;
185 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
186 if oFile is not None:
187 try:
188 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
189 except Exception as oXcpt:
190 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
191 self.sMainLog = '';
192 else:
193 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
194 return self.sMainLog;
195
196 def getLogFile(self, oFile):
197 """
198 Tries to reads the given file as a utf-8 log file.
199 oFile is a TestFileDataEx instance.
200 Returns empty string if problems opening or reading the file.
201 """
202 sContent = '';
203 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
204 if oFile is not None:
205 try:
206 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
207 except Exception as oXcpt:
208 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
209 else:
210 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
211 return sContent;
212
213 def getScreenshotSha256(self, oFile):
214 """
215 Tries to read the given screenshot file, uncompress it, and do SHA-2
216 on the raw pixels.
217 Returns SHA-2 digest string on success, None on failure.
218 """
219 (oFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
220 try:
221 abImageFile = oFile.read();
222 except Exception as oXcpt:
223 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
224 else:
225 try:
226 oImage = Image.open(StringIO.StringIO(abImageFile));
227 except Exception as oXcpt:
228 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
229 else:
230 try:
231 oHash = hashlib.sha256();
232 oHash.update(oImage.tostring());
233 except Exception as oXcpt:
234 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
235 else:
236 return oHash.hexdigest();
237 return None;
238
239
240
241 def isSingleTestFailure(self):
242 """
243 Figure out if this is a single test failing or if it's one of the
244 more complicated ones.
245 """
246 if self.oTree.cErrors == 1:
247 return True;
248 if self.oTree.deepCountErrorContributers() <= 1:
249 return True;
250 return False;
251
252
253
254class VirtualTestSheriff(object): # pylint: disable=R0903
255 """
256 Add build info into Test Manager database.
257 """
258
259 ## The user account for the virtual sheriff.
260 ksLoginName = 'vsheriff';
261
262 def __init__(self):
263 """
264 Parse command line.
265 """
266 self.oDb = None;
267 self.tsNow = None;
268 self.oTestResultLogic = None;
269 self.oTestSetLogic = None;
270 self.oFailureReasonLogic = None; # FailureReasonLogic;
271 self.oTestResultFailureLogic = None; # TestResultFailureLogic
272 self.oLogin = None;
273 self.uidSelf = -1;
274 self.oLogFile = None;
275 self.asBsodReasons = [];
276 self.asUnitTestReasons = [];
277
278 oParser = OptionParser();
279 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
280 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
281 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
282 help = 'Work period specified in hours. Defauls is 2 hours.');
283 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
284 help = 'Whether to commit the findings to the database. Default is a dry run.');
285 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
286 help = 'Quiet execution');
287 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
288 help = 'Where to log messages.');
289 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
290 help = 'Enables debug mode.');
291
292 (self.oConfig, _) = oParser.parse_args();
293
294 if self.oConfig.sLogFile:
295 self.oLogFile = open(self.oConfig.sLogFile, "a");
296 self.oLogFile.write('VirtualTestSheriff: $Revision: 68141 $ \n');
297
298
299 def eprint(self, sText):
300 """
301 Prints error messages.
302 Returns 1 (for exit code usage.)
303 """
304 print 'error: %s' % (sText,);
305 if self.oLogFile is not None:
306 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
307 return 1;
308
309 def dprint(self, sText):
310 """
311 Prints debug info.
312 """
313 if self.oConfig.fDebug:
314 if not self.oConfig.fQuiet:
315 print 'debug: %s' % (sText, );
316 if self.oLogFile is not None:
317 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
318 return 0;
319
320 def vprint(self, sText):
321 """
322 Prints verbose info.
323 """
324 if not self.oConfig.fQuiet:
325 print 'info: %s' % (sText,);
326 if self.oLogFile is not None:
327 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
328 return 0;
329
330 def getFailureReason(self, tReason):
331 """ Gets the failure reason object for tReason. """
332 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
333
334 def selfCheck(self):
335 """ Does some self checks, looking up things we expect to be in the database and such. """
336 rcExit = 0;
337 for sAttr in dir(self.__class__):
338 if sAttr.startswith('ktReason_'):
339 tReason = getattr(self.__class__, sAttr);
340 oFailureReason = self.getFailureReason(tReason);
341 if oFailureReason is None:
342 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
343 % (tReason[1], tReason[0],));
344
345 # Check the user account as well.
346 if self.oLogin is None:
347 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
348 if oLogin is None:
349 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
350 return rcExit;
351
352
353
354 def badTestBoxManagement(self):
355 """
356 Looks for bad test boxes and first tries once to reboot them then disables them.
357 """
358 rcExit = 0;
359
360 #
361 # We skip this entirely if we're running in the past and not in harmless debug mode.
362 #
363 if self.oConfig.cStartHoursAgo != 0 \
364 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
365 return rcExit;
366 tsNow = self.tsNow if self.oConfig.fDebug else None;
367 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
368 oTestBoxLogic = TestBoxLogic(self.oDb);
369
370 #
371 # Generate a list of failures reasons we consider bad-testbox behavior.
372 #
373 aidFailureReasons = [
374 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
375 self.getFailureReason(self.ktReason_Host_DriverNotCompilable).idFailureReason,
376 ];
377
378 #
379 # Get list of bad test boxes for given period and check them out individually.
380 #
381 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
382 aidFailureReasons = aidFailureReasons);
383 for idTestBox in aidBadTestBoxes:
384 # Skip if the testbox is already disabled or has a pending reboot command.
385 try:
386 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
387 except Exception as oXcpt:
388 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
389 continue;
390 if not oTestBox.fEnabled:
391 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
392 % ( idTestBox, oTestBox.sName, ));
393 continue;
394 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
395 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
396 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
397 continue;
398
399 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
400 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
401 cOkay = 0;
402 cBad = 0;
403 iFirstOkay = len(aoSets);
404 for iSet, oSet in enumerate(aoSets):
405 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
406 cBad += 1;
407 else:
408 # Check for bad failure reasons.
409 oFailure = None;
410 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
411 oFailure = self.oTestResultFailureLogic.getById(oSet.idTestResult);
412 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
413 cBad += 1;
414 else:
415 # This is an okay test result then.
416 ## @todo maybe check the elapsed time here, it could still be a bad run?
417 cOkay += 1;
418 if iFirstOkay > iSet:
419 iFirstOkay = iSet;
420 if iSet > 10:
421 break;
422
423 # We react if there are two or more bad-testbox statuses at the head of the
424 # history and at least three in the last 10 results.
425 if iFirstOkay >= 2 and cBad > 2:
426 # Frank: For now don't reboot boxes automatically
427 if True or oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
428 self.vprint(u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
429 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
430 if self.oConfig.fRealRun is True:
431 try:
432 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
433 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
434 % (iFirstOkay, cBad, cOkay),);
435 except Exception as oXcpt:
436 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
437 else:
438 self.vprint(u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
439 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
440 if self.oConfig.fRealRun is True:
441 try:
442 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
443 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
444 % (iFirstOkay, cBad, cOkay),);
445 except Exception as oXcpt:
446 rcExit = self.eprint(u'Error rebooting testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
447 else:
448 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
449 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
450 return rcExit;
451
452
453 ## @name Failure reasons we know.
454 ## @{
455 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
456 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
457 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
458 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
459 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
460 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
461 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
462 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
463 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
464 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
465 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
466 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
467 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
468 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
469 ktReason_Host_DriverNotCompilable = ( 'Host', 'Driver not compilable' );
470 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
471 ktReason_Host_DoubleFreeHeap = ( 'Host', 'Double free or corruption' );
472 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
473 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
474 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
475 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
476 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
477 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
478 ktReason_OSInstall_Sata_no_BM = ( 'O/S Install', 'SATA busmaster bit not set' );
479 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
480 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
481 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
482 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
483 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
484 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
485 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
486 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
487 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
488 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
489 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
490 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
491 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
492 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
493 ktReason_Unknown_HalReturnToFirmware = ( 'Unknown', 'HalReturnToFirmware' );
494 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
495 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
496 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
497 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
498 ## @}
499
500 ## BSOD category.
501 ksBsodCategory = 'BSOD';
502 ## Special reason indicating that the flesh and blood sheriff has work to do.
503 ksBsodAddNew = 'Add new BSOD';
504
505 ## Unit test category.
506 ksUnitTestCategory = 'Unit';
507 ## Special reason indicating that the flesh and blood sheriff has work to do.
508 ksUnitTestAddNew = 'Add new';
509
510 ## Used for indica that we shouldn't report anything for this test result ID and
511 ## consider promoting the previous error to test set level if it's the only one.
512 ktHarmless = ( 'Probably', 'Caused by previous error' );
513
514
515 def caseClosed(self, oCaseFile):
516 """
517 Reports the findings in the case and closes it.
518 """
519 #
520 # Log it and create a dReasonForReasultId we can use below.
521 #
522 dCommentForResultId = oCaseFile.dCommentForResultId;
523 if oCaseFile.dReasonForResultId:
524 # Must weed out ktHarmless.
525 dReasonForResultId = {};
526 for idKey, tReason in oCaseFile.dReasonForResultId.items():
527 if tReason is not self.ktHarmless:
528 dReasonForResultId[idKey] = tReason;
529 if not dReasonForResultId:
530 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
531 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
532 return False;
533
534 # Try promote to single reason.
535 atValues = dReasonForResultId.values();
536 fSingleReason = True;
537 if len(dReasonForResultId) == 1 and dReasonForResultId.keys()[0] != oCaseFile.oTestSet.idTestResult:
538 self.dprint(u'Promoting single reason to whole set: %s' % (atValues[0],));
539 elif len(dReasonForResultId) > 1 and len(atValues) == atValues.count(atValues[0]):
540 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), atValues[0]));
541 else:
542 fSingleReason = False;
543 if fSingleReason:
544 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: atValues[0], };
545 if dCommentForResultId:
546 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: dCommentForResultId.values()[0], };
547 elif oCaseFile.tReason is not None:
548 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
549 else:
550 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
551 return False;
552
553 self.vprint(u'Closing %s with following reason%s: %s'
554 % ( oCaseFile.sName, 's' if dReasonForResultId > 0 else '', dReasonForResultId, ));
555
556 #
557 # Add the test failure reason record(s).
558 #
559 for idTestResult, tReason in dReasonForResultId.items():
560 oFailureReason = self.getFailureReason(tReason);
561 if oFailureReason is not None:
562 sComment = 'Set by $Revision: 68141 $' # Handy for reverting later.
563 if idTestResult in dCommentForResultId:
564 sComment += ': ' + dCommentForResultId[idTestResult];
565
566 oAdd = TestResultFailureData();
567 oAdd.initFromValues(idTestResult = idTestResult,
568 idFailureReason = oFailureReason.idFailureReason,
569 uidAuthor = self.uidSelf,
570 idTestSet = oCaseFile.oTestSet.idTestSet,
571 sComment = sComment,);
572 if self.oConfig.fRealRun:
573 try:
574 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
575 except Exception as oXcpt:
576 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
577 % (oXcpt, oAdd, oCaseFile.sLongName,));
578 else:
579 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
580 return True;
581
582 #
583 # Tools for assiting log parsing.
584 #
585
586 @staticmethod
587 def matchFollowedByLines(sStr, off, asFollowingLines):
588 """ Worker for isThisFollowedByTheseLines. """
589
590 # Advance off to the end of the line.
591 off = sStr.find('\n', off);
592 if off < 0:
593 return False;
594 off += 1;
595
596 # Match each string with the subsequent lines.
597 for iLine, sLine in enumerate(asFollowingLines):
598 offEnd = sStr.find('\n', off);
599 if offEnd < 0:
600 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
601 if sLine and sStr.find(sLine, off, offEnd) < 0:
602 return False;
603
604 # next line.
605 off = offEnd + 1;
606
607 return True;
608
609 @staticmethod
610 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
611 """
612 Looks for a line contining sFirst which is then followed by lines
613 with the strings in asFollowingLines. (No newline chars anywhere!)
614 Returns True / False.
615 """
616 off = sStr.find(sFirst, 0);
617 while off >= 0:
618 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
619 return True;
620 off = sStr.find(sFirst, off + 1);
621 return False;
622
623 @staticmethod
624 def findAndReturnRestOfLine(sHaystack, sNeedle):
625 """
626 Looks for sNeedle in sHaystack.
627 Returns The text following the needle up to the end of the line.
628 Returns None if not found.
629 """
630 if sHaystack is None:
631 return None;
632 off = sHaystack.find(sNeedle);
633 if off < 0:
634 return None;
635 off += len(sNeedle)
636 offEol = sHaystack.find('\n', off);
637 if offEol < 0:
638 offEol = len(sHaystack);
639 return sHaystack[off:offEol]
640
641 @staticmethod
642 def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
643 """
644 Looks for sNeedle in zeroe or more haystacks (asHaystack).
645 Returns The text following the first needed found up to the end of the line.
646 Returns None if not found.
647 """
648 for sHaystack in asHaystacks:
649 sRet = VirtualTestSheriff.findAndReturnRestOfLine(sHaystack, sNeedle);
650 if sRet is not None:
651 return sRet;
652 return None;
653
654
655 #
656 # The investigative units.
657 #
658
659 katSimpleInstallUninstallMainLogReasons = [
660 # ( Whether to stop on hit, reason tuple, needle text. )
661 ( False, ktReason_Host_LeftoverService,
662 'SERVICE_NAME: vbox' ),
663 ];
664
665 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
666 'darwin': [
667 # ( Whether to stop on hit, reason tuple, needle text. )
668 ( True, ktReason_Host_DriverNotUnloading,
669 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
670 ],
671 'linux': [
672 # ( Whether to stop on hit, reason tuple, needle text. )
673 ( True, ktReason_Host_DriverNotCompilable,
674 'This system is not currently set up to build kernel modules' ),
675 ( True, ktReason_Host_DriverNotCompilable,
676 'This system is currently not set up to build kernel modules' ),
677 ],
678 };
679
680
681 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
682 """
683 Investigates an install or uninstall failure.
684
685 We lump the two together since the installation typically also performs
686 an uninstall first and will be seeing similar issues to the uninstall.
687 """
688
689 if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
690 oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
691 return True;
692
693 atSimple = self.katSimpleInstallUninstallMainLogReasons;
694 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
695 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
696
697 fFoundSomething = False;
698 for fStopOnHit, tReason, sNeedle in atSimple:
699 if sResultLog.find(sNeedle) > 0:
700 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
701 if fStopOnHit:
702 return True;
703 fFoundSomething = True;
704
705 return fFoundSomething if fFoundSomething else None;
706
707
708 def investigateBadTestBox(self, oCaseFile):
709 """
710 Checks out bad-testbox statuses.
711 """
712 _ = oCaseFile;
713 return False;
714
715
716 def investigateVBoxUnitTest(self, oCaseFile):
717 """
718 Checks out a VBox unittest problem.
719 """
720
721 #
722 # Process simple test case failures first, using their name as reason.
723 # We do the reason management just like for BSODs.
724 #
725 cRelevantOnes = 0;
726 sMainLog = oCaseFile.getMainLog();
727 aoFailedResults = oCaseFile.oTree.getListOfFailures();
728 for oFailedResult in aoFailedResults:
729 if oFailedResult is oCaseFile.oTree:
730 self.vprint('TODO: toplevel failure');
731 cRelevantOnes += 1
732
733 elif oFailedResult.sName == 'Installing VirtualBox':
734 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
735 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
736 cRelevantOnes += 1
737
738 elif oFailedResult.sName == 'Uninstalling VirtualBox':
739 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
740 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
741 cRelevantOnes += 1
742
743 elif oFailedResult.oParent is not None:
744 # Get the 2nd level node because that's where we'll find the unit test name.
745 while oFailedResult.oParent.oParent is not None:
746 oFailedResult = oFailedResult.oParent;
747
748 # Only report a failure once.
749 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
750 sKey = oFailedResult.sName;
751 if sKey.startswith('testcase/'):
752 sKey = sKey[9:];
753 if sKey in self.asUnitTestReasons:
754 tReason = ( self.ksUnitTestCategory, sKey );
755 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
756 else:
757 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
758 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
759 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
760 cRelevantOnes += 1
761 else:
762 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
763
764 #
765 # If we've caught all the relevant ones by now, report the result.
766 #
767 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
768 return self.caseClosed(oCaseFile);
769 return False;
770
771 def extractGuestCpuStack(self, sInfoText):
772 """
773 Extracts the guest CPU stacks from the input file.
774
775 Returns a dictionary keyed by the CPU number, value being a list of
776 raw stack lines (no header).
777 Returns empty dictionary if no stacks where found.
778 """
779 dRet = {};
780 off = 0;
781 while True:
782 # Find the stack.
783 offStart = sInfoText.find('=== start guest stack VCPU ', off);
784 if offStart < 0:
785 break;
786 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
787 if offEnd >= 0:
788 offEnd += 3;
789 else:
790 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
791 if offEnd < 0:
792 offEnd = len(sInfoText);
793
794 sStack = sInfoText[offStart : offEnd];
795 sStack = sStack.replace('\r',''); # paranoia
796 asLines = sStack.split('\n');
797
798 # Figure the CPU.
799 asWords = asLines[0].split();
800 if len(asWords) < 6 or not asWords[5].isdigit():
801 break;
802 iCpu = int(asWords[5]);
803
804 # Add it and advance.
805 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
806 off = offEnd;
807 return dRet;
808
809 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
810 """ Investigates kvm_lock_spinning deadlocks """
811 #
812 # Extract the stacks. We need more than one CPU to create a deadlock.
813 #
814 dStacks = self.extractGuestCpuStack(sInfoText);
815 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
816 if len(dStacks) >= 2:
817 #
818 # Examin each of the stacks. Each must have kvm_lock_spinning in
819 # one of the first three entries.
820 #
821 cHits = 0;
822 for iCpu in dStacks:
823 asBacktrace = dStacks[iCpu];
824 for iFrame in xrange(min(3, len(asBacktrace))):
825 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
826 cHits += 1;
827 break;
828 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
829 if cHits == len(dStacks):
830 return (True, self.ktReason_VMM_kvm_lock_spinning);
831
832 _ = dLogs; _ = oCaseFile;
833 return (False, None);
834
835 def investigateInfoHalReturnToFirmware(self, oCaseFile, sInfoText, dLogs):
836 """ Investigates HalReturnToFirmware hangs """
837 # hope that's sufficient
838 return (True, ktReason_Unknown_HalReturnToFirmware);
839
840 ## Things we search a main or VM log for to figure out why something went bust.
841 katSimpleMainAndVmLogReasons = [
842 # ( Whether to stop on hit, reason tuple, needle text. )
843 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
844 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
845 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
846 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
847 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
848 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
849 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
850 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
851 ( True, ktReason_Networking_Nonexistent_host_nic,
852 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
853 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
854 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
855 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
856 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
857 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
858 ( True, ktReason_Unknown_File_Not_Found,
859 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
860 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
861 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
862 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
863 ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' ),
864 ( True, ktReason_OSInstall_Sata_no_BM, 'PCHS=14128/14134/8224' ),
865 ( True, ktReason_Host_DoubleFreeHeap, 'double free or corruption' ),
866 ];
867
868 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
869 katSimpleVBoxHardeningLogReasons = [
870 # ( Whether to stop on hit, reason tuple, needle text. )
871 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
872 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
873 ];
874
875 ## Things we search a kernel.log file for to figure out why something went bust.
876 katSimpleKernelLogReasons = [
877 # ( Whether to stop on hit, reason tuple, needle text. )
878 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
879 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn''t work' ),
880 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
881 ];
882
883 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
884 katSimpleVgaTextReasons = [
885 # ( Whether to stop on hit, reason tuple, needle text. )
886 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
887 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
888 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
889 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
890 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
891 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
892 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
893 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
894 "and send a report. Then try booting with the 'noapic' option\n"
895 "\n" ),
896 ( True, ktReason_OSInstall_GRUB_hang,
897 "-----\nGRUB Loading stage2..\n\n\n\n" ),
898 ( True, ktReason_Panic_BootManagerC000000F,
899 "Windows failed to start. A recent hardware or software change might be the" ),
900 ( True, ktReason_BootManager_Image_corrupt,
901 "BOOTMGR image is corrupt. The system cannot boot." ),
902 ];
903
904 ## Things we search for in the info.txt file. Require handlers for now.
905 katInfoTextHandlers = [
906 # ( Trigger text, handler method )
907 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
908 ( "HalReturnToFirmware", investigateInfoHalReturnToFirmware ),
909 ];
910
911 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
912 katSimpleScreenshotHashReasons = [
913 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
914 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
915 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
916 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
917 ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
918 ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
919 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
920 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
921 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
922 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
923 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
924 ];
925
926 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
927 """
928 Investigates a failed VM run.
929 """
930
931 def investigateLogSet():
932 """
933 Investigates the current set of VM related logs.
934 """
935 self.dprint('investigateLogSet: lengths: result log %u, VM log %u, kernel log %u, vga text %u, info text %u'
936 % ( len(sResultLog if sResultLog else ''),
937 len(sVMLog if sVMLog else ''),
938 len(sKrnlLog if sKrnlLog else ''),
939 len(sVgaText if sVgaText else ''),
940 len(sInfoText if sInfoText else ''), ));
941
942 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
943 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
944 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
945 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
946 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
947
948 # TODO: more
949
950 #
951 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
952 #
953 sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
954 'GIM: HyperV: Guest indicates a fatal condition! P0=');
955 if sDetails is not None:
956 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
957 sKey = sDetails.split(' ', 1)[0];
958 try: sKey = '0x%08X' % (int(sKey, 16),);
959 except: pass;
960 if sKey in self.asBsodReasons:
961 tReason = ( self.ksBsodCategory, sKey );
962 elif sKey.lower() in self.asBsodReasons: # just in case.
963 tReason = ( self.ksBsodCategory, sKey.lower() );
964 else:
965 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
966 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
967 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
968
969 #
970 # Look for linux panic.
971 #
972 if sKrnlLog is not None:
973 for fStopOnHit, tReason, sNeedle in self.katSimpleKernelLogReasons:
974 if sKrnlLog.find(sNeedle) > 0:
975 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
976 if fStopOnHit:
977 return True;
978 fFoundSomething = True;
979
980 #
981 # Loop thru the simple stuff.
982 #
983 fFoundSomething = False;
984 for fStopOnHit, tReason, sNeedle in self.katSimpleMainAndVmLogReasons:
985 if sResultLog.find(sNeedle) > 0 or (sVMLog is not None and sVMLog.find(sNeedle) > 0):
986 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
987 if fStopOnHit:
988 return True;
989 fFoundSomething = True;
990
991 # Continue with vga text.
992 if sVgaText:
993 for fStopOnHit, tReason, sNeedle in self.katSimpleVgaTextReasons:
994 if sVgaText.find(sNeedle) > 0:
995 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
996 if fStopOnHit:
997 return True;
998 fFoundSomething = True;
999 _ = sInfoText;
1000
1001 # Continue with screen hashes.
1002 if sScreenHash is not None:
1003 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
1004 if sScreenHash == sHash:
1005 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1006 if fStopOnHit:
1007 return True;
1008 fFoundSomething = True;
1009
1010 # Check VBoxHardening.log.
1011 if sNtHardLog is not None:
1012 for fStopOnHit, tReason, sNeedle in self.katSimpleVBoxHardeningLogReasons:
1013 if sNtHardLog.find(sNeedle) > 0:
1014 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1015 if fStopOnHit:
1016 return True;
1017 fFoundSomething = True;
1018
1019 #
1020 # Complicated stuff.
1021 #
1022 dLogs = {
1023 'sVMLog': sVMLog,
1024 'sNtHardLog': sNtHardLog,
1025 'sScreenHash': sScreenHash,
1026 'sKrnlLog': sKrnlLog,
1027 'sVgaText': sVgaText,
1028 'sInfoText': sInfoText,
1029 };
1030
1031 # info.txt.
1032 if sInfoText:
1033 for sNeedle, fnHandler in self.katInfoTextHandlers:
1034 if sInfoText.find(sNeedle) > 0:
1035 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1036 if tReason is not None:
1037 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1038 if fStop:
1039 return True;
1040 fFoundSomething = True;
1041
1042 #
1043 # Check for repeated reboots...
1044 #
1045 if sVMLog is not None:
1046 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1047 if cResets > 10:
1048 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1049 sComment = 'Counted %s reboots' % (cResets,));
1050
1051 return fFoundSomething;
1052
1053 #
1054 # Check if we got any VM or/and kernel logs. Treat them as sets in
1055 # case we run multiple VMs here (this is of course ASSUMING they
1056 # appear in the order that terminateVmBySession uploads them).
1057 #
1058 cTimes = 0;
1059 sVMLog = None;
1060 sNtHardLog = None;
1061 sScreenHash = None;
1062 sKrnlLog = None;
1063 sVgaText = None;
1064 sInfoText = None;
1065 for oFile in oFailedResult.aoFiles:
1066 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1067 if 'VBoxHardening.log' not in oFile.sFile:
1068 if sVMLog is not None:
1069 if investigateLogSet() is True:
1070 return True;
1071 cTimes += 1;
1072 sInfoText = None;
1073 sVgaText = None;
1074 sKrnlLog = None;
1075 sScreenHash = None;
1076 sNtHardLog = None;
1077 sVMLog = oCaseFile.getLogFile(oFile);
1078 else:
1079 sNtHardLog = oCaseFile.getLogFile(oFile);
1080 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1081 sKrnlLog = oCaseFile.getLogFile(oFile);
1082 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1083 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1084 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1085 sInfoText = oCaseFile.getLogFile(oFile);
1086 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1087 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1088 if sScreenHash is not None:
1089 sScreenHash = sScreenHash.lower();
1090 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1091
1092 if ( sVMLog is not None \
1093 or sNtHardLog is not None \
1094 or cTimes == 0) \
1095 and investigateLogSet() is True:
1096 return True;
1097
1098 return None;
1099
1100
1101 def isResultFromVMRun(self, oFailedResult, sResultLog):
1102 """
1103 Checks if this result and corresponding log snippet looks like a VM run.
1104 """
1105
1106 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1107 if sResultLog.find(' startVm') > 0:
1108 return True;
1109
1110 # Any other indicators? No?
1111 _ = oFailedResult;
1112 return False;
1113
1114 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1115 """
1116 Checks out a VBox VM test.
1117
1118 This is generic investigation of a test running one or more VMs, like
1119 for example a smoke test or a guest installation test.
1120
1121 The fSingleVM parameter is a hint, which probably won't come in useful.
1122 """
1123 _ = fSingleVM;
1124
1125 #
1126 # Get a list of test result failures we should be looking into and the main log.
1127 #
1128 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1129 sMainLog = oCaseFile.getMainLog();
1130
1131 #
1132 # There are a set of errors ending up on the top level result record.
1133 # Should deal with these first.
1134 #
1135 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1136 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1137 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1138 # ignore other failures in the test if we're not a little bit careful here.
1139 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1140 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1141 return self.caseClosed(oCaseFile);
1142
1143 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1144 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1145 (' Timeout: ', ' Attempting to abort child...',) ):
1146 if sMainLog.find('*** glibc detected *** /') > 0:
1147 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1148 else:
1149 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1150 return self.caseClosed(oCaseFile);
1151
1152 # Look for heap corruption without visible hang.
1153 if sMainLog.find('*** glibc detected *** /') > 0 \
1154 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1155 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1156 return self.caseClosed(oCaseFile);
1157
1158 # Out of memory w/ timeout.
1159 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1160 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1161 return self.caseClosed(oCaseFile);
1162
1163 # Stale files like vts_rm.exe (windows).
1164 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1165 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1166 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1167 return self.caseClosed(oCaseFile);
1168
1169 #
1170 # XPCOM screwup
1171 #
1172 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1173 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1174 return self.caseClosed(oCaseFile);
1175
1176 #
1177 # Go thru each failed result.
1178 #
1179 for oFailedResult in aoFailedResults:
1180 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1181 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1182 if oFailedResult.sName == 'Installing VirtualBox':
1183 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1184
1185 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1186 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1187
1188 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1189 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1190
1191 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1192 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1193
1194 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1195 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1196 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1197
1198 elif sResultLog.find('** error: no action was specified') > 0 \
1199 or sResultLog.find('(len(self._asXml, asText))') > 0:
1200 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1201
1202 else:
1203 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1204 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1205
1206 #
1207 # Report home and close the case if we got them all, otherwise log it.
1208 #
1209 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1210 return self.caseClosed(oCaseFile);
1211
1212 if oCaseFile.dReasonForResultId:
1213 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1214 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1215 else:
1216 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1217 return False;
1218
1219
1220 def reasoningFailures(self):
1221 """
1222 Guess the reason for failures.
1223 """
1224 #
1225 # Get a list of failed test sets without any assigned failure reason.
1226 #
1227 cGot = 0;
1228 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack, tsNow = self.tsNow);
1229 for oTestSet in aoTestSets:
1230 self.dprint(u'');
1231 self.dprint(u'reasoningFailures: Checking out test set #%u, status %s' % ( oTestSet.idTestSet, oTestSet.enmStatus,))
1232
1233 #
1234 # Open a case file and assign it to the right investigator.
1235 #
1236 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1237 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1238 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1239 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1240 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1241
1242 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1243
1244 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1245 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1246 fRc = self.investigateBadTestBox(oCaseFile);
1247
1248 elif oCaseFile.isVBoxUnitTest():
1249 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1250 fRc = self.investigateVBoxUnitTest(oCaseFile);
1251
1252 elif oCaseFile.isVBoxInstallTest():
1253 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1254 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1255
1256 elif oCaseFile.isVBoxUSBTest():
1257 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1258 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1259
1260 elif oCaseFile.isVBoxStorageTest():
1261 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1262 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1263
1264 elif oCaseFile.isVBoxGAsTest():
1265 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1266 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1267
1268 elif oCaseFile.isVBoxAPITest():
1269 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1270 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1271
1272 elif oCaseFile.isVBoxBenchmarkTest():
1273 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1274 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1275
1276 elif oCaseFile.isVBoxSmokeTest():
1277 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1278 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1279
1280 else:
1281 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1282 fRc = False;
1283 cGot += fRc is True;
1284
1285 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1286 return 0;
1287
1288
1289 def main(self):
1290 """
1291 The 'main' function.
1292 Return exit code (0, 1, etc).
1293 """
1294 # Database stuff.
1295 self.oDb = TMDatabaseConnection()
1296 self.oTestResultLogic = TestResultLogic(self.oDb);
1297 self.oTestSetLogic = TestSetLogic(self.oDb);
1298 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1299 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1300 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1301 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1302
1303 # Get a fix on our 'now' before we do anything..
1304 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1305 self.tsNow = self.oDb.fetchOne();
1306
1307 # If we're suppost to commit anything we need to get our user ID.
1308 rcExit = 0;
1309 if self.oConfig.fRealRun:
1310 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1311 if self.oLogin is None:
1312 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1313 else:
1314 self.uidSelf = self.oLogin.uid;
1315
1316 #
1317 # Do the stuff.
1318 #
1319 if rcExit == 0:
1320 rcExit = self.selfCheck();
1321 if rcExit == 0:
1322 rcExit = self.badTestBoxManagement();
1323 rcExit2 = self.reasoningFailures();
1324 if rcExit == 0:
1325 rcExit = rcExit2;
1326 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1327 if rcExit == 0:
1328 rcExit = self.badTestBoxManagement();
1329
1330 # Cleanup.
1331 self.oFailureReasonLogic = None;
1332 self.oTestResultFailureLogic = None;
1333 self.oTestSetLogic = None;
1334 self.oTestResultLogic = None;
1335 self.oDb.close();
1336 self.oDb = None;
1337 if self.oLogFile is not None:
1338 self.oLogFile.close();
1339 self.oLogFile = None;
1340 return rcExit;
1341
1342if __name__ == '__main__':
1343 sys.exit(VirtualTestSheriff().main());
1344
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette