VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 68223

Last change on this file since 68223 was 68223, checked in by vboxsync, 7 years ago

vsheriff: detect BSOD 0x7F

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 63.7 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 68223 2017-08-02 07:21:47Z vboxsync $
4# pylint: disable=C0301
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15__copyright__ = \
16"""
17Copyright (C) 2012-2016 Oracle Corporation
18
19This file is part of VirtualBox Open Source Edition (OSE), as
20available from http://www.virtualbox.org. This file is free software;
21you can redistribute it and/or modify it under the terms of the GNU
22General Public License (GPL) as published by the Free Software
23Foundation, in version 2 as it comes in the "COPYING" file of the
24VirtualBox OSE distribution. VirtualBox OSE is distributed in the
25hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
26
27The contents of this file may alternatively be used under the terms
28of the Common Development and Distribution License Version 1.0
29(CDDL) only, as it comes in the "COPYING.CDDL" file of the
30VirtualBox OSE distribution, in which case the provisions of the
31CDDL are applicable instead of those of the GPL.
32
33You may elect to license modified versions of this file under the
34terms and conditions of either the GPL or the CDDL or both.
35"""
36__version__ = "$Revision: 68223 $"
37
38
39# Standard python imports
40import sys;
41import os;
42import hashlib;
43import StringIO;
44from optparse import OptionParser;
45from PIL import Image; # pylint: disable=import-error
46
47# Add Test Manager's modules path
48g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
49sys.path.append(g_ksTestManagerDir);
50
51# Test Manager imports
52from testmanager.core.db import TMDatabaseConnection;
53from testmanager.core.build import BuildDataEx;
54from testmanager.core.failurereason import FailureReasonLogic;
55from testmanager.core.testbox import TestBoxLogic, TestBoxData;
56from testmanager.core.testcase import TestCaseDataEx;
57from testmanager.core.testgroup import TestGroupData;
58from testmanager.core.testset import TestSetLogic, TestSetData;
59from testmanager.core.testresults import TestResultLogic, TestResultFileData;
60from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
61from testmanager.core.useraccount import UserAccountLogic;
62
63
64class VirtualTestSheriffCaseFile(object):
65 """
66 A failure investigation case file.
67
68 """
69
70
71 ## Max log file we'll read into memory. (256 MB)
72 kcbMaxLogRead = 0x10000000;
73
74 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
75 self.oSheriff = oSheriff;
76 self.oTestSet = oTestSet; # TestSetData
77 self.oTree = oTree; # TestResultDataEx
78 self.oBuild = oBuild; # BuildDataEx
79 self.oTestBox = oTestBox; # TestBoxData
80 self.oTestGroup = oTestGroup; # TestGroupData
81 self.oTestCase = oTestCase; # TestCaseDataEx
82 self.sMainLog = ''; # The main log file. Empty string if not accessible.
83
84 # Generate a case file name.
85 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
86 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
87 % ( self.oTestSet.idTestSet,
88 self.oTestCase.sName,
89 self.oTestBox.sName,
90 self.oTestBox.sOs,
91 self.oTestBox.sOsVersion,
92 self.oTestBox.sCpuArch,
93 self.oTestBox.sCpuName,
94 self.oTestBox.sCpuVendor,
95 self.oBuild.oCat.sProduct,
96 self.oBuild.oCat.sBranch,
97 self.oBuild.oCat.sType,
98 self.oBuild.iRevision, );
99
100 # Investigation notes.
101 self.tReason = None; # None or one of the ktReason_XXX constants.
102 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
103 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
104
105 #
106 # Reason.
107 #
108
109 def noteReason(self, tReason):
110 """ Notes down a possible reason. """
111 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
112 self.tReason = tReason;
113 return True;
114
115 def noteReasonForId(self, tReason, idTestResult, sComment = None):
116 """ Notes down a possible reason for a specific test result. """
117 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
118 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
119 (u' (%s)' % (sComment,)) if sComment is not None else ''));
120 self.dReasonForResultId[idTestResult] = tReason;
121 if sComment is not None:
122 self.dCommentForResultId[idTestResult] = sComment;
123 return True;
124
125
126 #
127 # Test classification.
128 #
129
130 def isVBoxTest(self):
131 """ Test classification: VirtualBox (using the build) """
132 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
133
134 def isVBoxUnitTest(self):
135 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
136 return self.isVBoxTest() \
137 and self.oTestCase.sName.lower() == 'unit tests';
138
139 def isVBoxInstallTest(self):
140 """ Test case classification: VirtualBox Guest installation test. """
141 return self.isVBoxTest() \
142 and self.oTestCase.sName.lower().startswith('install:');
143
144 def isVBoxUSBTest(self):
145 """ Test case classification: VirtualBox USB test. """
146 return self.isVBoxTest() \
147 and self.oTestCase.sName.lower().startswith('usb:');
148
149 def isVBoxStorageTest(self):
150 """ Test case classification: VirtualBox Storage test. """
151 return self.isVBoxTest() \
152 and self.oTestCase.sName.lower().startswith('storage:');
153
154 def isVBoxGAsTest(self):
155 """ Test case classification: VirtualBox Guest Additions test. """
156 return self.isVBoxTest() \
157 and self.oTestCase.sName.lower().startswith('ga\'s tests');
158
159 def isVBoxAPITest(self):
160 """ Test case classification: VirtualBox API test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('api:');
163
164 def isVBoxBenchmarkTest(self):
165 """ Test case classification: VirtualBox Benchmark test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('benchmark:');
168
169 def isVBoxSmokeTest(self):
170 """ Test case classification: Smoke test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('smoketest');
173
174
175 #
176 # Utility methods.
177 #
178
179 def getMainLog(self):
180 """
181 Tries to reads the main log file since this will be the first source of information.
182 """
183 if self.sMainLog:
184 return self.sMainLog;
185 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
186 if oFile is not None:
187 try:
188 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
189 except Exception as oXcpt:
190 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
191 self.sMainLog = '';
192 else:
193 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
194 return self.sMainLog;
195
196 def getLogFile(self, oFile):
197 """
198 Tries to reads the given file as a utf-8 log file.
199 oFile is a TestFileDataEx instance.
200 Returns empty string if problems opening or reading the file.
201 """
202 sContent = '';
203 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
204 if oFile is not None:
205 try:
206 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
207 except Exception as oXcpt:
208 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
209 else:
210 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
211 return sContent;
212
213 def getScreenshotSha256(self, oFile):
214 """
215 Tries to read the given screenshot file, uncompress it, and do SHA-2
216 on the raw pixels.
217 Returns SHA-2 digest string on success, None on failure.
218 """
219 (oFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
220 try:
221 abImageFile = oFile.read();
222 except Exception as oXcpt:
223 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
224 else:
225 try:
226 oImage = Image.open(StringIO.StringIO(abImageFile));
227 except Exception as oXcpt:
228 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
229 else:
230 try:
231 oHash = hashlib.sha256();
232 oHash.update(oImage.tostring());
233 except Exception as oXcpt:
234 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
235 else:
236 return oHash.hexdigest();
237 return None;
238
239
240
241 def isSingleTestFailure(self):
242 """
243 Figure out if this is a single test failing or if it's one of the
244 more complicated ones.
245 """
246 if self.oTree.cErrors == 1:
247 return True;
248 if self.oTree.deepCountErrorContributers() <= 1:
249 return True;
250 return False;
251
252
253
254class VirtualTestSheriff(object): # pylint: disable=R0903
255 """
256 Add build info into Test Manager database.
257 """
258
259 ## The user account for the virtual sheriff.
260 ksLoginName = 'vsheriff';
261
262 def __init__(self):
263 """
264 Parse command line.
265 """
266 self.oDb = None;
267 self.tsNow = None;
268 self.oTestResultLogic = None;
269 self.oTestSetLogic = None;
270 self.oFailureReasonLogic = None; # FailureReasonLogic;
271 self.oTestResultFailureLogic = None; # TestResultFailureLogic
272 self.oLogin = None;
273 self.uidSelf = -1;
274 self.oLogFile = None;
275 self.asBsodReasons = [];
276 self.asUnitTestReasons = [];
277
278 oParser = OptionParser();
279 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
280 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
281 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
282 help = 'Work period specified in hours. Defauls is 2 hours.');
283 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
284 help = 'Whether to commit the findings to the database. Default is a dry run.');
285 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
286 help = 'Quiet execution');
287 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
288 help = 'Where to log messages.');
289 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
290 help = 'Enables debug mode.');
291
292 (self.oConfig, _) = oParser.parse_args();
293
294 if self.oConfig.sLogFile:
295 self.oLogFile = open(self.oConfig.sLogFile, "a");
296 self.oLogFile.write('VirtualTestSheriff: $Revision: 68223 $ \n');
297
298
299 def eprint(self, sText):
300 """
301 Prints error messages.
302 Returns 1 (for exit code usage.)
303 """
304 print 'error: %s' % (sText,);
305 if self.oLogFile is not None:
306 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
307 return 1;
308
309 def dprint(self, sText):
310 """
311 Prints debug info.
312 """
313 if self.oConfig.fDebug:
314 if not self.oConfig.fQuiet:
315 print 'debug: %s' % (sText, );
316 if self.oLogFile is not None:
317 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
318 return 0;
319
320 def vprint(self, sText):
321 """
322 Prints verbose info.
323 """
324 if not self.oConfig.fQuiet:
325 print 'info: %s' % (sText,);
326 if self.oLogFile is not None:
327 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
328 return 0;
329
330 def getFailureReason(self, tReason):
331 """ Gets the failure reason object for tReason. """
332 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
333
334 def selfCheck(self):
335 """ Does some self checks, looking up things we expect to be in the database and such. """
336 rcExit = 0;
337 for sAttr in dir(self.__class__):
338 if sAttr.startswith('ktReason_'):
339 tReason = getattr(self.__class__, sAttr);
340 oFailureReason = self.getFailureReason(tReason);
341 if oFailureReason is None:
342 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
343 % (tReason[1], tReason[0],));
344
345 # Check the user account as well.
346 if self.oLogin is None:
347 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
348 if oLogin is None:
349 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
350 return rcExit;
351
352
353
354 def badTestBoxManagement(self):
355 """
356 Looks for bad test boxes and first tries once to reboot them then disables them.
357 """
358 rcExit = 0;
359
360 #
361 # We skip this entirely if we're running in the past and not in harmless debug mode.
362 #
363 if self.oConfig.cStartHoursAgo != 0 \
364 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
365 return rcExit;
366 tsNow = self.tsNow if self.oConfig.fDebug else None;
367 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
368 oTestBoxLogic = TestBoxLogic(self.oDb);
369
370 #
371 # Generate a list of failures reasons we consider bad-testbox behavior.
372 #
373 aidFailureReasons = [
374 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
375 self.getFailureReason(self.ktReason_Host_DriverNotCompilable).idFailureReason,
376 ];
377
378 #
379 # Get list of bad test boxes for given period and check them out individually.
380 #
381 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
382 aidFailureReasons = aidFailureReasons);
383 for idTestBox in aidBadTestBoxes:
384 # Skip if the testbox is already disabled or has a pending reboot command.
385 try:
386 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
387 except Exception as oXcpt:
388 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
389 continue;
390 if not oTestBox.fEnabled:
391 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
392 % ( idTestBox, oTestBox.sName, ));
393 continue;
394 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
395 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
396 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
397 continue;
398
399 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
400 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
401 cOkay = 0;
402 cBad = 0;
403 iFirstOkay = len(aoSets);
404 for iSet, oSet in enumerate(aoSets):
405 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
406 cBad += 1;
407 else:
408 # Check for bad failure reasons.
409 oFailure = None;
410 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
411 oFailure = self.oTestResultFailureLogic.getById(oSet.idTestResult);
412 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
413 cBad += 1;
414 else:
415 # This is an okay test result then.
416 ## @todo maybe check the elapsed time here, it could still be a bad run?
417 cOkay += 1;
418 if iFirstOkay > iSet:
419 iFirstOkay = iSet;
420 if iSet > 10:
421 break;
422
423 # We react if there are two or more bad-testbox statuses at the head of the
424 # history and at least three in the last 10 results.
425 if iFirstOkay >= 2 and cBad > 2:
426 # Frank: For now don't reboot boxes automatically
427 if True or oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
428 self.vprint(u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
429 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
430 if self.oConfig.fRealRun is True:
431 try:
432 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
433 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
434 % (iFirstOkay, cBad, cOkay),);
435 except Exception as oXcpt:
436 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
437 else:
438 self.vprint(u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
439 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
440 if self.oConfig.fRealRun is True:
441 try:
442 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
443 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
444 % (iFirstOkay, cBad, cOkay),);
445 except Exception as oXcpt:
446 rcExit = self.eprint(u'Error rebooting testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
447 else:
448 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
449 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
450 return rcExit;
451
452
453 ## @name Failure reasons we know.
454 ## @{
455 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
456 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
457 ktReason_BSOD_0000007F = ( 'BSOD', '0x0000007F' );
458 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
459 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
460 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
461 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
462 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
463 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
464 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
465 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
466 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
467 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
468 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
469 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
470 ktReason_Host_DriverNotCompilable = ( 'Host', 'Driver not compilable' );
471 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
472 ktReason_Host_DoubleFreeHeap = ( 'Host', 'Double free or corruption' );
473 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
474 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
475 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
476 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
477 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
478 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
479 ktReason_OSInstall_Sata_no_BM = ( 'O/S Install', 'SATA busmaster bit not set' );
480 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
481 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
482 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
483 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
484 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
485 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
486 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
487 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
488 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
489 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
490 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
491 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
492 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
493 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
494 ktReason_Unknown_HalReturnToFirmware = ( 'Unknown', 'HalReturnToFirmware' );
495 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
496 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
497 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
498 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
499 ## @}
500
501 ## BSOD category.
502 ksBsodCategory = 'BSOD';
503 ## Special reason indicating that the flesh and blood sheriff has work to do.
504 ksBsodAddNew = 'Add new BSOD';
505
506 ## Unit test category.
507 ksUnitTestCategory = 'Unit';
508 ## Special reason indicating that the flesh and blood sheriff has work to do.
509 ksUnitTestAddNew = 'Add new';
510
511 ## Used for indica that we shouldn't report anything for this test result ID and
512 ## consider promoting the previous error to test set level if it's the only one.
513 ktHarmless = ( 'Probably', 'Caused by previous error' );
514
515
516 def caseClosed(self, oCaseFile):
517 """
518 Reports the findings in the case and closes it.
519 """
520 #
521 # Log it and create a dReasonForReasultId we can use below.
522 #
523 dCommentForResultId = oCaseFile.dCommentForResultId;
524 if oCaseFile.dReasonForResultId:
525 # Must weed out ktHarmless.
526 dReasonForResultId = {};
527 for idKey, tReason in oCaseFile.dReasonForResultId.items():
528 if tReason is not self.ktHarmless:
529 dReasonForResultId[idKey] = tReason;
530 if not dReasonForResultId:
531 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
532 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
533 return False;
534
535 # Try promote to single reason.
536 atValues = dReasonForResultId.values();
537 fSingleReason = True;
538 if len(dReasonForResultId) == 1 and dReasonForResultId.keys()[0] != oCaseFile.oTestSet.idTestResult:
539 self.dprint(u'Promoting single reason to whole set: %s' % (atValues[0],));
540 elif len(dReasonForResultId) > 1 and len(atValues) == atValues.count(atValues[0]):
541 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), atValues[0]));
542 else:
543 fSingleReason = False;
544 if fSingleReason:
545 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: atValues[0], };
546 if dCommentForResultId:
547 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: dCommentForResultId.values()[0], };
548 elif oCaseFile.tReason is not None:
549 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
550 else:
551 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
552 return False;
553
554 self.vprint(u'Closing %s with following reason%s: %s'
555 % ( oCaseFile.sName, 's' if dReasonForResultId > 0 else '', dReasonForResultId, ));
556
557 #
558 # Add the test failure reason record(s).
559 #
560 for idTestResult, tReason in dReasonForResultId.items():
561 oFailureReason = self.getFailureReason(tReason);
562 if oFailureReason is not None:
563 sComment = 'Set by $Revision: 68223 $' # Handy for reverting later.
564 if idTestResult in dCommentForResultId:
565 sComment += ': ' + dCommentForResultId[idTestResult];
566
567 oAdd = TestResultFailureData();
568 oAdd.initFromValues(idTestResult = idTestResult,
569 idFailureReason = oFailureReason.idFailureReason,
570 uidAuthor = self.uidSelf,
571 idTestSet = oCaseFile.oTestSet.idTestSet,
572 sComment = sComment,);
573 if self.oConfig.fRealRun:
574 try:
575 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
576 except Exception as oXcpt:
577 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
578 % (oXcpt, oAdd, oCaseFile.sLongName,));
579 else:
580 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
581 return True;
582
583 #
584 # Tools for assiting log parsing.
585 #
586
587 @staticmethod
588 def matchFollowedByLines(sStr, off, asFollowingLines):
589 """ Worker for isThisFollowedByTheseLines. """
590
591 # Advance off to the end of the line.
592 off = sStr.find('\n', off);
593 if off < 0:
594 return False;
595 off += 1;
596
597 # Match each string with the subsequent lines.
598 for iLine, sLine in enumerate(asFollowingLines):
599 offEnd = sStr.find('\n', off);
600 if offEnd < 0:
601 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
602 if sLine and sStr.find(sLine, off, offEnd) < 0:
603 return False;
604
605 # next line.
606 off = offEnd + 1;
607
608 return True;
609
610 @staticmethod
611 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
612 """
613 Looks for a line contining sFirst which is then followed by lines
614 with the strings in asFollowingLines. (No newline chars anywhere!)
615 Returns True / False.
616 """
617 off = sStr.find(sFirst, 0);
618 while off >= 0:
619 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
620 return True;
621 off = sStr.find(sFirst, off + 1);
622 return False;
623
624 @staticmethod
625 def findAndReturnRestOfLine(sHaystack, sNeedle):
626 """
627 Looks for sNeedle in sHaystack.
628 Returns The text following the needle up to the end of the line.
629 Returns None if not found.
630 """
631 if sHaystack is None:
632 return None;
633 off = sHaystack.find(sNeedle);
634 if off < 0:
635 return None;
636 off += len(sNeedle)
637 offEol = sHaystack.find('\n', off);
638 if offEol < 0:
639 offEol = len(sHaystack);
640 return sHaystack[off:offEol]
641
642 @staticmethod
643 def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
644 """
645 Looks for sNeedle in zeroe or more haystacks (asHaystack).
646 Returns The text following the first needed found up to the end of the line.
647 Returns None if not found.
648 """
649 for sHaystack in asHaystacks:
650 sRet = VirtualTestSheriff.findAndReturnRestOfLine(sHaystack, sNeedle);
651 if sRet is not None:
652 return sRet;
653 return None;
654
655
656 #
657 # The investigative units.
658 #
659
660 katSimpleInstallUninstallMainLogReasons = [
661 # ( Whether to stop on hit, reason tuple, needle text. )
662 ( False, ktReason_Host_LeftoverService,
663 'SERVICE_NAME: vbox' ),
664 ];
665
666 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
667 'darwin': [
668 # ( Whether to stop on hit, reason tuple, needle text. )
669 ( True, ktReason_Host_DriverNotUnloading,
670 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
671 ],
672 'linux': [
673 # ( Whether to stop on hit, reason tuple, needle text. )
674 ( True, ktReason_Host_DriverNotCompilable,
675 'This system is not currently set up to build kernel modules' ),
676 ( True, ktReason_Host_DriverNotCompilable,
677 'This system is currently not set up to build kernel modules' ),
678 ( True, ktReason_Host_DriverNotUnloading,
679 'Cannot unload module vboxdrv'),
680 ],
681 };
682
683
684 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
685 """
686 Investigates an install or uninstall failure.
687
688 We lump the two together since the installation typically also performs
689 an uninstall first and will be seeing similar issues to the uninstall.
690 """
691
692 if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
693 oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
694 return True;
695
696 atSimple = self.katSimpleInstallUninstallMainLogReasons;
697 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
698 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
699
700 fFoundSomething = False;
701 for fStopOnHit, tReason, sNeedle in atSimple:
702 if sResultLog.find(sNeedle) > 0:
703 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
704 if fStopOnHit:
705 return True;
706 fFoundSomething = True;
707
708 return fFoundSomething if fFoundSomething else None;
709
710
711 def investigateBadTestBox(self, oCaseFile):
712 """
713 Checks out bad-testbox statuses.
714 """
715 _ = oCaseFile;
716 return False;
717
718
719 def investigateVBoxUnitTest(self, oCaseFile):
720 """
721 Checks out a VBox unittest problem.
722 """
723
724 #
725 # Process simple test case failures first, using their name as reason.
726 # We do the reason management just like for BSODs.
727 #
728 cRelevantOnes = 0;
729 sMainLog = oCaseFile.getMainLog();
730 aoFailedResults = oCaseFile.oTree.getListOfFailures();
731 for oFailedResult in aoFailedResults:
732 if oFailedResult is oCaseFile.oTree:
733 self.vprint('TODO: toplevel failure');
734 cRelevantOnes += 1
735
736 elif oFailedResult.sName == 'Installing VirtualBox':
737 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
738 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
739 cRelevantOnes += 1
740
741 elif oFailedResult.sName == 'Uninstalling VirtualBox':
742 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
743 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
744 cRelevantOnes += 1
745
746 elif oFailedResult.oParent is not None:
747 # Get the 2nd level node because that's where we'll find the unit test name.
748 while oFailedResult.oParent.oParent is not None:
749 oFailedResult = oFailedResult.oParent;
750
751 # Only report a failure once.
752 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
753 sKey = oFailedResult.sName;
754 if sKey.startswith('testcase/'):
755 sKey = sKey[9:];
756 if sKey in self.asUnitTestReasons:
757 tReason = ( self.ksUnitTestCategory, sKey );
758 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
759 else:
760 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
761 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
762 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
763 cRelevantOnes += 1
764 else:
765 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
766
767 #
768 # If we've caught all the relevant ones by now, report the result.
769 #
770 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
771 return self.caseClosed(oCaseFile);
772 return False;
773
774 def extractGuestCpuStack(self, sInfoText):
775 """
776 Extracts the guest CPU stacks from the input file.
777
778 Returns a dictionary keyed by the CPU number, value being a list of
779 raw stack lines (no header).
780 Returns empty dictionary if no stacks where found.
781 """
782 dRet = {};
783 off = 0;
784 while True:
785 # Find the stack.
786 offStart = sInfoText.find('=== start guest stack VCPU ', off);
787 if offStart < 0:
788 break;
789 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
790 if offEnd >= 0:
791 offEnd += 3;
792 else:
793 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
794 if offEnd < 0:
795 offEnd = len(sInfoText);
796
797 sStack = sInfoText[offStart : offEnd];
798 sStack = sStack.replace('\r',''); # paranoia
799 asLines = sStack.split('\n');
800
801 # Figure the CPU.
802 asWords = asLines[0].split();
803 if len(asWords) < 6 or not asWords[5].isdigit():
804 break;
805 iCpu = int(asWords[5]);
806
807 # Add it and advance.
808 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
809 off = offEnd;
810 return dRet;
811
812 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
813 """ Investigates kvm_lock_spinning deadlocks """
814 #
815 # Extract the stacks. We need more than one CPU to create a deadlock.
816 #
817 dStacks = self.extractGuestCpuStack(sInfoText);
818 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
819 if len(dStacks) >= 2:
820 #
821 # Examin each of the stacks. Each must have kvm_lock_spinning in
822 # one of the first three entries.
823 #
824 cHits = 0;
825 for iCpu in dStacks:
826 asBacktrace = dStacks[iCpu];
827 for iFrame in xrange(min(3, len(asBacktrace))):
828 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
829 cHits += 1;
830 break;
831 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
832 if cHits == len(dStacks):
833 return (True, self.ktReason_VMM_kvm_lock_spinning);
834
835 _ = dLogs; _ = oCaseFile;
836 return (False, None);
837
838 def investigateInfoHalReturnToFirmware(self, oCaseFile, sInfoText, dLogs):
839 """ Investigates HalReturnToFirmware hangs """
840 del oCaseFile
841 del sInfoText
842 del dLogs
843 # hope that's sufficient
844 return (True, self.ktReason_Unknown_HalReturnToFirmware);
845
846 ## Things we search a main or VM log for to figure out why something went bust.
847 katSimpleMainAndVmLogReasons = [
848 # ( Whether to stop on hit, reason tuple, needle text. )
849 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
850 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
851 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
852 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
853 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
854 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
855 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
856 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
857 ( True, ktReason_Networking_Nonexistent_host_nic,
858 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
859 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
860 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
861 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
862 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
863 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
864 ( True, ktReason_Unknown_File_Not_Found,
865 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
866 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
867 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
868 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
869 ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' ),
870 ( True, ktReason_OSInstall_Sata_no_BM, 'PCHS=14128/14134/8224' ),
871 ( True, ktReason_Host_DoubleFreeHeap, 'double free or corruption' ),
872 ];
873
874 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
875 katSimpleVBoxHardeningLogReasons = [
876 # ( Whether to stop on hit, reason tuple, needle text. )
877 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
878 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
879 ];
880
881 ## Things we search a kernel.log file for to figure out why something went bust.
882 katSimpleKernelLogReasons = [
883 # ( Whether to stop on hit, reason tuple, needle text. )
884 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
885 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn''t work' ),
886 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
887 ];
888
889 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
890 katSimpleVgaTextReasons = [
891 # ( Whether to stop on hit, reason tuple, needle text. )
892 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
893 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
894 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
895 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
896 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
897 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
898 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
899 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
900 "and send a report. Then try booting with the 'noapic' option\n"
901 "\n" ),
902 ( True, ktReason_OSInstall_GRUB_hang,
903 "-----\nGRUB Loading stage2..\n\n\n\n" ),
904 ( True, ktReason_Panic_BootManagerC000000F,
905 "Windows failed to start. A recent hardware or software change might be the" ),
906 ( True, ktReason_BootManager_Image_corrupt,
907 "BOOTMGR image is corrupt. The system cannot boot." ),
908 ];
909
910 ## Things we search for in the info.txt file. Require handlers for now.
911 katInfoTextHandlers = [
912 # ( Trigger text, handler method )
913 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
914 ( "HalReturnToFirmware", investigateInfoHalReturnToFirmware ),
915 ];
916
917 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
918 katSimpleScreenshotHashReasons = [
919 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
920 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
921 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
922 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
923 ( True, ktReason_BSOD_0000007F, '57e1880619e13042a87100e7a38c8974b85ce3866501be621bea0cc696bb2c63' ),
924 ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
925 ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
926 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
927 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
928 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
929 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
930 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
931 ];
932
933 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
934 """
935 Investigates a failed VM run.
936 """
937
938 def investigateLogSet():
939 """
940 Investigates the current set of VM related logs.
941 """
942 self.dprint('investigateLogSet: lengths: result log %u, VM log %u, kernel log %u, vga text %u, info text %u'
943 % ( len(sResultLog if sResultLog else ''),
944 len(sVMLog if sVMLog else ''),
945 len(sKrnlLog if sKrnlLog else ''),
946 len(sVgaText if sVgaText else ''),
947 len(sInfoText if sInfoText else ''), ));
948
949 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
950 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
951 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
952 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
953 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
954
955 # TODO: more
956
957 #
958 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
959 #
960 sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
961 'GIM: HyperV: Guest indicates a fatal condition! P0=');
962 if sDetails is not None:
963 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
964 sKey = sDetails.split(' ', 1)[0];
965 try: sKey = '0x%08X' % (int(sKey, 16),);
966 except: pass;
967 if sKey in self.asBsodReasons:
968 tReason = ( self.ksBsodCategory, sKey );
969 elif sKey.lower() in self.asBsodReasons: # just in case.
970 tReason = ( self.ksBsodCategory, sKey.lower() );
971 else:
972 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
973 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
974 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
975
976 #
977 # Look for linux panic.
978 #
979 if sKrnlLog is not None:
980 for fStopOnHit, tReason, sNeedle in self.katSimpleKernelLogReasons:
981 if sKrnlLog.find(sNeedle) > 0:
982 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
983 if fStopOnHit:
984 return True;
985 fFoundSomething = True;
986
987 #
988 # Loop thru the simple stuff.
989 #
990 fFoundSomething = False;
991 for fStopOnHit, tReason, sNeedle in self.katSimpleMainAndVmLogReasons:
992 if sResultLog.find(sNeedle) > 0 or (sVMLog is not None and sVMLog.find(sNeedle) > 0):
993 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
994 if fStopOnHit:
995 return True;
996 fFoundSomething = True;
997
998 # Continue with vga text.
999 if sVgaText:
1000 for fStopOnHit, tReason, sNeedle in self.katSimpleVgaTextReasons:
1001 if sVgaText.find(sNeedle) > 0:
1002 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1003 if fStopOnHit:
1004 return True;
1005 fFoundSomething = True;
1006 _ = sInfoText;
1007
1008 # Continue with screen hashes.
1009 if sScreenHash is not None:
1010 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
1011 if sScreenHash == sHash:
1012 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1013 if fStopOnHit:
1014 return True;
1015 fFoundSomething = True;
1016
1017 # Check VBoxHardening.log.
1018 if sNtHardLog is not None:
1019 for fStopOnHit, tReason, sNeedle in self.katSimpleVBoxHardeningLogReasons:
1020 if sNtHardLog.find(sNeedle) > 0:
1021 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1022 if fStopOnHit:
1023 return True;
1024 fFoundSomething = True;
1025
1026 #
1027 # Complicated stuff.
1028 #
1029 dLogs = {
1030 'sVMLog': sVMLog,
1031 'sNtHardLog': sNtHardLog,
1032 'sScreenHash': sScreenHash,
1033 'sKrnlLog': sKrnlLog,
1034 'sVgaText': sVgaText,
1035 'sInfoText': sInfoText,
1036 };
1037
1038 # info.txt.
1039 if sInfoText:
1040 for sNeedle, fnHandler in self.katInfoTextHandlers:
1041 if sInfoText.find(sNeedle) > 0:
1042 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1043 if tReason is not None:
1044 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1045 if fStop:
1046 return True;
1047 fFoundSomething = True;
1048
1049 #
1050 # Check for repeated reboots...
1051 #
1052 if sVMLog is not None:
1053 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1054 if cResets > 10:
1055 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1056 sComment = 'Counted %s reboots' % (cResets,));
1057
1058 return fFoundSomething;
1059
1060 #
1061 # Check if we got any VM or/and kernel logs. Treat them as sets in
1062 # case we run multiple VMs here (this is of course ASSUMING they
1063 # appear in the order that terminateVmBySession uploads them).
1064 #
1065 cTimes = 0;
1066 sVMLog = None;
1067 sNtHardLog = None;
1068 sScreenHash = None;
1069 sKrnlLog = None;
1070 sVgaText = None;
1071 sInfoText = None;
1072 for oFile in oFailedResult.aoFiles:
1073 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1074 if 'VBoxHardening.log' not in oFile.sFile:
1075 if sVMLog is not None:
1076 if investigateLogSet() is True:
1077 return True;
1078 cTimes += 1;
1079 sInfoText = None;
1080 sVgaText = None;
1081 sKrnlLog = None;
1082 sScreenHash = None;
1083 sNtHardLog = None;
1084 sVMLog = oCaseFile.getLogFile(oFile);
1085 else:
1086 sNtHardLog = oCaseFile.getLogFile(oFile);
1087 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1088 sKrnlLog = oCaseFile.getLogFile(oFile);
1089 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1090 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1091 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1092 sInfoText = oCaseFile.getLogFile(oFile);
1093 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1094 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1095 if sScreenHash is not None:
1096 sScreenHash = sScreenHash.lower();
1097 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1098
1099 if ( sVMLog is not None \
1100 or sNtHardLog is not None \
1101 or cTimes == 0) \
1102 and investigateLogSet() is True:
1103 return True;
1104
1105 return None;
1106
1107
1108 def isResultFromVMRun(self, oFailedResult, sResultLog):
1109 """
1110 Checks if this result and corresponding log snippet looks like a VM run.
1111 """
1112
1113 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1114 if sResultLog.find(' startVm') > 0:
1115 return True;
1116
1117 # Any other indicators? No?
1118 _ = oFailedResult;
1119 return False;
1120
1121 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1122 """
1123 Checks out a VBox VM test.
1124
1125 This is generic investigation of a test running one or more VMs, like
1126 for example a smoke test or a guest installation test.
1127
1128 The fSingleVM parameter is a hint, which probably won't come in useful.
1129 """
1130 _ = fSingleVM;
1131
1132 #
1133 # Get a list of test result failures we should be looking into and the main log.
1134 #
1135 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1136 sMainLog = oCaseFile.getMainLog();
1137
1138 #
1139 # There are a set of errors ending up on the top level result record.
1140 # Should deal with these first.
1141 #
1142 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1143 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1144 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1145 # ignore other failures in the test if we're not a little bit careful here.
1146 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1147 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1148 return self.caseClosed(oCaseFile);
1149
1150 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1151 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1152 (' Timeout: ', ' Attempting to abort child...',) ):
1153 if sMainLog.find('*** glibc detected *** /') > 0:
1154 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1155 else:
1156 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1157 return self.caseClosed(oCaseFile);
1158
1159 # Look for heap corruption without visible hang.
1160 if sMainLog.find('*** glibc detected *** /') > 0 \
1161 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1162 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1163 return self.caseClosed(oCaseFile);
1164
1165 # Out of memory w/ timeout.
1166 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1167 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1168 return self.caseClosed(oCaseFile);
1169
1170 # Stale files like vts_rm.exe (windows).
1171 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1172 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1173 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1174 return self.caseClosed(oCaseFile);
1175
1176 #
1177 # XPCOM screwup
1178 #
1179 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1180 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1181 return self.caseClosed(oCaseFile);
1182
1183 #
1184 # Go thru each failed result.
1185 #
1186 for oFailedResult in aoFailedResults:
1187 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1188 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1189 if oFailedResult.sName == 'Installing VirtualBox':
1190 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1191
1192 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1193 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1194
1195 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1196 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1197
1198 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1199 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1200
1201 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1202 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1203 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1204
1205 elif sResultLog.find('** error: no action was specified') > 0 \
1206 or sResultLog.find('(len(self._asXml, asText))') > 0:
1207 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1208
1209 else:
1210 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1211 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1212
1213 #
1214 # Report home and close the case if we got them all, otherwise log it.
1215 #
1216 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1217 return self.caseClosed(oCaseFile);
1218
1219 if oCaseFile.dReasonForResultId:
1220 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1221 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1222 else:
1223 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1224 return False;
1225
1226
1227 def reasoningFailures(self):
1228 """
1229 Guess the reason for failures.
1230 """
1231 #
1232 # Get a list of failed test sets without any assigned failure reason.
1233 #
1234 cGot = 0;
1235 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack, tsNow = self.tsNow);
1236 for oTestSet in aoTestSets:
1237 self.dprint(u'');
1238 self.dprint(u'reasoningFailures: Checking out test set #%u, status %s' % ( oTestSet.idTestSet, oTestSet.enmStatus,))
1239
1240 #
1241 # Open a case file and assign it to the right investigator.
1242 #
1243 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1244 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1245 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1246 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1247 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1248
1249 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1250
1251 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1252 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1253 fRc = self.investigateBadTestBox(oCaseFile);
1254
1255 elif oCaseFile.isVBoxUnitTest():
1256 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1257 fRc = self.investigateVBoxUnitTest(oCaseFile);
1258
1259 elif oCaseFile.isVBoxInstallTest():
1260 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1261 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1262
1263 elif oCaseFile.isVBoxUSBTest():
1264 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1265 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1266
1267 elif oCaseFile.isVBoxStorageTest():
1268 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1269 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1270
1271 elif oCaseFile.isVBoxGAsTest():
1272 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1273 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1274
1275 elif oCaseFile.isVBoxAPITest():
1276 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1277 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1278
1279 elif oCaseFile.isVBoxBenchmarkTest():
1280 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1281 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1282
1283 elif oCaseFile.isVBoxSmokeTest():
1284 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1285 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1286
1287 else:
1288 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1289 fRc = False;
1290 cGot += fRc is True;
1291
1292 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1293 return 0;
1294
1295
1296 def main(self):
1297 """
1298 The 'main' function.
1299 Return exit code (0, 1, etc).
1300 """
1301 # Database stuff.
1302 self.oDb = TMDatabaseConnection()
1303 self.oTestResultLogic = TestResultLogic(self.oDb);
1304 self.oTestSetLogic = TestSetLogic(self.oDb);
1305 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1306 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1307 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1308 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1309
1310 # Get a fix on our 'now' before we do anything..
1311 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1312 self.tsNow = self.oDb.fetchOne();
1313
1314 # If we're suppost to commit anything we need to get our user ID.
1315 rcExit = 0;
1316 if self.oConfig.fRealRun:
1317 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1318 if self.oLogin is None:
1319 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1320 else:
1321 self.uidSelf = self.oLogin.uid;
1322
1323 #
1324 # Do the stuff.
1325 #
1326 if rcExit == 0:
1327 rcExit = self.selfCheck();
1328 if rcExit == 0:
1329 rcExit = self.badTestBoxManagement();
1330 rcExit2 = self.reasoningFailures();
1331 if rcExit == 0:
1332 rcExit = rcExit2;
1333 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1334 if rcExit == 0:
1335 rcExit = self.badTestBoxManagement();
1336
1337 # Cleanup.
1338 self.oFailureReasonLogic = None;
1339 self.oTestResultFailureLogic = None;
1340 self.oTestSetLogic = None;
1341 self.oTestResultLogic = None;
1342 self.oDb.close();
1343 self.oDb = None;
1344 if self.oLogFile is not None:
1345 self.oLogFile.close();
1346 self.oLogFile = None;
1347 return rcExit;
1348
1349if __name__ == '__main__':
1350 sys.exit(VirtualTestSheriff().main());
1351
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette