VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 66205

Last change on this file since 66205 was 66205, checked in by vboxsync, 8 years ago

vhseriff: don't reboot testboxes, just disable them

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 60.7 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 66205 2017-03-22 16:34:42Z vboxsync $
4# pylint: disable=C0301
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15__copyright__ = \
16"""
17Copyright (C) 2012-2016 Oracle Corporation
18
19This file is part of VirtualBox Open Source Edition (OSE), as
20available from http://www.virtualbox.org. This file is free software;
21you can redistribute it and/or modify it under the terms of the GNU
22General Public License (GPL) as published by the Free Software
23Foundation, in version 2 as it comes in the "COPYING" file of the
24VirtualBox OSE distribution. VirtualBox OSE is distributed in the
25hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
26
27The contents of this file may alternatively be used under the terms
28of the Common Development and Distribution License Version 1.0
29(CDDL) only, as it comes in the "COPYING.CDDL" file of the
30VirtualBox OSE distribution, in which case the provisions of the
31CDDL are applicable instead of those of the GPL.
32
33You may elect to license modified versions of this file under the
34terms and conditions of either the GPL or the CDDL or both.
35"""
36__version__ = "$Revision: 66205 $"
37
38
39# Standard python imports
40import sys;
41import os;
42import hashlib;
43import StringIO;
44from optparse import OptionParser;
45from PIL import Image; # pylint: disable=import-error
46
47# Add Test Manager's modules path
48g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
49sys.path.append(g_ksTestManagerDir);
50
51# Test Manager imports
52from testmanager.core.db import TMDatabaseConnection;
53from testmanager.core.build import BuildDataEx;
54from testmanager.core.failurereason import FailureReasonLogic;
55from testmanager.core.testbox import TestBoxLogic, TestBoxData;
56from testmanager.core.testcase import TestCaseDataEx;
57from testmanager.core.testgroup import TestGroupData;
58from testmanager.core.testset import TestSetLogic, TestSetData;
59from testmanager.core.testresults import TestResultLogic, TestResultFileData;
60from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
61from testmanager.core.useraccount import UserAccountLogic;
62
63
64class VirtualTestSheriffCaseFile(object):
65 """
66 A failure investigation case file.
67
68 """
69
70
71 ## Max log file we'll read into memory. (256 MB)
72 kcbMaxLogRead = 0x10000000;
73
74 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
75 self.oSheriff = oSheriff;
76 self.oTestSet = oTestSet; # TestSetData
77 self.oTree = oTree; # TestResultDataEx
78 self.oBuild = oBuild; # BuildDataEx
79 self.oTestBox = oTestBox; # TestBoxData
80 self.oTestGroup = oTestGroup; # TestGroupData
81 self.oTestCase = oTestCase; # TestCaseDataEx
82 self.sMainLog = ''; # The main log file. Empty string if not accessible.
83
84 # Generate a case file name.
85 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
86 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
87 % ( self.oTestSet.idTestSet,
88 self.oTestCase.sName,
89 self.oTestBox.sName,
90 self.oTestBox.sOs,
91 self.oTestBox.sOsVersion,
92 self.oTestBox.sCpuArch,
93 self.oTestBox.sCpuName,
94 self.oTestBox.sCpuVendor,
95 self.oBuild.oCat.sProduct,
96 self.oBuild.oCat.sBranch,
97 self.oBuild.oCat.sType,
98 self.oBuild.iRevision, );
99
100 # Investigation notes.
101 self.tReason = None; # None or one of the ktReason_XXX constants.
102 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
103 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
104
105 #
106 # Reason.
107 #
108
109 def noteReason(self, tReason):
110 """ Notes down a possible reason. """
111 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
112 self.tReason = tReason;
113 return True;
114
115 def noteReasonForId(self, tReason, idTestResult, sComment = None):
116 """ Notes down a possible reason for a specific test result. """
117 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
118 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
119 (u' (%s)' % (sComment,)) if sComment is not None else ''));
120 self.dReasonForResultId[idTestResult] = tReason;
121 if sComment is not None:
122 self.dCommentForResultId[idTestResult] = sComment;
123 return True;
124
125
126 #
127 # Test classification.
128 #
129
130 def isVBoxTest(self):
131 """ Test classification: VirtualBox (using the build) """
132 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
133
134 def isVBoxUnitTest(self):
135 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
136 return self.isVBoxTest() \
137 and self.oTestCase.sName.lower() == 'unit tests';
138
139 def isVBoxInstallTest(self):
140 """ Test case classification: VirtualBox Guest installation test. """
141 return self.isVBoxTest() \
142 and self.oTestCase.sName.lower().startswith('install:');
143
144 def isVBoxUSBTest(self):
145 """ Test case classification: VirtualBox USB test. """
146 return self.isVBoxTest() \
147 and self.oTestCase.sName.lower().startswith('usb:');
148
149 def isVBoxStorageTest(self):
150 """ Test case classification: VirtualBox Storage test. """
151 return self.isVBoxTest() \
152 and self.oTestCase.sName.lower().startswith('storage:');
153
154 def isVBoxGAsTest(self):
155 """ Test case classification: VirtualBox Guest Additions test. """
156 return self.isVBoxTest() \
157 and self.oTestCase.sName.lower().startswith('ga\'s tests');
158
159 def isVBoxAPITest(self):
160 """ Test case classification: VirtualBox API test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('api:');
163
164 def isVBoxBenchmarkTest(self):
165 """ Test case classification: VirtualBox Benchmark test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('benchmark:');
168
169 def isVBoxSmokeTest(self):
170 """ Test case classification: Smoke test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('smoketest');
173
174
175 #
176 # Utility methods.
177 #
178
179 def getMainLog(self):
180 """
181 Tries to reads the main log file since this will be the first source of information.
182 """
183 if self.sMainLog:
184 return self.sMainLog;
185 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
186 if oFile is not None:
187 try:
188 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
189 except Exception as oXcpt:
190 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
191 self.sMainLog = '';
192 else:
193 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
194 return self.sMainLog;
195
196 def getLogFile(self, oFile):
197 """
198 Tries to reads the given file as a utf-8 log file.
199 oFile is a TestFileDataEx instance.
200 Returns empty string if problems opening or reading the file.
201 """
202 sContent = '';
203 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
204 if oFile is not None:
205 try:
206 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
207 except Exception as oXcpt:
208 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
209 else:
210 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
211 return sContent;
212
213 def getScreenshotSha256(self, oFile):
214 """
215 Tries to read the given screenshot file, uncompress it, and do SHA-2
216 on the raw pixels.
217 Returns SHA-2 digest string on success, None on failure.
218 """
219 (oFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
220 try:
221 abImageFile = oFile.read();
222 except Exception as oXcpt:
223 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
224 else:
225 try:
226 oImage = Image.open(StringIO.StringIO(abImageFile));
227 except Exception as oXcpt:
228 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
229 else:
230 try:
231 oHash = hashlib.sha256();
232 oHash.update(oImage.tostring());
233 except Exception as oXcpt:
234 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
235 else:
236 return oHash.hexdigest();
237 return None;
238
239
240
241 def isSingleTestFailure(self):
242 """
243 Figure out if this is a single test failing or if it's one of the
244 more complicated ones.
245 """
246 if self.oTree.cErrors == 1:
247 return True;
248 if self.oTree.deepCountErrorContributers() <= 1:
249 return True;
250 return False;
251
252
253
254class VirtualTestSheriff(object): # pylint: disable=R0903
255 """
256 Add build info into Test Manager database.
257 """
258
259 ## The user account for the virtual sheriff.
260 ksLoginName = 'vsheriff';
261
262 def __init__(self):
263 """
264 Parse command line.
265 """
266 self.oDb = None;
267 self.tsNow = None;
268 self.oTestResultLogic = None;
269 self.oTestSetLogic = None;
270 self.oFailureReasonLogic = None; # FailureReasonLogic;
271 self.oTestResultFailureLogic = None; # TestResultFailureLogic
272 self.oLogin = None;
273 self.uidSelf = -1;
274 self.oLogFile = None;
275 self.asBsodReasons = [];
276 self.asUnitTestReasons = [];
277
278 oParser = OptionParser();
279 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
280 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
281 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
282 help = 'Work period specified in hours. Defauls is 2 hours.');
283 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
284 help = 'Whether to commit the findings to the database. Default is a dry run.');
285 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
286 help = 'Quiet execution');
287 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
288 help = 'Where to log messages.');
289 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
290 help = 'Enables debug mode.');
291
292 (self.oConfig, _) = oParser.parse_args();
293
294 if self.oConfig.sLogFile:
295 self.oLogFile = open(self.oConfig.sLogFile, "a");
296 self.oLogFile.write('VirtualTestSheriff: $Revision: 66205 $ \n');
297
298
299 def eprint(self, sText):
300 """
301 Prints error messages.
302 Returns 1 (for exit code usage.)
303 """
304 print 'error: %s' % (sText,);
305 if self.oLogFile is not None:
306 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
307 return 1;
308
309 def dprint(self, sText):
310 """
311 Prints debug info.
312 """
313 if self.oConfig.fDebug:
314 if not self.oConfig.fQuiet:
315 print 'debug: %s' % (sText, );
316 if self.oLogFile is not None:
317 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
318 return 0;
319
320 def vprint(self, sText):
321 """
322 Prints verbose info.
323 """
324 if not self.oConfig.fQuiet:
325 print 'info: %s' % (sText,);
326 if self.oLogFile is not None:
327 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
328 return 0;
329
330 def getFailureReason(self, tReason):
331 """ Gets the failure reason object for tReason. """
332 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
333
334 def selfCheck(self):
335 """ Does some self checks, looking up things we expect to be in the database and such. """
336 rcExit = 0;
337 for sAttr in dir(self.__class__):
338 if sAttr.startswith('ktReason_'):
339 tReason = getattr(self.__class__, sAttr);
340 oFailureReason = self.getFailureReason(tReason);
341 if oFailureReason is None:
342 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
343 % (tReason[1], tReason[0],));
344
345 # Check the user account as well.
346 if self.oLogin is None:
347 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
348 if oLogin is None:
349 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
350 return rcExit;
351
352
353
354 def badTestBoxManagement(self):
355 """
356 Looks for bad test boxes and first tries once to reboot them then disables them.
357 """
358 rcExit = 0;
359
360 #
361 # We skip this entirely if we're running in the past and not in harmless debug mode.
362 #
363 if self.oConfig.cStartHoursAgo != 0 \
364 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
365 return rcExit;
366 tsNow = self.tsNow if self.oConfig.fDebug else None;
367 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
368 oTestBoxLogic = TestBoxLogic(self.oDb);
369
370 #
371 # Generate a list of failures reasons we consider bad-testbox behavior.
372 #
373 aidFailureReasons = [
374 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
375 ];
376
377 #
378 # Get list of bad test boxes for given period and check them out individually.
379 #
380 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
381 aidFailureReasons = aidFailureReasons);
382 for idTestBox in aidBadTestBoxes:
383 # Skip if the testbox is already disabled or has a pending reboot command.
384 try:
385 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
386 except Exception as oXcpt:
387 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
388 continue;
389 if not oTestBox.fEnabled:
390 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
391 % ( idTestBox, oTestBox.sName, ));
392 continue;
393 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
394 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
395 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
396 continue;
397
398 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
399 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
400 cOkay = 0;
401 cBad = 0;
402 iFirstOkay = len(aoSets);
403 for iSet, oSet in enumerate(aoSets):
404 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
405 cBad += 1;
406 else:
407 # Check for bad failure reasons.
408 oFailure = None;
409 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
410 oFailure = self.oTestResultFailureLogic.getById(oSet.idTestResult);
411 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
412 cBad += 1;
413 else:
414 # This is an okay test result then.
415 ## @todo maybe check the elapsed time here, it could still be a bad run?
416 cOkay += 1;
417 if iFirstOkay > iSet:
418 iFirstOkay = iSet;
419 if iSet > 10:
420 break;
421
422 # We react if there are two or more bad-testbox statuses at the head of the
423 # history and at least three in the last 10 results.
424 if iFirstOkay >= 2 and cBad > 2:
425 # Frank: For now don't reboot boxes automatically
426 if True or oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
427 self.vprint(u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
428 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
429 if self.oConfig.fRealRun is True:
430 try:
431 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
432 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
433 % (iFirstOkay, cBad, cOkay),);
434 except Exception as oXcpt:
435 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
436 else:
437 self.vprint(u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
438 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
439 if self.oConfig.fRealRun is True:
440 try:
441 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
442 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
443 % (iFirstOkay, cBad, cOkay),);
444 except Exception as oXcpt:
445 rcExit = self.eprint(u'Error rebooting testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
446 else:
447 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
448 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
449 return rcExit;
450
451
452 ## @name Failure reasons we know.
453 ## @{
454 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
455 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
456 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
457 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
458 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
459 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
460 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
461 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
462 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
463 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
464 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
465 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
466 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
467 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
468 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
469 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
470 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
471 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
472 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
473 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
474 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
475 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
476 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
477 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
478 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
479 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
480 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
481 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
482 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
483 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
484 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
485 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
486 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
487 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
488 ## @}
489
490 ## BSOD category.
491 ksBsodCategory = 'BSOD';
492 ## Special reason indicating that the flesh and blood sheriff has work to do.
493 ksBsodAddNew = 'Add new BSOD';
494
495 ## Unit test category.
496 ksUnitTestCategory = 'Unit';
497 ## Special reason indicating that the flesh and blood sheriff has work to do.
498 ksUnitTestAddNew = 'Add new';
499
500 ## Used for indica that we shouldn't report anything for this test result ID and
501 ## consider promoting the previous error to test set level if it's the only one.
502 ktHarmless = ( 'Probably', 'Caused by previous error' );
503
504
505 def caseClosed(self, oCaseFile):
506 """
507 Reports the findings in the case and closes it.
508 """
509 #
510 # Log it and create a dReasonForReasultId we can use below.
511 #
512 dCommentForResultId = oCaseFile.dCommentForResultId;
513 if oCaseFile.dReasonForResultId:
514 # Must weed out ktHarmless.
515 dReasonForResultId = {};
516 for idKey, tReason in oCaseFile.dReasonForResultId.items():
517 if tReason is not self.ktHarmless:
518 dReasonForResultId[idKey] = tReason;
519 if not dReasonForResultId:
520 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
521 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
522 return False;
523
524 # Try promote to single reason.
525 atValues = dReasonForResultId.values();
526 fSingleReason = True;
527 if len(dReasonForResultId) == 1 and dReasonForResultId.keys()[0] != oCaseFile.oTestSet.idTestResult:
528 self.dprint(u'Promoting single reason to whole set: %s' % (atValues[0],));
529 elif len(dReasonForResultId) > 1 and len(atValues) == atValues.count(atValues[0]):
530 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), atValues[0]));
531 else:
532 fSingleReason = False;
533 if fSingleReason:
534 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: atValues[0], };
535 if dCommentForResultId:
536 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: dCommentForResultId.values()[0], };
537 elif oCaseFile.tReason is not None:
538 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
539 else:
540 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
541 return False;
542
543 self.vprint(u'Closing %s with following reason%s: %s'
544 % ( oCaseFile.sName, 's' if dReasonForResultId > 0 else '', dReasonForResultId, ));
545
546 #
547 # Add the test failure reason record(s).
548 #
549 for idTestResult, tReason in dReasonForResultId.items():
550 oFailureReason = self.getFailureReason(tReason);
551 if oFailureReason is not None:
552 sComment = 'Set by $Revision: 66205 $' # Handy for reverting later.
553 if idTestResult in dCommentForResultId:
554 sComment += ': ' + dCommentForResultId[idTestResult];
555
556 oAdd = TestResultFailureData();
557 oAdd.initFromValues(idTestResult = idTestResult,
558 idFailureReason = oFailureReason.idFailureReason,
559 uidAuthor = self.uidSelf,
560 idTestSet = oCaseFile.oTestSet.idTestSet,
561 sComment = sComment,);
562 if self.oConfig.fRealRun:
563 try:
564 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
565 except Exception as oXcpt:
566 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
567 % (oXcpt, oAdd, oCaseFile.sLongName,));
568 else:
569 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
570 return True;
571
572 #
573 # Tools for assiting log parsing.
574 #
575
576 @staticmethod
577 def matchFollowedByLines(sStr, off, asFollowingLines):
578 """ Worker for isThisFollowedByTheseLines. """
579
580 # Advance off to the end of the line.
581 off = sStr.find('\n', off);
582 if off < 0:
583 return False;
584 off += 1;
585
586 # Match each string with the subsequent lines.
587 for iLine, sLine in enumerate(asFollowingLines):
588 offEnd = sStr.find('\n', off);
589 if offEnd < 0:
590 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
591 if sLine and sStr.find(sLine, off, offEnd) < 0:
592 return False;
593
594 # next line.
595 off = offEnd + 1;
596
597 return True;
598
599 @staticmethod
600 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
601 """
602 Looks for a line contining sFirst which is then followed by lines
603 with the strings in asFollowingLines. (No newline chars anywhere!)
604 Returns True / False.
605 """
606 off = sStr.find(sFirst, 0);
607 while off >= 0:
608 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
609 return True;
610 off = sStr.find(sFirst, off + 1);
611 return False;
612
613 @staticmethod
614 def findAndReturnResetOfLine(sHaystack, sNeedle):
615 """
616 Looks for sNeedle in sHaystack.
617 Returns The text following the needle up to the end of the line.
618 Returns None if not found.
619 """
620 if sHaystack is None:
621 return None;
622 off = sHaystack.find(sNeedle);
623 if off < 0:
624 return None;
625 off += len(sNeedle)
626 offEol = sHaystack.find('\n', off);
627 if offEol < 0:
628 offEol = len(sHaystack);
629 return sHaystack[off:offEol]
630
631 @staticmethod
632 def findInAnyAndReturnResetOfLine(asHaystacks, sNeedle):
633 """
634 Looks for sNeedle in zeroe or more haystacks (asHaystack).
635 Returns The text following the first needed found up to the end of the line.
636 Returns None if not found.
637 """
638 for sHaystack in asHaystacks:
639 sRet = VirtualTestSheriff.findAndReturnResetOfLine(sHaystack, sNeedle);
640 if sRet is not None:
641 return sRet;
642 return None;
643
644
645 #
646 # The investigative units.
647 #
648
649 katSimpleInstallUninstallMainLogReasons = [
650 # ( Whether to stop on hit, reason tuple, needle text. )
651 ];
652
653 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
654 'darwin': [
655 # ( Whether to stop on hit, reason tuple, needle text. )
656 ( True, ktReason_Host_DriverNotUnloading,
657 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
658 ],
659 };
660
661
662 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
663 """
664 Investigates an install or uninstall failure.
665
666 We lump the two together since the installation typically also performs
667 an uninstall first and will be seeing similar issues to the uninstall.
668 """
669 _ = fInstall;
670
671 atSimple = self.katSimpleInstallUninstallMainLogReasons;
672 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
673 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
674
675 fFoundSomething = False;
676 for fStopOnHit, tReason, sNeedle in atSimple:
677 if sResultLog.find(sNeedle) > 0:
678 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
679 if fStopOnHit:
680 return True;
681 fFoundSomething = True;
682
683 return fFoundSomething if fFoundSomething else None;
684
685
686 def investigateBadTestBox(self, oCaseFile):
687 """
688 Checks out bad-testbox statuses.
689 """
690 _ = oCaseFile;
691 return False;
692
693
694 def investigateVBoxUnitTest(self, oCaseFile):
695 """
696 Checks out a VBox unittest problem.
697 """
698
699 #
700 # Process simple test case failures first, using their name as reason.
701 # We do the reason management just like for BSODs.
702 #
703 cRelevantOnes = 0;
704 sMainLog = oCaseFile.getMainLog();
705 aoFailedResults = oCaseFile.oTree.getListOfFailures();
706 for oFailedResult in aoFailedResults:
707 if oFailedResult is oCaseFile.oTree:
708 self.vprint('TODO: toplevel failure');
709 cRelevantOnes += 1
710
711 elif oFailedResult.sName == 'Installing VirtualBox':
712 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
713 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
714 cRelevantOnes += 1
715
716 elif oFailedResult.sName == 'Uninstalling VirtualBox':
717 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
718 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
719 cRelevantOnes += 1
720
721 elif oFailedResult.oParent is not None:
722 # Get the 2nd level node because that's where we'll find the unit test name.
723 while oFailedResult.oParent.oParent is not None:
724 oFailedResult = oFailedResult.oParent;
725
726 # Only report a failure once.
727 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
728 sKey = oFailedResult.sName;
729 if sKey.startswith('testcase/'):
730 sKey = sKey[9:];
731 if sKey in self.asUnitTestReasons:
732 tReason = ( self.ksUnitTestCategory, sKey );
733 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
734 else:
735 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
736 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
737 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
738 cRelevantOnes += 1
739 else:
740 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
741
742 #
743 # If we've caught all the relevant ones by now, report the result.
744 #
745 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
746 return self.caseClosed(oCaseFile);
747 return False;
748
749 def extractGuestCpuStack(self, sInfoText):
750 """
751 Extracts the guest CPU stacks from the input file.
752
753 Returns a dictionary keyed by the CPU number, value being a list of
754 raw stack lines (no header).
755 Returns empty dictionary if no stacks where found.
756 """
757 dRet = {};
758 off = 0;
759 while True:
760 # Find the stack.
761 offStart = sInfoText.find('=== start guest stack VCPU ', off);
762 if offStart < 0:
763 break;
764 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
765 if offEnd >= 0:
766 offEnd += 3;
767 else:
768 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
769 if offEnd < 0:
770 offEnd = len(sInfoText);
771
772 sStack = sInfoText[offStart : offEnd];
773 sStack = sStack.replace('\r',''); # paranoia
774 asLines = sStack.split('\n');
775
776 # Figure the CPU.
777 asWords = asLines[0].split();
778 if len(asWords) < 6 or not asWords[5].isdigit():
779 break;
780 iCpu = int(asWords[5]);
781
782 # Add it and advance.
783 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
784 off = offEnd;
785 return dRet;
786
787 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
788 """ Investigates kvm_lock_spinning deadlocks """
789 #
790 # Extract the stacks. We need more than one CPU to create a deadlock.
791 #
792 dStacks = self.extractGuestCpuStack(sInfoText);
793 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
794 if len(dStacks) >= 2:
795 #
796 # Examin each of the stacks. Each must have kvm_lock_spinning in
797 # one of the first three entries.
798 #
799 cHits = 0;
800 for iCpu in dStacks:
801 asBacktrace = dStacks[iCpu];
802 for iFrame in xrange(min(3, len(asBacktrace))):
803 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
804 cHits += 1;
805 break;
806 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
807 if cHits == len(dStacks):
808 return (True, self.ktReason_VMM_kvm_lock_spinning);
809
810 _ = dLogs; _ = oCaseFile;
811 return (False, None);
812
813 ## Things we search a main or VM log for to figure out why something went bust.
814 katSimpleMainAndVmLogReasons = [
815 # ( Whether to stop on hit, reason tuple, needle text. )
816 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
817 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
818 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
819 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
820 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
821 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
822 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
823 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
824 ( True, ktReason_Networking_Nonexistent_host_nic,
825 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
826 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
827 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
828 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
829 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
830 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
831 ( True, ktReason_Unknown_File_Not_Found,
832 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
833 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
834 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
835 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' )
836 ];
837
838 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
839 katSimpleVBoxHardeningLogReasons = [
840 # ( Whether to stop on hit, reason tuple, needle text. )
841 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
842 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
843 ];
844
845 ## Things we search a kernel.log file for to figure out why something went bust.
846 katSimpleKernelLogReasons = [
847 # ( Whether to stop on hit, reason tuple, needle text. )
848 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
849 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn''t work' ),
850 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
851 ];
852
853 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
854 katSimpleVgaTextReasons = [
855 # ( Whether to stop on hit, reason tuple, needle text. )
856 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
857 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
858 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
859 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
860 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
861 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
862 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
863 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
864 "and send a report. Then try booting with the 'noapic' option\n"
865 "\n" ),
866 ( True, ktReason_OSInstall_GRUB_hang,
867 "-----\nGRUB Loading stage2..\n\n\n\n" ),
868 ( True, ktReason_Panic_BootManagerC000000F,
869 "Windows failed to start. A recent hardware or software change might be the" ),
870 ];
871
872 ## Things we search for in the info.txt file. Require handlers for now.
873 katInfoTextHandlers = [
874 # ( Trigger text, handler method )
875 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
876 ];
877
878 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
879 katSimpleScreenshotHashReasons = [
880 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
881 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
882 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
883 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
884 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
885 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
886 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
887 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
888 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
889 ];
890
891 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
892 """
893 Investigates a failed VM run.
894 """
895
896 def investigateLogSet():
897 """
898 Investigates the current set of VM related logs.
899 """
900 self.dprint('investigateLogSet: lengths: result log %u, VM log %u, kernel log %u, vga text %u, info text %u'
901 % ( len(sResultLog if sResultLog else ''),
902 len(sVMLog if sVMLog else ''),
903 len(sKrnlLog if sKrnlLog else ''),
904 len(sVgaText if sVgaText else ''),
905 len(sInfoText if sInfoText else ''), ));
906
907 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
908 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
909 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
910 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
911 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
912
913 # TODO: more
914
915 #
916 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
917 #
918 sDetails = self.findInAnyAndReturnResetOfLine([ sVMLog, sResultLog ],
919 'GIM: HyperV: Guest indicates a fatal condition! P0=');
920 if sDetails is not None:
921 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
922 sKey = sDetails.split(' ', 1)[0];
923 try: sKey = '0x%08X' % (int(sKey, 16),);
924 except: pass;
925 if sKey in self.asBsodReasons:
926 tReason = ( self.ksBsodCategory, sKey );
927 elif sKey.lower() in self.asBsodReasons: # just in case.
928 tReason = ( self.ksBsodCategory, sKey.lower() );
929 else:
930 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
931 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
932 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
933
934 #
935 # Look for linux panic.
936 #
937 if sKrnlLog is not None:
938 for fStopOnHit, tReason, sNeedle in self.katSimpleKernelLogReasons:
939 if sKrnlLog.find(sNeedle) > 0:
940 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
941 if fStopOnHit:
942 return True;
943 fFoundSomething = True;
944
945 #
946 # Loop thru the simple stuff.
947 #
948 fFoundSomething = False;
949 for fStopOnHit, tReason, sNeedle in self.katSimpleMainAndVmLogReasons:
950 if sResultLog.find(sNeedle) > 0 or (sVMLog is not None and sVMLog.find(sNeedle) > 0):
951 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
952 if fStopOnHit:
953 return True;
954 fFoundSomething = True;
955
956 # Continue with vga text.
957 if sVgaText:
958 for fStopOnHit, tReason, sNeedle in self.katSimpleVgaTextReasons:
959 if sVgaText.find(sNeedle) > 0:
960 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
961 if fStopOnHit:
962 return True;
963 fFoundSomething = True;
964 _ = sInfoText;
965
966 # Continue with screen hashes.
967 if sScreenHash is not None:
968 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
969 if sScreenHash == sHash:
970 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
971 if fStopOnHit:
972 return True;
973 fFoundSomething = True;
974
975 # Check VBoxHardening.log.
976 if sNtHardLog is not None:
977 for fStopOnHit, tReason, sNeedle in self.katSimpleVBoxHardeningLogReasons:
978 if sNtHardLog.find(sNeedle) > 0:
979 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
980 if fStopOnHit:
981 return True;
982 fFoundSomething = True;
983
984 #
985 # Complicated stuff.
986 #
987 dLogs = {
988 'sVMLog': sVMLog,
989 'sNtHardLog': sNtHardLog,
990 'sScreenHash': sScreenHash,
991 'sKrnlLog': sKrnlLog,
992 'sVgaText': sVgaText,
993 'sInfoText': sInfoText,
994 };
995
996 # info.txt.
997 if sInfoText:
998 for sNeedle, fnHandler in self.katInfoTextHandlers:
999 if sInfoText.find(sNeedle) > 0:
1000 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1001 if tReason is not None:
1002 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1003 if fStop:
1004 return True;
1005 fFoundSomething = True;
1006
1007 #
1008 # Check for repeated reboots...
1009 #
1010 if sVMLog is not None:
1011 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1012 if cResets > 10:
1013 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1014 sComment = 'Counted %s reboots' % (cResets,));
1015
1016 return fFoundSomething;
1017
1018 #
1019 # Check if we got any VM or/and kernel logs. Treat them as sets in
1020 # case we run multiple VMs here (this is of course ASSUMING they
1021 # appear in the order that terminateVmBySession uploads them).
1022 #
1023 cTimes = 0;
1024 sVMLog = None;
1025 sNtHardLog = None;
1026 sScreenHash = None;
1027 sKrnlLog = None;
1028 sVgaText = None;
1029 sInfoText = None;
1030 for oFile in oFailedResult.aoFiles:
1031 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1032 if 'VBoxHardening.log' not in oFile.sFile:
1033 if sVMLog is not None:
1034 if investigateLogSet() is True:
1035 return True;
1036 cTimes += 1;
1037 sInfoText = None;
1038 sVgaText = None;
1039 sKrnlLog = None;
1040 sScreenHash = None;
1041 sNtHardLog = None;
1042 sVMLog = oCaseFile.getLogFile(oFile);
1043 else:
1044 sNtHardLog = oCaseFile.getLogFile(oFile);
1045 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1046 sKrnlLog = oCaseFile.getLogFile(oFile);
1047 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1048 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1049 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1050 sInfoText = oCaseFile.getLogFile(oFile);
1051 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1052 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1053 if sScreenHash is not None:
1054 sScreenHash = sScreenHash.lower();
1055 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1056
1057 if ( sVMLog is not None \
1058 or sNtHardLog is not None \
1059 or cTimes == 0) \
1060 and investigateLogSet() is True:
1061 return True;
1062
1063 return None;
1064
1065
1066 def isResultFromVMRun(self, oFailedResult, sResultLog):
1067 """
1068 Checks if this result and corresponding log snippet looks like a VM run.
1069 """
1070
1071 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1072 if sResultLog.find(' startVm') > 0:
1073 return True;
1074
1075 # Any other indicators? No?
1076 _ = oFailedResult;
1077 return False;
1078
1079 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1080 """
1081 Checks out a VBox VM test.
1082
1083 This is generic investigation of a test running one or more VMs, like
1084 for example a smoke test or a guest installation test.
1085
1086 The fSingleVM parameter is a hint, which probably won't come in useful.
1087 """
1088 _ = fSingleVM;
1089
1090 #
1091 # Get a list of test result failures we should be looking into and the main log.
1092 #
1093 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1094 sMainLog = oCaseFile.getMainLog();
1095
1096 #
1097 # There are a set of errors ending up on the top level result record.
1098 # Should deal with these first.
1099 #
1100 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1101 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1102 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1103 # ignore other failures in the test if we're not a little bit careful here.
1104 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1105 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1106 return self.caseClosed(oCaseFile);
1107
1108 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1109 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1110 (' Timeout: ', ' Attempting to abort child...',) ):
1111 if sMainLog.find('*** glibc detected *** /') > 0:
1112 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1113 else:
1114 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1115 return self.caseClosed(oCaseFile);
1116
1117 # Look for heap corruption without visible hang.
1118 if sMainLog.find('*** glibc detected *** /') > 0 \
1119 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1120 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1121 return self.caseClosed(oCaseFile);
1122
1123 # Out of memory w/ timeout.
1124 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1125 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1126 return self.caseClosed(oCaseFile);
1127
1128 # Stale files like vts_rm.exe (windows).
1129 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1130 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1131 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1132 return self.caseClosed(oCaseFile);
1133
1134 #
1135 # XPCOM screwup
1136 #
1137 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1138 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1139 return self.caseClosed(oCaseFile);
1140
1141 #
1142 # Go thru each failed result.
1143 #
1144 for oFailedResult in aoFailedResults:
1145 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1146 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1147 if oFailedResult.sName == 'Installing VirtualBox':
1148 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1149
1150 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1151 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1152
1153 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1154 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1155
1156 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1157 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1158
1159 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1160 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1161 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1162
1163 elif sResultLog.find('** error: no action was specified') > 0 \
1164 or sResultLog.find('(len(self._asXml, asText))') > 0:
1165 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1166
1167 else:
1168 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1169 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1170
1171 #
1172 # Report home and close the case if we got them all, otherwise log it.
1173 #
1174 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1175 return self.caseClosed(oCaseFile);
1176
1177 if oCaseFile.dReasonForResultId:
1178 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1179 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1180 else:
1181 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1182 return False;
1183
1184
1185 def reasoningFailures(self):
1186 """
1187 Guess the reason for failures.
1188 """
1189 #
1190 # Get a list of failed test sets without any assigned failure reason.
1191 #
1192 cGot = 0;
1193 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack, tsNow = self.tsNow);
1194 for oTestSet in aoTestSets:
1195 self.dprint(u'');
1196 self.dprint(u'reasoningFailures: Checking out test set #%u, status %s' % ( oTestSet.idTestSet, oTestSet.enmStatus,))
1197
1198 #
1199 # Open a case file and assign it to the right investigator.
1200 #
1201 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1202 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1203 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1204 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1205 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1206
1207 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1208
1209 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1210 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1211 fRc = self.investigateBadTestBox(oCaseFile);
1212
1213 elif oCaseFile.isVBoxUnitTest():
1214 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1215 fRc = self.investigateVBoxUnitTest(oCaseFile);
1216
1217 elif oCaseFile.isVBoxInstallTest():
1218 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1219 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1220
1221 elif oCaseFile.isVBoxUSBTest():
1222 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1223 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1224
1225 elif oCaseFile.isVBoxStorageTest():
1226 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1227 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1228
1229 elif oCaseFile.isVBoxGAsTest():
1230 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1231 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1232
1233 elif oCaseFile.isVBoxAPITest():
1234 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1235 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1236
1237 elif oCaseFile.isVBoxBenchmarkTest():
1238 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1239 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1240
1241 elif oCaseFile.isVBoxSmokeTest():
1242 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1243 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1244
1245 else:
1246 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1247 fRc = False;
1248 cGot += fRc is True;
1249
1250 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1251 return 0;
1252
1253
1254 def main(self):
1255 """
1256 The 'main' function.
1257 Return exit code (0, 1, etc).
1258 """
1259 # Database stuff.
1260 self.oDb = TMDatabaseConnection()
1261 self.oTestResultLogic = TestResultLogic(self.oDb);
1262 self.oTestSetLogic = TestSetLogic(self.oDb);
1263 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1264 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1265 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1266 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1267
1268 # Get a fix on our 'now' before we do anything..
1269 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1270 self.tsNow = self.oDb.fetchOne();
1271
1272 # If we're suppost to commit anything we need to get our user ID.
1273 rcExit = 0;
1274 if self.oConfig.fRealRun:
1275 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1276 if self.oLogin is None:
1277 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1278 else:
1279 self.uidSelf = self.oLogin.uid;
1280
1281 #
1282 # Do the stuff.
1283 #
1284 if rcExit == 0:
1285 rcExit = self.selfCheck();
1286 if rcExit == 0:
1287 rcExit = self.badTestBoxManagement();
1288 rcExit2 = self.reasoningFailures();
1289 if rcExit == 0:
1290 rcExit = rcExit2;
1291 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1292 if rcExit == 0:
1293 rcExit = self.badTestBoxManagement();
1294
1295 # Cleanup.
1296 self.oFailureReasonLogic = None;
1297 self.oTestResultFailureLogic = None;
1298 self.oTestSetLogic = None;
1299 self.oTestResultLogic = None;
1300 self.oDb.close();
1301 self.oDb = None;
1302 if self.oLogFile is not None:
1303 self.oLogFile.close();
1304 self.oLogFile = None;
1305 return rcExit;
1306
1307if __name__ == '__main__':
1308 sys.exit(VirtualTestSheriff().main());
1309
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette