VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 66820

Last change on this file since 66820 was 66820, checked in by vboxsync, 8 years ago

ValidationKit: teach the sheriff to draw the right conclusions from an installer running into a timeout, plus minor typo and other cleanups

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 61.9 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 66820 2017-05-08 15:57:37Z vboxsync $
4# pylint: disable=C0301
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15__copyright__ = \
16"""
17Copyright (C) 2012-2016 Oracle Corporation
18
19This file is part of VirtualBox Open Source Edition (OSE), as
20available from http://www.virtualbox.org. This file is free software;
21you can redistribute it and/or modify it under the terms of the GNU
22General Public License (GPL) as published by the Free Software
23Foundation, in version 2 as it comes in the "COPYING" file of the
24VirtualBox OSE distribution. VirtualBox OSE is distributed in the
25hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
26
27The contents of this file may alternatively be used under the terms
28of the Common Development and Distribution License Version 1.0
29(CDDL) only, as it comes in the "COPYING.CDDL" file of the
30VirtualBox OSE distribution, in which case the provisions of the
31CDDL are applicable instead of those of the GPL.
32
33You may elect to license modified versions of this file under the
34terms and conditions of either the GPL or the CDDL or both.
35"""
36__version__ = "$Revision: 66820 $"
37
38
39# Standard python imports
40import sys;
41import os;
42import hashlib;
43import StringIO;
44from optparse import OptionParser;
45from PIL import Image; # pylint: disable=import-error
46
47# Add Test Manager's modules path
48g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
49sys.path.append(g_ksTestManagerDir);
50
51# Test Manager imports
52from testmanager.core.db import TMDatabaseConnection;
53from testmanager.core.build import BuildDataEx;
54from testmanager.core.failurereason import FailureReasonLogic;
55from testmanager.core.testbox import TestBoxLogic, TestBoxData;
56from testmanager.core.testcase import TestCaseDataEx;
57from testmanager.core.testgroup import TestGroupData;
58from testmanager.core.testset import TestSetLogic, TestSetData;
59from testmanager.core.testresults import TestResultLogic, TestResultFileData;
60from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
61from testmanager.core.useraccount import UserAccountLogic;
62
63
64class VirtualTestSheriffCaseFile(object):
65 """
66 A failure investigation case file.
67
68 """
69
70
71 ## Max log file we'll read into memory. (256 MB)
72 kcbMaxLogRead = 0x10000000;
73
74 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
75 self.oSheriff = oSheriff;
76 self.oTestSet = oTestSet; # TestSetData
77 self.oTree = oTree; # TestResultDataEx
78 self.oBuild = oBuild; # BuildDataEx
79 self.oTestBox = oTestBox; # TestBoxData
80 self.oTestGroup = oTestGroup; # TestGroupData
81 self.oTestCase = oTestCase; # TestCaseDataEx
82 self.sMainLog = ''; # The main log file. Empty string if not accessible.
83
84 # Generate a case file name.
85 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
86 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
87 % ( self.oTestSet.idTestSet,
88 self.oTestCase.sName,
89 self.oTestBox.sName,
90 self.oTestBox.sOs,
91 self.oTestBox.sOsVersion,
92 self.oTestBox.sCpuArch,
93 self.oTestBox.sCpuName,
94 self.oTestBox.sCpuVendor,
95 self.oBuild.oCat.sProduct,
96 self.oBuild.oCat.sBranch,
97 self.oBuild.oCat.sType,
98 self.oBuild.iRevision, );
99
100 # Investigation notes.
101 self.tReason = None; # None or one of the ktReason_XXX constants.
102 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
103 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
104
105 #
106 # Reason.
107 #
108
109 def noteReason(self, tReason):
110 """ Notes down a possible reason. """
111 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
112 self.tReason = tReason;
113 return True;
114
115 def noteReasonForId(self, tReason, idTestResult, sComment = None):
116 """ Notes down a possible reason for a specific test result. """
117 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
118 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
119 (u' (%s)' % (sComment,)) if sComment is not None else ''));
120 self.dReasonForResultId[idTestResult] = tReason;
121 if sComment is not None:
122 self.dCommentForResultId[idTestResult] = sComment;
123 return True;
124
125
126 #
127 # Test classification.
128 #
129
130 def isVBoxTest(self):
131 """ Test classification: VirtualBox (using the build) """
132 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
133
134 def isVBoxUnitTest(self):
135 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
136 return self.isVBoxTest() \
137 and self.oTestCase.sName.lower() == 'unit tests';
138
139 def isVBoxInstallTest(self):
140 """ Test case classification: VirtualBox Guest installation test. """
141 return self.isVBoxTest() \
142 and self.oTestCase.sName.lower().startswith('install:');
143
144 def isVBoxUSBTest(self):
145 """ Test case classification: VirtualBox USB test. """
146 return self.isVBoxTest() \
147 and self.oTestCase.sName.lower().startswith('usb:');
148
149 def isVBoxStorageTest(self):
150 """ Test case classification: VirtualBox Storage test. """
151 return self.isVBoxTest() \
152 and self.oTestCase.sName.lower().startswith('storage:');
153
154 def isVBoxGAsTest(self):
155 """ Test case classification: VirtualBox Guest Additions test. """
156 return self.isVBoxTest() \
157 and self.oTestCase.sName.lower().startswith('ga\'s tests');
158
159 def isVBoxAPITest(self):
160 """ Test case classification: VirtualBox API test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('api:');
163
164 def isVBoxBenchmarkTest(self):
165 """ Test case classification: VirtualBox Benchmark test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('benchmark:');
168
169 def isVBoxSmokeTest(self):
170 """ Test case classification: Smoke test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('smoketest');
173
174
175 #
176 # Utility methods.
177 #
178
179 def getMainLog(self):
180 """
181 Tries to reads the main log file since this will be the first source of information.
182 """
183 if self.sMainLog:
184 return self.sMainLog;
185 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
186 if oFile is not None:
187 try:
188 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
189 except Exception as oXcpt:
190 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
191 self.sMainLog = '';
192 else:
193 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
194 return self.sMainLog;
195
196 def getLogFile(self, oFile):
197 """
198 Tries to reads the given file as a utf-8 log file.
199 oFile is a TestFileDataEx instance.
200 Returns empty string if problems opening or reading the file.
201 """
202 sContent = '';
203 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
204 if oFile is not None:
205 try:
206 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
207 except Exception as oXcpt:
208 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
209 else:
210 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
211 return sContent;
212
213 def getScreenshotSha256(self, oFile):
214 """
215 Tries to read the given screenshot file, uncompress it, and do SHA-2
216 on the raw pixels.
217 Returns SHA-2 digest string on success, None on failure.
218 """
219 (oFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
220 try:
221 abImageFile = oFile.read();
222 except Exception as oXcpt:
223 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
224 else:
225 try:
226 oImage = Image.open(StringIO.StringIO(abImageFile));
227 except Exception as oXcpt:
228 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
229 else:
230 try:
231 oHash = hashlib.sha256();
232 oHash.update(oImage.tostring());
233 except Exception as oXcpt:
234 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
235 else:
236 return oHash.hexdigest();
237 return None;
238
239
240
241 def isSingleTestFailure(self):
242 """
243 Figure out if this is a single test failing or if it's one of the
244 more complicated ones.
245 """
246 if self.oTree.cErrors == 1:
247 return True;
248 if self.oTree.deepCountErrorContributers() <= 1:
249 return True;
250 return False;
251
252
253
254class VirtualTestSheriff(object): # pylint: disable=R0903
255 """
256 Add build info into Test Manager database.
257 """
258
259 ## The user account for the virtual sheriff.
260 ksLoginName = 'vsheriff';
261
262 def __init__(self):
263 """
264 Parse command line.
265 """
266 self.oDb = None;
267 self.tsNow = None;
268 self.oTestResultLogic = None;
269 self.oTestSetLogic = None;
270 self.oFailureReasonLogic = None; # FailureReasonLogic;
271 self.oTestResultFailureLogic = None; # TestResultFailureLogic
272 self.oLogin = None;
273 self.uidSelf = -1;
274 self.oLogFile = None;
275 self.asBsodReasons = [];
276 self.asUnitTestReasons = [];
277
278 oParser = OptionParser();
279 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
280 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
281 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
282 help = 'Work period specified in hours. Defauls is 2 hours.');
283 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
284 help = 'Whether to commit the findings to the database. Default is a dry run.');
285 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
286 help = 'Quiet execution');
287 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
288 help = 'Where to log messages.');
289 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
290 help = 'Enables debug mode.');
291
292 (self.oConfig, _) = oParser.parse_args();
293
294 if self.oConfig.sLogFile:
295 self.oLogFile = open(self.oConfig.sLogFile, "a");
296 self.oLogFile.write('VirtualTestSheriff: $Revision: 66820 $ \n');
297
298
299 def eprint(self, sText):
300 """
301 Prints error messages.
302 Returns 1 (for exit code usage.)
303 """
304 print 'error: %s' % (sText,);
305 if self.oLogFile is not None:
306 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
307 return 1;
308
309 def dprint(self, sText):
310 """
311 Prints debug info.
312 """
313 if self.oConfig.fDebug:
314 if not self.oConfig.fQuiet:
315 print 'debug: %s' % (sText, );
316 if self.oLogFile is not None:
317 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
318 return 0;
319
320 def vprint(self, sText):
321 """
322 Prints verbose info.
323 """
324 if not self.oConfig.fQuiet:
325 print 'info: %s' % (sText,);
326 if self.oLogFile is not None:
327 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
328 return 0;
329
330 def getFailureReason(self, tReason):
331 """ Gets the failure reason object for tReason. """
332 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
333
334 def selfCheck(self):
335 """ Does some self checks, looking up things we expect to be in the database and such. """
336 rcExit = 0;
337 for sAttr in dir(self.__class__):
338 if sAttr.startswith('ktReason_'):
339 tReason = getattr(self.__class__, sAttr);
340 oFailureReason = self.getFailureReason(tReason);
341 if oFailureReason is None:
342 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
343 % (tReason[1], tReason[0],));
344
345 # Check the user account as well.
346 if self.oLogin is None:
347 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
348 if oLogin is None:
349 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
350 return rcExit;
351
352
353
354 def badTestBoxManagement(self):
355 """
356 Looks for bad test boxes and first tries once to reboot them then disables them.
357 """
358 rcExit = 0;
359
360 #
361 # We skip this entirely if we're running in the past and not in harmless debug mode.
362 #
363 if self.oConfig.cStartHoursAgo != 0 \
364 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
365 return rcExit;
366 tsNow = self.tsNow if self.oConfig.fDebug else None;
367 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
368 oTestBoxLogic = TestBoxLogic(self.oDb);
369
370 #
371 # Generate a list of failures reasons we consider bad-testbox behavior.
372 #
373 aidFailureReasons = [
374 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
375 ];
376
377 #
378 # Get list of bad test boxes for given period and check them out individually.
379 #
380 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
381 aidFailureReasons = aidFailureReasons);
382 for idTestBox in aidBadTestBoxes:
383 # Skip if the testbox is already disabled or has a pending reboot command.
384 try:
385 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
386 except Exception as oXcpt:
387 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
388 continue;
389 if not oTestBox.fEnabled:
390 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
391 % ( idTestBox, oTestBox.sName, ));
392 continue;
393 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
394 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
395 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
396 continue;
397
398 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
399 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
400 cOkay = 0;
401 cBad = 0;
402 iFirstOkay = len(aoSets);
403 for iSet, oSet in enumerate(aoSets):
404 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
405 cBad += 1;
406 else:
407 # Check for bad failure reasons.
408 oFailure = None;
409 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
410 oFailure = self.oTestResultFailureLogic.getById(oSet.idTestResult);
411 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
412 cBad += 1;
413 else:
414 # This is an okay test result then.
415 ## @todo maybe check the elapsed time here, it could still be a bad run?
416 cOkay += 1;
417 if iFirstOkay > iSet:
418 iFirstOkay = iSet;
419 if iSet > 10:
420 break;
421
422 # We react if there are two or more bad-testbox statuses at the head of the
423 # history and at least three in the last 10 results.
424 if iFirstOkay >= 2 and cBad > 2:
425 # Frank: For now don't reboot boxes automatically
426 if True or oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
427 self.vprint(u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
428 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
429 if self.oConfig.fRealRun is True:
430 try:
431 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
432 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
433 % (iFirstOkay, cBad, cOkay),);
434 except Exception as oXcpt:
435 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
436 else:
437 self.vprint(u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
438 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
439 if self.oConfig.fRealRun is True:
440 try:
441 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
442 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
443 % (iFirstOkay, cBad, cOkay),);
444 except Exception as oXcpt:
445 rcExit = self.eprint(u'Error rebooting testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
446 else:
447 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
448 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
449 return rcExit;
450
451
452 ## @name Failure reasons we know.
453 ## @{
454 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
455 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
456 ktReason_BSOD_000000D1 = ( 'BSOD', '0x000000D1' );
457 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
458 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
459 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
460 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
461 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
462 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
463 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
464 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
465 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
466 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
467 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
468 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
469 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
470 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
471 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
472 ktReason_Host_Install_Hang = ( 'Host', 'Install hang' );
473 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
474 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
475 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
476 ktReason_BootManager_Image_corrupt = ( 'Unknown', 'BOOTMGR Image corrupt' );
477 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
478 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
479 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
480 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
481 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
482 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
483 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
484 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
485 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
486 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
487 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
488 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
489 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
490 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
491 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
492 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
493 ## @}
494
495 ## BSOD category.
496 ksBsodCategory = 'BSOD';
497 ## Special reason indicating that the flesh and blood sheriff has work to do.
498 ksBsodAddNew = 'Add new BSOD';
499
500 ## Unit test category.
501 ksUnitTestCategory = 'Unit';
502 ## Special reason indicating that the flesh and blood sheriff has work to do.
503 ksUnitTestAddNew = 'Add new';
504
505 ## Used for indica that we shouldn't report anything for this test result ID and
506 ## consider promoting the previous error to test set level if it's the only one.
507 ktHarmless = ( 'Probably', 'Caused by previous error' );
508
509
510 def caseClosed(self, oCaseFile):
511 """
512 Reports the findings in the case and closes it.
513 """
514 #
515 # Log it and create a dReasonForReasultId we can use below.
516 #
517 dCommentForResultId = oCaseFile.dCommentForResultId;
518 if oCaseFile.dReasonForResultId:
519 # Must weed out ktHarmless.
520 dReasonForResultId = {};
521 for idKey, tReason in oCaseFile.dReasonForResultId.items():
522 if tReason is not self.ktHarmless:
523 dReasonForResultId[idKey] = tReason;
524 if not dReasonForResultId:
525 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
526 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
527 return False;
528
529 # Try promote to single reason.
530 atValues = dReasonForResultId.values();
531 fSingleReason = True;
532 if len(dReasonForResultId) == 1 and dReasonForResultId.keys()[0] != oCaseFile.oTestSet.idTestResult:
533 self.dprint(u'Promoting single reason to whole set: %s' % (atValues[0],));
534 elif len(dReasonForResultId) > 1 and len(atValues) == atValues.count(atValues[0]):
535 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), atValues[0]));
536 else:
537 fSingleReason = False;
538 if fSingleReason:
539 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: atValues[0], };
540 if dCommentForResultId:
541 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: dCommentForResultId.values()[0], };
542 elif oCaseFile.tReason is not None:
543 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
544 else:
545 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
546 return False;
547
548 self.vprint(u'Closing %s with following reason%s: %s'
549 % ( oCaseFile.sName, 's' if dReasonForResultId > 0 else '', dReasonForResultId, ));
550
551 #
552 # Add the test failure reason record(s).
553 #
554 for idTestResult, tReason in dReasonForResultId.items():
555 oFailureReason = self.getFailureReason(tReason);
556 if oFailureReason is not None:
557 sComment = 'Set by $Revision: 66820 $' # Handy for reverting later.
558 if idTestResult in dCommentForResultId:
559 sComment += ': ' + dCommentForResultId[idTestResult];
560
561 oAdd = TestResultFailureData();
562 oAdd.initFromValues(idTestResult = idTestResult,
563 idFailureReason = oFailureReason.idFailureReason,
564 uidAuthor = self.uidSelf,
565 idTestSet = oCaseFile.oTestSet.idTestSet,
566 sComment = sComment,);
567 if self.oConfig.fRealRun:
568 try:
569 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
570 except Exception as oXcpt:
571 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
572 % (oXcpt, oAdd, oCaseFile.sLongName,));
573 else:
574 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
575 return True;
576
577 #
578 # Tools for assiting log parsing.
579 #
580
581 @staticmethod
582 def matchFollowedByLines(sStr, off, asFollowingLines):
583 """ Worker for isThisFollowedByTheseLines. """
584
585 # Advance off to the end of the line.
586 off = sStr.find('\n', off);
587 if off < 0:
588 return False;
589 off += 1;
590
591 # Match each string with the subsequent lines.
592 for iLine, sLine in enumerate(asFollowingLines):
593 offEnd = sStr.find('\n', off);
594 if offEnd < 0:
595 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
596 if sLine and sStr.find(sLine, off, offEnd) < 0:
597 return False;
598
599 # next line.
600 off = offEnd + 1;
601
602 return True;
603
604 @staticmethod
605 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
606 """
607 Looks for a line contining sFirst which is then followed by lines
608 with the strings in asFollowingLines. (No newline chars anywhere!)
609 Returns True / False.
610 """
611 off = sStr.find(sFirst, 0);
612 while off >= 0:
613 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
614 return True;
615 off = sStr.find(sFirst, off + 1);
616 return False;
617
618 @staticmethod
619 def findAndReturnRestOfLine(sHaystack, sNeedle):
620 """
621 Looks for sNeedle in sHaystack.
622 Returns The text following the needle up to the end of the line.
623 Returns None if not found.
624 """
625 if sHaystack is None:
626 return None;
627 off = sHaystack.find(sNeedle);
628 if off < 0:
629 return None;
630 off += len(sNeedle)
631 offEol = sHaystack.find('\n', off);
632 if offEol < 0:
633 offEol = len(sHaystack);
634 return sHaystack[off:offEol]
635
636 @staticmethod
637 def findInAnyAndReturnRestOfLine(asHaystacks, sNeedle):
638 """
639 Looks for sNeedle in zeroe or more haystacks (asHaystack).
640 Returns The text following the first needed found up to the end of the line.
641 Returns None if not found.
642 """
643 for sHaystack in asHaystacks:
644 sRet = VirtualTestSheriff.findAndReturnRestOfLine(sHaystack, sNeedle);
645 if sRet is not None:
646 return sRet;
647 return None;
648
649
650 #
651 # The investigative units.
652 #
653
654 katSimpleInstallUninstallMainLogReasons = [
655 # ( Whether to stop on hit, reason tuple, needle text. )
656 ( False, ktReason_Host_LeftoverService,
657 'SERVICE_NAME: vbox' ),
658 ];
659
660 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
661 'darwin': [
662 # ( Whether to stop on hit, reason tuple, needle text. )
663 ( True, ktReason_Host_DriverNotUnloading,
664 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
665 ],
666 };
667
668
669 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
670 """
671 Investigates an install or uninstall failure.
672
673 We lump the two together since the installation typically also performs
674 an uninstall first and will be seeing similar issues to the uninstall.
675 """
676
677 if fInstall and oFailedResult.enmStatus == TestSetData.ksTestStatus_TimedOut:
678 oCaseFile.noteReasonForId(self.ktReason_Host_Install_Hang, oFailedResult.idTestResult)
679 return True;
680
681 atSimple = self.katSimpleInstallUninstallMainLogReasons;
682 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
683 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
684
685 fFoundSomething = False;
686 for fStopOnHit, tReason, sNeedle in atSimple:
687 if sResultLog.find(sNeedle) > 0:
688 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
689 if fStopOnHit:
690 return True;
691 fFoundSomething = True;
692
693 return fFoundSomething if fFoundSomething else None;
694
695
696 def investigateBadTestBox(self, oCaseFile):
697 """
698 Checks out bad-testbox statuses.
699 """
700 _ = oCaseFile;
701 return False;
702
703
704 def investigateVBoxUnitTest(self, oCaseFile):
705 """
706 Checks out a VBox unittest problem.
707 """
708
709 #
710 # Process simple test case failures first, using their name as reason.
711 # We do the reason management just like for BSODs.
712 #
713 cRelevantOnes = 0;
714 sMainLog = oCaseFile.getMainLog();
715 aoFailedResults = oCaseFile.oTree.getListOfFailures();
716 for oFailedResult in aoFailedResults:
717 if oFailedResult is oCaseFile.oTree:
718 self.vprint('TODO: toplevel failure');
719 cRelevantOnes += 1
720
721 elif oFailedResult.sName == 'Installing VirtualBox':
722 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
723 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
724 cRelevantOnes += 1
725
726 elif oFailedResult.sName == 'Uninstalling VirtualBox':
727 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
728 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
729 cRelevantOnes += 1
730
731 elif oFailedResult.oParent is not None:
732 # Get the 2nd level node because that's where we'll find the unit test name.
733 while oFailedResult.oParent.oParent is not None:
734 oFailedResult = oFailedResult.oParent;
735
736 # Only report a failure once.
737 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
738 sKey = oFailedResult.sName;
739 if sKey.startswith('testcase/'):
740 sKey = sKey[9:];
741 if sKey in self.asUnitTestReasons:
742 tReason = ( self.ksUnitTestCategory, sKey );
743 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
744 else:
745 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
746 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
747 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
748 cRelevantOnes += 1
749 else:
750 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
751
752 #
753 # If we've caught all the relevant ones by now, report the result.
754 #
755 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
756 return self.caseClosed(oCaseFile);
757 return False;
758
759 def extractGuestCpuStack(self, sInfoText):
760 """
761 Extracts the guest CPU stacks from the input file.
762
763 Returns a dictionary keyed by the CPU number, value being a list of
764 raw stack lines (no header).
765 Returns empty dictionary if no stacks where found.
766 """
767 dRet = {};
768 off = 0;
769 while True:
770 # Find the stack.
771 offStart = sInfoText.find('=== start guest stack VCPU ', off);
772 if offStart < 0:
773 break;
774 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
775 if offEnd >= 0:
776 offEnd += 3;
777 else:
778 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
779 if offEnd < 0:
780 offEnd = len(sInfoText);
781
782 sStack = sInfoText[offStart : offEnd];
783 sStack = sStack.replace('\r',''); # paranoia
784 asLines = sStack.split('\n');
785
786 # Figure the CPU.
787 asWords = asLines[0].split();
788 if len(asWords) < 6 or not asWords[5].isdigit():
789 break;
790 iCpu = int(asWords[5]);
791
792 # Add it and advance.
793 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
794 off = offEnd;
795 return dRet;
796
797 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
798 """ Investigates kvm_lock_spinning deadlocks """
799 #
800 # Extract the stacks. We need more than one CPU to create a deadlock.
801 #
802 dStacks = self.extractGuestCpuStack(sInfoText);
803 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
804 if len(dStacks) >= 2:
805 #
806 # Examin each of the stacks. Each must have kvm_lock_spinning in
807 # one of the first three entries.
808 #
809 cHits = 0;
810 for iCpu in dStacks:
811 asBacktrace = dStacks[iCpu];
812 for iFrame in xrange(min(3, len(asBacktrace))):
813 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
814 cHits += 1;
815 break;
816 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
817 if cHits == len(dStacks):
818 return (True, self.ktReason_VMM_kvm_lock_spinning);
819
820 _ = dLogs; _ = oCaseFile;
821 return (False, None);
822
823 ## Things we search a main or VM log for to figure out why something went bust.
824 katSimpleMainAndVmLogReasons = [
825 # ( Whether to stop on hit, reason tuple, needle text. )
826 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
827 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
828 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
829 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
830 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
831 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
832 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
833 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
834 ( True, ktReason_Networking_Nonexistent_host_nic,
835 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
836 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
837 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
838 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
839 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
840 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
841 ( True, ktReason_Unknown_File_Not_Found,
842 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
843 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
844 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
845 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
846 ( True, ktReason_Host_Modprobe_Failed, 'Kernel driver not installed' )
847 ];
848
849 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
850 katSimpleVBoxHardeningLogReasons = [
851 # ( Whether to stop on hit, reason tuple, needle text. )
852 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
853 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
854 ];
855
856 ## Things we search a kernel.log file for to figure out why something went bust.
857 katSimpleKernelLogReasons = [
858 # ( Whether to stop on hit, reason tuple, needle text. )
859 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
860 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn''t work' ),
861 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
862 ];
863
864 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
865 katSimpleVgaTextReasons = [
866 # ( Whether to stop on hit, reason tuple, needle text. )
867 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
868 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
869 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
870 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
871 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
872 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
873 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
874 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
875 "and send a report. Then try booting with the 'noapic' option\n"
876 "\n" ),
877 ( True, ktReason_OSInstall_GRUB_hang,
878 "-----\nGRUB Loading stage2..\n\n\n\n" ),
879 ( True, ktReason_Panic_BootManagerC000000F,
880 "Windows failed to start. A recent hardware or software change might be the" ),
881 ( True, ktReason_BootManager_Image_corrupt,
882 "BOOTMGR image is corrupt. The system cannot boot." ),
883 ];
884
885 ## Things we search for in the info.txt file. Require handlers for now.
886 katInfoTextHandlers = [
887 # ( Trigger text, handler method )
888 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
889 ];
890
891 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
892 katSimpleScreenshotHashReasons = [
893 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
894 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
895 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
896 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
897 ( True, ktReason_BSOD_000000D1, '134621281f00a3f8aeeb7660064bffbf6187ed56d5852142328d0bcb18ef0ede' ),
898 ( True, ktReason_BSOD_000000D1, '279f11258150c9d2fef041eca65501f3141da8df39256d8f6377e897e3b45a93' ),
899 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
900 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
901 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
902 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
903 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
904 ];
905
906 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
907 """
908 Investigates a failed VM run.
909 """
910
911 def investigateLogSet():
912 """
913 Investigates the current set of VM related logs.
914 """
915 self.dprint('investigateLogSet: lengths: result log %u, VM log %u, kernel log %u, vga text %u, info text %u'
916 % ( len(sResultLog if sResultLog else ''),
917 len(sVMLog if sVMLog else ''),
918 len(sKrnlLog if sKrnlLog else ''),
919 len(sVgaText if sVgaText else ''),
920 len(sInfoText if sInfoText else ''), ));
921
922 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
923 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
924 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
925 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
926 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
927
928 # TODO: more
929
930 #
931 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
932 #
933 sDetails = self.findInAnyAndReturnRestOfLine([ sVMLog, sResultLog ],
934 'GIM: HyperV: Guest indicates a fatal condition! P0=');
935 if sDetails is not None:
936 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
937 sKey = sDetails.split(' ', 1)[0];
938 try: sKey = '0x%08X' % (int(sKey, 16),);
939 except: pass;
940 if sKey in self.asBsodReasons:
941 tReason = ( self.ksBsodCategory, sKey );
942 elif sKey.lower() in self.asBsodReasons: # just in case.
943 tReason = ( self.ksBsodCategory, sKey.lower() );
944 else:
945 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
946 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
947 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
948
949 #
950 # Look for linux panic.
951 #
952 if sKrnlLog is not None:
953 for fStopOnHit, tReason, sNeedle in self.katSimpleKernelLogReasons:
954 if sKrnlLog.find(sNeedle) > 0:
955 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
956 if fStopOnHit:
957 return True;
958 fFoundSomething = True;
959
960 #
961 # Loop thru the simple stuff.
962 #
963 fFoundSomething = False;
964 for fStopOnHit, tReason, sNeedle in self.katSimpleMainAndVmLogReasons:
965 if sResultLog.find(sNeedle) > 0 or (sVMLog is not None and sVMLog.find(sNeedle) > 0):
966 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
967 if fStopOnHit:
968 return True;
969 fFoundSomething = True;
970
971 # Continue with vga text.
972 if sVgaText:
973 for fStopOnHit, tReason, sNeedle in self.katSimpleVgaTextReasons:
974 if sVgaText.find(sNeedle) > 0:
975 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
976 if fStopOnHit:
977 return True;
978 fFoundSomething = True;
979 _ = sInfoText;
980
981 # Continue with screen hashes.
982 if sScreenHash is not None:
983 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
984 if sScreenHash == sHash:
985 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
986 if fStopOnHit:
987 return True;
988 fFoundSomething = True;
989
990 # Check VBoxHardening.log.
991 if sNtHardLog is not None:
992 for fStopOnHit, tReason, sNeedle in self.katSimpleVBoxHardeningLogReasons:
993 if sNtHardLog.find(sNeedle) > 0:
994 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
995 if fStopOnHit:
996 return True;
997 fFoundSomething = True;
998
999 #
1000 # Complicated stuff.
1001 #
1002 dLogs = {
1003 'sVMLog': sVMLog,
1004 'sNtHardLog': sNtHardLog,
1005 'sScreenHash': sScreenHash,
1006 'sKrnlLog': sKrnlLog,
1007 'sVgaText': sVgaText,
1008 'sInfoText': sInfoText,
1009 };
1010
1011 # info.txt.
1012 if sInfoText:
1013 for sNeedle, fnHandler in self.katInfoTextHandlers:
1014 if sInfoText.find(sNeedle) > 0:
1015 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1016 if tReason is not None:
1017 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1018 if fStop:
1019 return True;
1020 fFoundSomething = True;
1021
1022 #
1023 # Check for repeated reboots...
1024 #
1025 if sVMLog is not None:
1026 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1027 if cResets > 10:
1028 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1029 sComment = 'Counted %s reboots' % (cResets,));
1030
1031 return fFoundSomething;
1032
1033 #
1034 # Check if we got any VM or/and kernel logs. Treat them as sets in
1035 # case we run multiple VMs here (this is of course ASSUMING they
1036 # appear in the order that terminateVmBySession uploads them).
1037 #
1038 cTimes = 0;
1039 sVMLog = None;
1040 sNtHardLog = None;
1041 sScreenHash = None;
1042 sKrnlLog = None;
1043 sVgaText = None;
1044 sInfoText = None;
1045 for oFile in oFailedResult.aoFiles:
1046 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1047 if 'VBoxHardening.log' not in oFile.sFile:
1048 if sVMLog is not None:
1049 if investigateLogSet() is True:
1050 return True;
1051 cTimes += 1;
1052 sInfoText = None;
1053 sVgaText = None;
1054 sKrnlLog = None;
1055 sScreenHash = None;
1056 sNtHardLog = None;
1057 sVMLog = oCaseFile.getLogFile(oFile);
1058 else:
1059 sNtHardLog = oCaseFile.getLogFile(oFile);
1060 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1061 sKrnlLog = oCaseFile.getLogFile(oFile);
1062 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1063 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1064 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1065 sInfoText = oCaseFile.getLogFile(oFile);
1066 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1067 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1068 if sScreenHash is not None:
1069 sScreenHash = sScreenHash.lower();
1070 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1071
1072 if ( sVMLog is not None \
1073 or sNtHardLog is not None \
1074 or cTimes == 0) \
1075 and investigateLogSet() is True:
1076 return True;
1077
1078 return None;
1079
1080
1081 def isResultFromVMRun(self, oFailedResult, sResultLog):
1082 """
1083 Checks if this result and corresponding log snippet looks like a VM run.
1084 """
1085
1086 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1087 if sResultLog.find(' startVm') > 0:
1088 return True;
1089
1090 # Any other indicators? No?
1091 _ = oFailedResult;
1092 return False;
1093
1094 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1095 """
1096 Checks out a VBox VM test.
1097
1098 This is generic investigation of a test running one or more VMs, like
1099 for example a smoke test or a guest installation test.
1100
1101 The fSingleVM parameter is a hint, which probably won't come in useful.
1102 """
1103 _ = fSingleVM;
1104
1105 #
1106 # Get a list of test result failures we should be looking into and the main log.
1107 #
1108 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1109 sMainLog = oCaseFile.getMainLog();
1110
1111 #
1112 # There are a set of errors ending up on the top level result record.
1113 # Should deal with these first.
1114 #
1115 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1116 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1117 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1118 # ignore other failures in the test if we're not a little bit careful here.
1119 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1120 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1121 return self.caseClosed(oCaseFile);
1122
1123 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1124 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1125 (' Timeout: ', ' Attempting to abort child...',) ):
1126 if sMainLog.find('*** glibc detected *** /') > 0:
1127 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1128 else:
1129 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1130 return self.caseClosed(oCaseFile);
1131
1132 # Look for heap corruption without visible hang.
1133 if sMainLog.find('*** glibc detected *** /') > 0 \
1134 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1135 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1136 return self.caseClosed(oCaseFile);
1137
1138 # Out of memory w/ timeout.
1139 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1140 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1141 return self.caseClosed(oCaseFile);
1142
1143 # Stale files like vts_rm.exe (windows).
1144 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1145 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1146 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1147 return self.caseClosed(oCaseFile);
1148
1149 #
1150 # XPCOM screwup
1151 #
1152 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1153 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1154 return self.caseClosed(oCaseFile);
1155
1156 #
1157 # Go thru each failed result.
1158 #
1159 for oFailedResult in aoFailedResults:
1160 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1161 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1162 if oFailedResult.sName == 'Installing VirtualBox':
1163 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1164
1165 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1166 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1167
1168 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1169 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1170
1171 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1172 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1173
1174 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1175 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1176 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1177
1178 elif sResultLog.find('** error: no action was specified') > 0 \
1179 or sResultLog.find('(len(self._asXml, asText))') > 0:
1180 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1181
1182 else:
1183 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1184 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1185
1186 #
1187 # Report home and close the case if we got them all, otherwise log it.
1188 #
1189 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1190 return self.caseClosed(oCaseFile);
1191
1192 if oCaseFile.dReasonForResultId:
1193 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1194 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1195 else:
1196 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1197 return False;
1198
1199
1200 def reasoningFailures(self):
1201 """
1202 Guess the reason for failures.
1203 """
1204 #
1205 # Get a list of failed test sets without any assigned failure reason.
1206 #
1207 cGot = 0;
1208 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack, tsNow = self.tsNow);
1209 for oTestSet in aoTestSets:
1210 self.dprint(u'');
1211 self.dprint(u'reasoningFailures: Checking out test set #%u, status %s' % ( oTestSet.idTestSet, oTestSet.enmStatus,))
1212
1213 #
1214 # Open a case file and assign it to the right investigator.
1215 #
1216 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1217 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1218 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1219 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1220 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1221
1222 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1223
1224 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1225 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1226 fRc = self.investigateBadTestBox(oCaseFile);
1227
1228 elif oCaseFile.isVBoxUnitTest():
1229 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1230 fRc = self.investigateVBoxUnitTest(oCaseFile);
1231
1232 elif oCaseFile.isVBoxInstallTest():
1233 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1234 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1235
1236 elif oCaseFile.isVBoxUSBTest():
1237 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1238 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1239
1240 elif oCaseFile.isVBoxStorageTest():
1241 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1242 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1243
1244 elif oCaseFile.isVBoxGAsTest():
1245 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1246 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1247
1248 elif oCaseFile.isVBoxAPITest():
1249 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1250 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1251
1252 elif oCaseFile.isVBoxBenchmarkTest():
1253 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1254 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1255
1256 elif oCaseFile.isVBoxSmokeTest():
1257 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1258 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1259
1260 else:
1261 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1262 fRc = False;
1263 cGot += fRc is True;
1264
1265 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1266 return 0;
1267
1268
1269 def main(self):
1270 """
1271 The 'main' function.
1272 Return exit code (0, 1, etc).
1273 """
1274 # Database stuff.
1275 self.oDb = TMDatabaseConnection()
1276 self.oTestResultLogic = TestResultLogic(self.oDb);
1277 self.oTestSetLogic = TestSetLogic(self.oDb);
1278 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1279 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1280 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1281 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1282
1283 # Get a fix on our 'now' before we do anything..
1284 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1285 self.tsNow = self.oDb.fetchOne();
1286
1287 # If we're suppost to commit anything we need to get our user ID.
1288 rcExit = 0;
1289 if self.oConfig.fRealRun:
1290 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1291 if self.oLogin is None:
1292 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1293 else:
1294 self.uidSelf = self.oLogin.uid;
1295
1296 #
1297 # Do the stuff.
1298 #
1299 if rcExit == 0:
1300 rcExit = self.selfCheck();
1301 if rcExit == 0:
1302 rcExit = self.badTestBoxManagement();
1303 rcExit2 = self.reasoningFailures();
1304 if rcExit == 0:
1305 rcExit = rcExit2;
1306 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1307 if rcExit == 0:
1308 rcExit = self.badTestBoxManagement();
1309
1310 # Cleanup.
1311 self.oFailureReasonLogic = None;
1312 self.oTestResultFailureLogic = None;
1313 self.oTestSetLogic = None;
1314 self.oTestResultLogic = None;
1315 self.oDb.close();
1316 self.oDb = None;
1317 if self.oLogFile is not None:
1318 self.oLogFile.close();
1319 self.oLogFile = None;
1320 return rcExit;
1321
1322if __name__ == '__main__':
1323 sys.exit(VirtualTestSheriff().main());
1324
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette