VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 66429

Last change on this file since 66429 was 66429, checked in by vboxsync, 8 years ago

vsheriff: added ktReason_Host_Modprobe_Failed

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 61.0 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 66429 2017-04-05 10:56:55Z vboxsync $
4# pylint: disable=C0301
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15__copyright__ = \
16"""
17Copyright (C) 2012-2016 Oracle Corporation
18
19This file is part of VirtualBox Open Source Edition (OSE), as
20available from http://www.virtualbox.org. This file is free software;
21you can redistribute it and/or modify it under the terms of the GNU
22General Public License (GPL) as published by the Free Software
23Foundation, in version 2 as it comes in the "COPYING" file of the
24VirtualBox OSE distribution. VirtualBox OSE is distributed in the
25hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
26
27The contents of this file may alternatively be used under the terms
28of the Common Development and Distribution License Version 1.0
29(CDDL) only, as it comes in the "COPYING.CDDL" file of the
30VirtualBox OSE distribution, in which case the provisions of the
31CDDL are applicable instead of those of the GPL.
32
33You may elect to license modified versions of this file under the
34terms and conditions of either the GPL or the CDDL or both.
35"""
36__version__ = "$Revision: 66429 $"
37
38
39# Standard python imports
40import sys;
41import os;
42import hashlib;
43import StringIO;
44from optparse import OptionParser;
45from PIL import Image; # pylint: disable=import-error
46
47# Add Test Manager's modules path
48g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
49sys.path.append(g_ksTestManagerDir);
50
51# Test Manager imports
52from testmanager.core.db import TMDatabaseConnection;
53from testmanager.core.build import BuildDataEx;
54from testmanager.core.failurereason import FailureReasonLogic;
55from testmanager.core.testbox import TestBoxLogic, TestBoxData;
56from testmanager.core.testcase import TestCaseDataEx;
57from testmanager.core.testgroup import TestGroupData;
58from testmanager.core.testset import TestSetLogic, TestSetData;
59from testmanager.core.testresults import TestResultLogic, TestResultFileData;
60from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
61from testmanager.core.useraccount import UserAccountLogic;
62
63
64class VirtualTestSheriffCaseFile(object):
65 """
66 A failure investigation case file.
67
68 """
69
70
71 ## Max log file we'll read into memory. (256 MB)
72 kcbMaxLogRead = 0x10000000;
73
74 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
75 self.oSheriff = oSheriff;
76 self.oTestSet = oTestSet; # TestSetData
77 self.oTree = oTree; # TestResultDataEx
78 self.oBuild = oBuild; # BuildDataEx
79 self.oTestBox = oTestBox; # TestBoxData
80 self.oTestGroup = oTestGroup; # TestGroupData
81 self.oTestCase = oTestCase; # TestCaseDataEx
82 self.sMainLog = ''; # The main log file. Empty string if not accessible.
83
84 # Generate a case file name.
85 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
86 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
87 % ( self.oTestSet.idTestSet,
88 self.oTestCase.sName,
89 self.oTestBox.sName,
90 self.oTestBox.sOs,
91 self.oTestBox.sOsVersion,
92 self.oTestBox.sCpuArch,
93 self.oTestBox.sCpuName,
94 self.oTestBox.sCpuVendor,
95 self.oBuild.oCat.sProduct,
96 self.oBuild.oCat.sBranch,
97 self.oBuild.oCat.sType,
98 self.oBuild.iRevision, );
99
100 # Investigation notes.
101 self.tReason = None; # None or one of the ktReason_XXX constants.
102 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
103 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
104
105 #
106 # Reason.
107 #
108
109 def noteReason(self, tReason):
110 """ Notes down a possible reason. """
111 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
112 self.tReason = tReason;
113 return True;
114
115 def noteReasonForId(self, tReason, idTestResult, sComment = None):
116 """ Notes down a possible reason for a specific test result. """
117 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
118 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
119 (u' (%s)' % (sComment,)) if sComment is not None else ''));
120 self.dReasonForResultId[idTestResult] = tReason;
121 if sComment is not None:
122 self.dCommentForResultId[idTestResult] = sComment;
123 return True;
124
125
126 #
127 # Test classification.
128 #
129
130 def isVBoxTest(self):
131 """ Test classification: VirtualBox (using the build) """
132 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
133
134 def isVBoxUnitTest(self):
135 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
136 return self.isVBoxTest() \
137 and self.oTestCase.sName.lower() == 'unit tests';
138
139 def isVBoxInstallTest(self):
140 """ Test case classification: VirtualBox Guest installation test. """
141 return self.isVBoxTest() \
142 and self.oTestCase.sName.lower().startswith('install:');
143
144 def isVBoxUSBTest(self):
145 """ Test case classification: VirtualBox USB test. """
146 return self.isVBoxTest() \
147 and self.oTestCase.sName.lower().startswith('usb:');
148
149 def isVBoxStorageTest(self):
150 """ Test case classification: VirtualBox Storage test. """
151 return self.isVBoxTest() \
152 and self.oTestCase.sName.lower().startswith('storage:');
153
154 def isVBoxGAsTest(self):
155 """ Test case classification: VirtualBox Guest Additions test. """
156 return self.isVBoxTest() \
157 and self.oTestCase.sName.lower().startswith('ga\'s tests');
158
159 def isVBoxAPITest(self):
160 """ Test case classification: VirtualBox API test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('api:');
163
164 def isVBoxBenchmarkTest(self):
165 """ Test case classification: VirtualBox Benchmark test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('benchmark:');
168
169 def isVBoxSmokeTest(self):
170 """ Test case classification: Smoke test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('smoketest');
173
174
175 #
176 # Utility methods.
177 #
178
179 def getMainLog(self):
180 """
181 Tries to reads the main log file since this will be the first source of information.
182 """
183 if self.sMainLog:
184 return self.sMainLog;
185 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
186 if oFile is not None:
187 try:
188 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
189 except Exception as oXcpt:
190 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
191 self.sMainLog = '';
192 else:
193 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
194 return self.sMainLog;
195
196 def getLogFile(self, oFile):
197 """
198 Tries to reads the given file as a utf-8 log file.
199 oFile is a TestFileDataEx instance.
200 Returns empty string if problems opening or reading the file.
201 """
202 sContent = '';
203 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
204 if oFile is not None:
205 try:
206 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
207 except Exception as oXcpt:
208 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
209 else:
210 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
211 return sContent;
212
213 def getScreenshotSha256(self, oFile):
214 """
215 Tries to read the given screenshot file, uncompress it, and do SHA-2
216 on the raw pixels.
217 Returns SHA-2 digest string on success, None on failure.
218 """
219 (oFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
220 try:
221 abImageFile = oFile.read();
222 except Exception as oXcpt:
223 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
224 else:
225 try:
226 oImage = Image.open(StringIO.StringIO(abImageFile));
227 except Exception as oXcpt:
228 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
229 else:
230 try:
231 oHash = hashlib.sha256();
232 oHash.update(oImage.tostring());
233 except Exception as oXcpt:
234 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
235 else:
236 return oHash.hexdigest();
237 return None;
238
239
240
241 def isSingleTestFailure(self):
242 """
243 Figure out if this is a single test failing or if it's one of the
244 more complicated ones.
245 """
246 if self.oTree.cErrors == 1:
247 return True;
248 if self.oTree.deepCountErrorContributers() <= 1:
249 return True;
250 return False;
251
252
253
254class VirtualTestSheriff(object): # pylint: disable=R0903
255 """
256 Add build info into Test Manager database.
257 """
258
259 ## The user account for the virtual sheriff.
260 ksLoginName = 'vsheriff';
261
262 def __init__(self):
263 """
264 Parse command line.
265 """
266 self.oDb = None;
267 self.tsNow = None;
268 self.oTestResultLogic = None;
269 self.oTestSetLogic = None;
270 self.oFailureReasonLogic = None; # FailureReasonLogic;
271 self.oTestResultFailureLogic = None; # TestResultFailureLogic
272 self.oLogin = None;
273 self.uidSelf = -1;
274 self.oLogFile = None;
275 self.asBsodReasons = [];
276 self.asUnitTestReasons = [];
277
278 oParser = OptionParser();
279 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
280 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
281 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
282 help = 'Work period specified in hours. Defauls is 2 hours.');
283 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
284 help = 'Whether to commit the findings to the database. Default is a dry run.');
285 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
286 help = 'Quiet execution');
287 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
288 help = 'Where to log messages.');
289 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
290 help = 'Enables debug mode.');
291
292 (self.oConfig, _) = oParser.parse_args();
293
294 if self.oConfig.sLogFile:
295 self.oLogFile = open(self.oConfig.sLogFile, "a");
296 self.oLogFile.write('VirtualTestSheriff: $Revision: 66429 $ \n');
297
298
299 def eprint(self, sText):
300 """
301 Prints error messages.
302 Returns 1 (for exit code usage.)
303 """
304 print 'error: %s' % (sText,);
305 if self.oLogFile is not None:
306 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
307 return 1;
308
309 def dprint(self, sText):
310 """
311 Prints debug info.
312 """
313 if self.oConfig.fDebug:
314 if not self.oConfig.fQuiet:
315 print 'debug: %s' % (sText, );
316 if self.oLogFile is not None:
317 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
318 return 0;
319
320 def vprint(self, sText):
321 """
322 Prints verbose info.
323 """
324 if not self.oConfig.fQuiet:
325 print 'info: %s' % (sText,);
326 if self.oLogFile is not None:
327 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
328 return 0;
329
330 def getFailureReason(self, tReason):
331 """ Gets the failure reason object for tReason. """
332 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
333
334 def selfCheck(self):
335 """ Does some self checks, looking up things we expect to be in the database and such. """
336 rcExit = 0;
337 for sAttr in dir(self.__class__):
338 if sAttr.startswith('ktReason_'):
339 tReason = getattr(self.__class__, sAttr);
340 oFailureReason = self.getFailureReason(tReason);
341 if oFailureReason is None:
342 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
343 % (tReason[1], tReason[0],));
344
345 # Check the user account as well.
346 if self.oLogin is None:
347 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
348 if oLogin is None:
349 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
350 return rcExit;
351
352
353
354 def badTestBoxManagement(self):
355 """
356 Looks for bad test boxes and first tries once to reboot them then disables them.
357 """
358 rcExit = 0;
359
360 #
361 # We skip this entirely if we're running in the past and not in harmless debug mode.
362 #
363 if self.oConfig.cStartHoursAgo != 0 \
364 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
365 return rcExit;
366 tsNow = self.tsNow if self.oConfig.fDebug else None;
367 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
368 oTestBoxLogic = TestBoxLogic(self.oDb);
369
370 #
371 # Generate a list of failures reasons we consider bad-testbox behavior.
372 #
373 aidFailureReasons = [
374 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
375 ];
376
377 #
378 # Get list of bad test boxes for given period and check them out individually.
379 #
380 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
381 aidFailureReasons = aidFailureReasons);
382 for idTestBox in aidBadTestBoxes:
383 # Skip if the testbox is already disabled or has a pending reboot command.
384 try:
385 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
386 except Exception as oXcpt:
387 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
388 continue;
389 if not oTestBox.fEnabled:
390 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
391 % ( idTestBox, oTestBox.sName, ));
392 continue;
393 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
394 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
395 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
396 continue;
397
398 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
399 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
400 cOkay = 0;
401 cBad = 0;
402 iFirstOkay = len(aoSets);
403 for iSet, oSet in enumerate(aoSets):
404 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
405 cBad += 1;
406 else:
407 # Check for bad failure reasons.
408 oFailure = None;
409 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
410 oFailure = self.oTestResultFailureLogic.getById(oSet.idTestResult);
411 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
412 cBad += 1;
413 else:
414 # This is an okay test result then.
415 ## @todo maybe check the elapsed time here, it could still be a bad run?
416 cOkay += 1;
417 if iFirstOkay > iSet:
418 iFirstOkay = iSet;
419 if iSet > 10:
420 break;
421
422 # We react if there are two or more bad-testbox statuses at the head of the
423 # history and at least three in the last 10 results.
424 if iFirstOkay >= 2 and cBad > 2:
425 # Frank: For now don't reboot boxes automatically
426 if True or oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
427 self.vprint(u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
428 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
429 if self.oConfig.fRealRun is True:
430 try:
431 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
432 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
433 % (iFirstOkay, cBad, cOkay),);
434 except Exception as oXcpt:
435 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
436 else:
437 self.vprint(u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
438 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
439 if self.oConfig.fRealRun is True:
440 try:
441 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
442 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
443 % (iFirstOkay, cBad, cOkay),);
444 except Exception as oXcpt:
445 rcExit = self.eprint(u'Error rebooting testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
446 else:
447 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
448 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
449 return rcExit;
450
451
452 ## @name Failure reasons we know.
453 ## @{
454 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
455 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
456 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
457 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
458 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
459 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
460 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
461 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
462 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
463 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
464 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
465 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
466 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
467 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
468 ktReason_Host_LeftoverService = ( 'Host', 'Leftover service' );
469 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
470 ktReason_Host_Modprobe_Failed = ( 'Host', 'Modprobe failed' );
471 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
472 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
473 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
474 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
475 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
476 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
477 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
478 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
479 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
480 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
481 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
482 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
483 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
484 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
485 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
486 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
487 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
488 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
489 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
490 ## @}
491
492 ## BSOD category.
493 ksBsodCategory = 'BSOD';
494 ## Special reason indicating that the flesh and blood sheriff has work to do.
495 ksBsodAddNew = 'Add new BSOD';
496
497 ## Unit test category.
498 ksUnitTestCategory = 'Unit';
499 ## Special reason indicating that the flesh and blood sheriff has work to do.
500 ksUnitTestAddNew = 'Add new';
501
502 ## Used for indica that we shouldn't report anything for this test result ID and
503 ## consider promoting the previous error to test set level if it's the only one.
504 ktHarmless = ( 'Probably', 'Caused by previous error' );
505
506
507 def caseClosed(self, oCaseFile):
508 """
509 Reports the findings in the case and closes it.
510 """
511 #
512 # Log it and create a dReasonForReasultId we can use below.
513 #
514 dCommentForResultId = oCaseFile.dCommentForResultId;
515 if oCaseFile.dReasonForResultId:
516 # Must weed out ktHarmless.
517 dReasonForResultId = {};
518 for idKey, tReason in oCaseFile.dReasonForResultId.items():
519 if tReason is not self.ktHarmless:
520 dReasonForResultId[idKey] = tReason;
521 if not dReasonForResultId:
522 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
523 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
524 return False;
525
526 # Try promote to single reason.
527 atValues = dReasonForResultId.values();
528 fSingleReason = True;
529 if len(dReasonForResultId) == 1 and dReasonForResultId.keys()[0] != oCaseFile.oTestSet.idTestResult:
530 self.dprint(u'Promoting single reason to whole set: %s' % (atValues[0],));
531 elif len(dReasonForResultId) > 1 and len(atValues) == atValues.count(atValues[0]):
532 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), atValues[0]));
533 else:
534 fSingleReason = False;
535 if fSingleReason:
536 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: atValues[0], };
537 if dCommentForResultId:
538 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: dCommentForResultId.values()[0], };
539 elif oCaseFile.tReason is not None:
540 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
541 else:
542 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
543 return False;
544
545 self.vprint(u'Closing %s with following reason%s: %s'
546 % ( oCaseFile.sName, 's' if dReasonForResultId > 0 else '', dReasonForResultId, ));
547
548 #
549 # Add the test failure reason record(s).
550 #
551 for idTestResult, tReason in dReasonForResultId.items():
552 oFailureReason = self.getFailureReason(tReason);
553 if oFailureReason is not None:
554 sComment = 'Set by $Revision: 66429 $' # Handy for reverting later.
555 if idTestResult in dCommentForResultId:
556 sComment += ': ' + dCommentForResultId[idTestResult];
557
558 oAdd = TestResultFailureData();
559 oAdd.initFromValues(idTestResult = idTestResult,
560 idFailureReason = oFailureReason.idFailureReason,
561 uidAuthor = self.uidSelf,
562 idTestSet = oCaseFile.oTestSet.idTestSet,
563 sComment = sComment,);
564 if self.oConfig.fRealRun:
565 try:
566 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
567 except Exception as oXcpt:
568 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
569 % (oXcpt, oAdd, oCaseFile.sLongName,));
570 else:
571 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
572 return True;
573
574 #
575 # Tools for assiting log parsing.
576 #
577
578 @staticmethod
579 def matchFollowedByLines(sStr, off, asFollowingLines):
580 """ Worker for isThisFollowedByTheseLines. """
581
582 # Advance off to the end of the line.
583 off = sStr.find('\n', off);
584 if off < 0:
585 return False;
586 off += 1;
587
588 # Match each string with the subsequent lines.
589 for iLine, sLine in enumerate(asFollowingLines):
590 offEnd = sStr.find('\n', off);
591 if offEnd < 0:
592 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
593 if sLine and sStr.find(sLine, off, offEnd) < 0:
594 return False;
595
596 # next line.
597 off = offEnd + 1;
598
599 return True;
600
601 @staticmethod
602 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
603 """
604 Looks for a line contining sFirst which is then followed by lines
605 with the strings in asFollowingLines. (No newline chars anywhere!)
606 Returns True / False.
607 """
608 off = sStr.find(sFirst, 0);
609 while off >= 0:
610 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
611 return True;
612 off = sStr.find(sFirst, off + 1);
613 return False;
614
615 @staticmethod
616 def findAndReturnResetOfLine(sHaystack, sNeedle):
617 """
618 Looks for sNeedle in sHaystack.
619 Returns The text following the needle up to the end of the line.
620 Returns None if not found.
621 """
622 if sHaystack is None:
623 return None;
624 off = sHaystack.find(sNeedle);
625 if off < 0:
626 return None;
627 off += len(sNeedle)
628 offEol = sHaystack.find('\n', off);
629 if offEol < 0:
630 offEol = len(sHaystack);
631 return sHaystack[off:offEol]
632
633 @staticmethod
634 def findInAnyAndReturnResetOfLine(asHaystacks, sNeedle):
635 """
636 Looks for sNeedle in zeroe or more haystacks (asHaystack).
637 Returns The text following the first needed found up to the end of the line.
638 Returns None if not found.
639 """
640 for sHaystack in asHaystacks:
641 sRet = VirtualTestSheriff.findAndReturnResetOfLine(sHaystack, sNeedle);
642 if sRet is not None:
643 return sRet;
644 return None;
645
646
647 #
648 # The investigative units.
649 #
650
651 katSimpleInstallUninstallMainLogReasons = [
652 # ( Whether to stop on hit, reason tuple, needle text. )
653 ( False, ktReason_Host_LeftoverService,
654 'SERVICE_NAME: vbox' ),
655 ];
656
657 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
658 'darwin': [
659 # ( Whether to stop on hit, reason tuple, needle text. )
660 ( True, ktReason_Host_DriverNotUnloading,
661 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
662 ],
663 };
664
665
666 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
667 """
668 Investigates an install or uninstall failure.
669
670 We lump the two together since the installation typically also performs
671 an uninstall first and will be seeing similar issues to the uninstall.
672 """
673 _ = fInstall;
674
675 atSimple = self.katSimpleInstallUninstallMainLogReasons;
676 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
677 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
678
679 fFoundSomething = False;
680 for fStopOnHit, tReason, sNeedle in atSimple:
681 if sResultLog.find(sNeedle) > 0:
682 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
683 if fStopOnHit:
684 return True;
685 fFoundSomething = True;
686
687 return fFoundSomething if fFoundSomething else None;
688
689
690 def investigateBadTestBox(self, oCaseFile):
691 """
692 Checks out bad-testbox statuses.
693 """
694 _ = oCaseFile;
695 return False;
696
697
698 def investigateVBoxUnitTest(self, oCaseFile):
699 """
700 Checks out a VBox unittest problem.
701 """
702
703 #
704 # Process simple test case failures first, using their name as reason.
705 # We do the reason management just like for BSODs.
706 #
707 cRelevantOnes = 0;
708 sMainLog = oCaseFile.getMainLog();
709 aoFailedResults = oCaseFile.oTree.getListOfFailures();
710 for oFailedResult in aoFailedResults:
711 if oFailedResult is oCaseFile.oTree:
712 self.vprint('TODO: toplevel failure');
713 cRelevantOnes += 1
714
715 elif oFailedResult.sName == 'Installing VirtualBox':
716 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
717 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
718 cRelevantOnes += 1
719
720 elif oFailedResult.sName == 'Uninstalling VirtualBox':
721 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
722 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
723 cRelevantOnes += 1
724
725 elif oFailedResult.oParent is not None:
726 # Get the 2nd level node because that's where we'll find the unit test name.
727 while oFailedResult.oParent.oParent is not None:
728 oFailedResult = oFailedResult.oParent;
729
730 # Only report a failure once.
731 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
732 sKey = oFailedResult.sName;
733 if sKey.startswith('testcase/'):
734 sKey = sKey[9:];
735 if sKey in self.asUnitTestReasons:
736 tReason = ( self.ksUnitTestCategory, sKey );
737 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
738 else:
739 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
740 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
741 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
742 cRelevantOnes += 1
743 else:
744 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
745
746 #
747 # If we've caught all the relevant ones by now, report the result.
748 #
749 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
750 return self.caseClosed(oCaseFile);
751 return False;
752
753 def extractGuestCpuStack(self, sInfoText):
754 """
755 Extracts the guest CPU stacks from the input file.
756
757 Returns a dictionary keyed by the CPU number, value being a list of
758 raw stack lines (no header).
759 Returns empty dictionary if no stacks where found.
760 """
761 dRet = {};
762 off = 0;
763 while True:
764 # Find the stack.
765 offStart = sInfoText.find('=== start guest stack VCPU ', off);
766 if offStart < 0:
767 break;
768 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
769 if offEnd >= 0:
770 offEnd += 3;
771 else:
772 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
773 if offEnd < 0:
774 offEnd = len(sInfoText);
775
776 sStack = sInfoText[offStart : offEnd];
777 sStack = sStack.replace('\r',''); # paranoia
778 asLines = sStack.split('\n');
779
780 # Figure the CPU.
781 asWords = asLines[0].split();
782 if len(asWords) < 6 or not asWords[5].isdigit():
783 break;
784 iCpu = int(asWords[5]);
785
786 # Add it and advance.
787 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
788 off = offEnd;
789 return dRet;
790
791 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
792 """ Investigates kvm_lock_spinning deadlocks """
793 #
794 # Extract the stacks. We need more than one CPU to create a deadlock.
795 #
796 dStacks = self.extractGuestCpuStack(sInfoText);
797 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
798 if len(dStacks) >= 2:
799 #
800 # Examin each of the stacks. Each must have kvm_lock_spinning in
801 # one of the first three entries.
802 #
803 cHits = 0;
804 for iCpu in dStacks:
805 asBacktrace = dStacks[iCpu];
806 for iFrame in xrange(min(3, len(asBacktrace))):
807 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
808 cHits += 1;
809 break;
810 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
811 if cHits == len(dStacks):
812 return (True, self.ktReason_VMM_kvm_lock_spinning);
813
814 _ = dLogs; _ = oCaseFile;
815 return (False, None);
816
817 ## Things we search a main or VM log for to figure out why something went bust.
818 katSimpleMainAndVmLogReasons = [
819 # ( Whether to stop on hit, reason tuple, needle text. )
820 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
821 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
822 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
823 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
824 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
825 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
826 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
827 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
828 ( True, ktReason_Networking_Nonexistent_host_nic,
829 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
830 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
831 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
832 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
833 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
834 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
835 ( True, ktReason_Unknown_File_Not_Found,
836 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
837 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
838 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
839 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' ),
840 ( True, ktReason_Host_Modprobe_Failed, 'modprobe vboxdrv failed' )
841 ];
842
843 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
844 katSimpleVBoxHardeningLogReasons = [
845 # ( Whether to stop on hit, reason tuple, needle text. )
846 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
847 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
848 ];
849
850 ## Things we search a kernel.log file for to figure out why something went bust.
851 katSimpleKernelLogReasons = [
852 # ( Whether to stop on hit, reason tuple, needle text. )
853 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
854 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn''t work' ),
855 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
856 ];
857
858 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
859 katSimpleVgaTextReasons = [
860 # ( Whether to stop on hit, reason tuple, needle text. )
861 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
862 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
863 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
864 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
865 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
866 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
867 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
868 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
869 "and send a report. Then try booting with the 'noapic' option\n"
870 "\n" ),
871 ( True, ktReason_OSInstall_GRUB_hang,
872 "-----\nGRUB Loading stage2..\n\n\n\n" ),
873 ( True, ktReason_Panic_BootManagerC000000F,
874 "Windows failed to start. A recent hardware or software change might be the" ),
875 ];
876
877 ## Things we search for in the info.txt file. Require handlers for now.
878 katInfoTextHandlers = [
879 # ( Trigger text, handler method )
880 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
881 ];
882
883 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
884 katSimpleScreenshotHashReasons = [
885 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
886 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
887 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
888 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
889 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
890 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
891 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
892 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
893 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
894 ];
895
896 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
897 """
898 Investigates a failed VM run.
899 """
900
901 def investigateLogSet():
902 """
903 Investigates the current set of VM related logs.
904 """
905 self.dprint('investigateLogSet: lengths: result log %u, VM log %u, kernel log %u, vga text %u, info text %u'
906 % ( len(sResultLog if sResultLog else ''),
907 len(sVMLog if sVMLog else ''),
908 len(sKrnlLog if sKrnlLog else ''),
909 len(sVgaText if sVgaText else ''),
910 len(sInfoText if sInfoText else ''), ));
911
912 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
913 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
914 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
915 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
916 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
917
918 # TODO: more
919
920 #
921 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
922 #
923 sDetails = self.findInAnyAndReturnResetOfLine([ sVMLog, sResultLog ],
924 'GIM: HyperV: Guest indicates a fatal condition! P0=');
925 if sDetails is not None:
926 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
927 sKey = sDetails.split(' ', 1)[0];
928 try: sKey = '0x%08X' % (int(sKey, 16),);
929 except: pass;
930 if sKey in self.asBsodReasons:
931 tReason = ( self.ksBsodCategory, sKey );
932 elif sKey.lower() in self.asBsodReasons: # just in case.
933 tReason = ( self.ksBsodCategory, sKey.lower() );
934 else:
935 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
936 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
937 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
938
939 #
940 # Look for linux panic.
941 #
942 if sKrnlLog is not None:
943 for fStopOnHit, tReason, sNeedle in self.katSimpleKernelLogReasons:
944 if sKrnlLog.find(sNeedle) > 0:
945 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
946 if fStopOnHit:
947 return True;
948 fFoundSomething = True;
949
950 #
951 # Loop thru the simple stuff.
952 #
953 fFoundSomething = False;
954 for fStopOnHit, tReason, sNeedle in self.katSimpleMainAndVmLogReasons:
955 if sResultLog.find(sNeedle) > 0 or (sVMLog is not None and sVMLog.find(sNeedle) > 0):
956 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
957 if fStopOnHit:
958 return True;
959 fFoundSomething = True;
960
961 # Continue with vga text.
962 if sVgaText:
963 for fStopOnHit, tReason, sNeedle in self.katSimpleVgaTextReasons:
964 if sVgaText.find(sNeedle) > 0:
965 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
966 if fStopOnHit:
967 return True;
968 fFoundSomething = True;
969 _ = sInfoText;
970
971 # Continue with screen hashes.
972 if sScreenHash is not None:
973 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
974 if sScreenHash == sHash:
975 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
976 if fStopOnHit:
977 return True;
978 fFoundSomething = True;
979
980 # Check VBoxHardening.log.
981 if sNtHardLog is not None:
982 for fStopOnHit, tReason, sNeedle in self.katSimpleVBoxHardeningLogReasons:
983 if sNtHardLog.find(sNeedle) > 0:
984 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
985 if fStopOnHit:
986 return True;
987 fFoundSomething = True;
988
989 #
990 # Complicated stuff.
991 #
992 dLogs = {
993 'sVMLog': sVMLog,
994 'sNtHardLog': sNtHardLog,
995 'sScreenHash': sScreenHash,
996 'sKrnlLog': sKrnlLog,
997 'sVgaText': sVgaText,
998 'sInfoText': sInfoText,
999 };
1000
1001 # info.txt.
1002 if sInfoText:
1003 for sNeedle, fnHandler in self.katInfoTextHandlers:
1004 if sInfoText.find(sNeedle) > 0:
1005 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1006 if tReason is not None:
1007 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1008 if fStop:
1009 return True;
1010 fFoundSomething = True;
1011
1012 #
1013 # Check for repeated reboots...
1014 #
1015 if sVMLog is not None:
1016 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1017 if cResets > 10:
1018 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1019 sComment = 'Counted %s reboots' % (cResets,));
1020
1021 return fFoundSomething;
1022
1023 #
1024 # Check if we got any VM or/and kernel logs. Treat them as sets in
1025 # case we run multiple VMs here (this is of course ASSUMING they
1026 # appear in the order that terminateVmBySession uploads them).
1027 #
1028 cTimes = 0;
1029 sVMLog = None;
1030 sNtHardLog = None;
1031 sScreenHash = None;
1032 sKrnlLog = None;
1033 sVgaText = None;
1034 sInfoText = None;
1035 for oFile in oFailedResult.aoFiles:
1036 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1037 if 'VBoxHardening.log' not in oFile.sFile:
1038 if sVMLog is not None:
1039 if investigateLogSet() is True:
1040 return True;
1041 cTimes += 1;
1042 sInfoText = None;
1043 sVgaText = None;
1044 sKrnlLog = None;
1045 sScreenHash = None;
1046 sNtHardLog = None;
1047 sVMLog = oCaseFile.getLogFile(oFile);
1048 else:
1049 sNtHardLog = oCaseFile.getLogFile(oFile);
1050 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1051 sKrnlLog = oCaseFile.getLogFile(oFile);
1052 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1053 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1054 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1055 sInfoText = oCaseFile.getLogFile(oFile);
1056 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1057 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1058 if sScreenHash is not None:
1059 sScreenHash = sScreenHash.lower();
1060 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1061
1062 if ( sVMLog is not None \
1063 or sNtHardLog is not None \
1064 or cTimes == 0) \
1065 and investigateLogSet() is True:
1066 return True;
1067
1068 return None;
1069
1070
1071 def isResultFromVMRun(self, oFailedResult, sResultLog):
1072 """
1073 Checks if this result and corresponding log snippet looks like a VM run.
1074 """
1075
1076 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1077 if sResultLog.find(' startVm') > 0:
1078 return True;
1079
1080 # Any other indicators? No?
1081 _ = oFailedResult;
1082 return False;
1083
1084 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1085 """
1086 Checks out a VBox VM test.
1087
1088 This is generic investigation of a test running one or more VMs, like
1089 for example a smoke test or a guest installation test.
1090
1091 The fSingleVM parameter is a hint, which probably won't come in useful.
1092 """
1093 _ = fSingleVM;
1094
1095 #
1096 # Get a list of test result failures we should be looking into and the main log.
1097 #
1098 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1099 sMainLog = oCaseFile.getMainLog();
1100
1101 #
1102 # There are a set of errors ending up on the top level result record.
1103 # Should deal with these first.
1104 #
1105 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1106 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1107 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1108 # ignore other failures in the test if we're not a little bit careful here.
1109 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1110 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1111 return self.caseClosed(oCaseFile);
1112
1113 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1114 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1115 (' Timeout: ', ' Attempting to abort child...',) ):
1116 if sMainLog.find('*** glibc detected *** /') > 0:
1117 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1118 else:
1119 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1120 return self.caseClosed(oCaseFile);
1121
1122 # Look for heap corruption without visible hang.
1123 if sMainLog.find('*** glibc detected *** /') > 0 \
1124 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1125 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1126 return self.caseClosed(oCaseFile);
1127
1128 # Out of memory w/ timeout.
1129 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1130 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1131 return self.caseClosed(oCaseFile);
1132
1133 # Stale files like vts_rm.exe (windows).
1134 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1135 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1136 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1137 return self.caseClosed(oCaseFile);
1138
1139 #
1140 # XPCOM screwup
1141 #
1142 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1143 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1144 return self.caseClosed(oCaseFile);
1145
1146 #
1147 # Go thru each failed result.
1148 #
1149 for oFailedResult in aoFailedResults:
1150 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1151 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1152 if oFailedResult.sName == 'Installing VirtualBox':
1153 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1154
1155 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1156 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1157
1158 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1159 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1160
1161 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1162 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1163
1164 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1165 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1166 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1167
1168 elif sResultLog.find('** error: no action was specified') > 0 \
1169 or sResultLog.find('(len(self._asXml, asText))') > 0:
1170 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1171
1172 else:
1173 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1174 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1175
1176 #
1177 # Report home and close the case if we got them all, otherwise log it.
1178 #
1179 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1180 return self.caseClosed(oCaseFile);
1181
1182 if oCaseFile.dReasonForResultId:
1183 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1184 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1185 else:
1186 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1187 return False;
1188
1189
1190 def reasoningFailures(self):
1191 """
1192 Guess the reason for failures.
1193 """
1194 #
1195 # Get a list of failed test sets without any assigned failure reason.
1196 #
1197 cGot = 0;
1198 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack, tsNow = self.tsNow);
1199 for oTestSet in aoTestSets:
1200 self.dprint(u'');
1201 self.dprint(u'reasoningFailures: Checking out test set #%u, status %s' % ( oTestSet.idTestSet, oTestSet.enmStatus,))
1202
1203 #
1204 # Open a case file and assign it to the right investigator.
1205 #
1206 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1207 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1208 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1209 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1210 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1211
1212 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1213
1214 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1215 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1216 fRc = self.investigateBadTestBox(oCaseFile);
1217
1218 elif oCaseFile.isVBoxUnitTest():
1219 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1220 fRc = self.investigateVBoxUnitTest(oCaseFile);
1221
1222 elif oCaseFile.isVBoxInstallTest():
1223 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1224 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1225
1226 elif oCaseFile.isVBoxUSBTest():
1227 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1228 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1229
1230 elif oCaseFile.isVBoxStorageTest():
1231 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1232 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1233
1234 elif oCaseFile.isVBoxGAsTest():
1235 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1236 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1237
1238 elif oCaseFile.isVBoxAPITest():
1239 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1240 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1241
1242 elif oCaseFile.isVBoxBenchmarkTest():
1243 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1244 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1245
1246 elif oCaseFile.isVBoxSmokeTest():
1247 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1248 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1249
1250 else:
1251 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1252 fRc = False;
1253 cGot += fRc is True;
1254
1255 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1256 return 0;
1257
1258
1259 def main(self):
1260 """
1261 The 'main' function.
1262 Return exit code (0, 1, etc).
1263 """
1264 # Database stuff.
1265 self.oDb = TMDatabaseConnection()
1266 self.oTestResultLogic = TestResultLogic(self.oDb);
1267 self.oTestSetLogic = TestSetLogic(self.oDb);
1268 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1269 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1270 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1271 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1272
1273 # Get a fix on our 'now' before we do anything..
1274 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1275 self.tsNow = self.oDb.fetchOne();
1276
1277 # If we're suppost to commit anything we need to get our user ID.
1278 rcExit = 0;
1279 if self.oConfig.fRealRun:
1280 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1281 if self.oLogin is None:
1282 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1283 else:
1284 self.uidSelf = self.oLogin.uid;
1285
1286 #
1287 # Do the stuff.
1288 #
1289 if rcExit == 0:
1290 rcExit = self.selfCheck();
1291 if rcExit == 0:
1292 rcExit = self.badTestBoxManagement();
1293 rcExit2 = self.reasoningFailures();
1294 if rcExit == 0:
1295 rcExit = rcExit2;
1296 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1297 if rcExit == 0:
1298 rcExit = self.badTestBoxManagement();
1299
1300 # Cleanup.
1301 self.oFailureReasonLogic = None;
1302 self.oTestResultFailureLogic = None;
1303 self.oTestSetLogic = None;
1304 self.oTestResultLogic = None;
1305 self.oDb.close();
1306 self.oDb = None;
1307 if self.oLogFile is not None:
1308 self.oLogFile.close();
1309 self.oLogFile = None;
1310 return rcExit;
1311
1312if __name__ == '__main__':
1313 sys.exit(VirtualTestSheriff().main());
1314
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette