Commit 21031482 authored by Olivier Berthier, committed by Dylan Baker

framework: Add aborting option when a monitored error is detected

This adds a policy that tells the user when the system should be rebooted
(for instance, because it has become unstable) to avoid noisy test
results, so that testing can then continue successfully.

To do this, a new module is proposed. A class Monitoring is used for
managing the monitoring rules. Two types of rules, MonitoringFile and
MonitoringLinuxDmesg, derived from the abstract class MonitoringBase,
have been implemented. The first tracks a pattern in standard files or
locked files. The second, derived from dmesg.LinuxDmesg, tracks a
pattern in the dmesg output.

The monitoring rules must be defined in piglit.conf at the section
monitored-errors. If one of the regexes matches, Piglit will raise a
PiglitAbort exception, stop the test execution (terminating the test
thread pool) and exit with code 3.

Test execution can then be resumed, with or without rebooting the system,
in the usual way with the command line parameter "resume".

To call it, use command line parameter: --abort-on-monitored-error

This option implies --no-concurrent

This also includes a set of unit tests for this module.
Reviewed-by: Dylan Baker <dylanx.c.baker@intel.com>
parent e43a23e7
......@@ -30,6 +30,7 @@ __all__ = [
'PiglitInternalError',
'PiglitFatalError',
'PiglitException',
'PiglitAbort',
'handler',
]
......@@ -51,6 +52,10 @@ def handler(func):
except PiglitFatalError as e:
print('Fatal Error: {}'.format(str(e)), file=sys.stderr)
sys.exit(1)
except PiglitAbort as e:
print('Aborting Piglit execution: {}'.format(str(e)),
file=sys.stderr)
sys.exit(3)
return _inner
......@@ -87,3 +92,12 @@ class PiglitFatalError(Exception):
to the top of the program where it exits.
"""
class PiglitAbort(Exception):
    """Class for non-errors that require piglit aborting.

    When this class (or a subclass) is raised it should be raised all the way
    to the top of the program where it exits. The handler decorator in this
    module catches it, prints 'Aborting Piglit execution: <message>' to
    stderr and exits with status 3.

    """
This diff is collapsed.
......@@ -178,6 +178,7 @@ class _Options(object): # pylint: disable=too-many-instance-attributes
exclude_filter -- list of compiled regex which exclude tests that match
valgrind -- True if valgrind is to be used
dmesg -- True if dmesg checking is desired. This forces concurrency off
monitored -- True if monitoring is desired. This forces concurrency off
env -- environment variables set for each test before run
"""
......@@ -192,6 +193,7 @@ class _Options(object): # pylint: disable=too-many-instance-attributes
self.exclude_tests = set()
self.valgrind = False
self.dmesg = False
self.monitored = False
self.sync = False
# env is used to set some base environment variables that are not going
......
......@@ -42,6 +42,7 @@ import six
from framework import grouptools, exceptions, options
from framework.dmesg import get_dmesg
from framework.log import LogManager
from framework.monitoring import Monitoring
from framework.test.base import Test
__all__ = [
......@@ -208,6 +209,8 @@ class TestProfile(object):
self._dmesg = None
self.dmesg = False
self.results_dir = None
self._monitoring = None
self.monitoring = False
@property
def dmesg(self):
......@@ -225,6 +228,22 @@ class TestProfile(object):
"""
self._dmesg = get_dmesg(not_dummy)
@property
def monitoring(self):
    """Return the object stored by the monitoring setter."""
    return self._monitoring

@monitoring.setter
def monitoring(self, monitored):
    """Set monitoring.

    Arguments:
    monitored -- if truthy, Monitoring will enable monitoring according to
                 the defined rules

    """
    # The assigned flag is not stored as-is: it is passed to Monitoring() and
    # the resulting instance is what the getter hands back.
    self._monitoring = Monitoring(monitored)
def _prepare_test_list(self):
""" Prepare tests for running
......@@ -309,16 +328,18 @@ class TestProfile(object):
self._prepare_test_list()
log = LogManager(logger, len(self.test_list))
def test(pair):
def test(pair, this_pool=None):
"""Function to call test.execute from map"""
name, test = pair
with backend.write_test(name) as w:
test.execute(name, log.get(), self.dmesg)
test.execute(name, log.get(), self.dmesg, self.monitoring)
w(test.result)
if self._monitoring.abort_needed:
this_pool.terminate()
def run_threads(pool, testlist):
""" Open a pool, close it, and join it """
pool.imap(test, testlist, chunksize)
pool.imap(lambda pair: test(pair, pool), testlist, chunksize)
pool.close()
pool.join()
......@@ -345,6 +366,9 @@ class TestProfile(object):
self._post_run_hook()
if self._monitoring.abort_needed:
raise exceptions.PiglitAbort(self._monitoring.error_message)
def filter_tests(self, function):
"""Filter out tests that return false from the supplied function
......
......@@ -137,6 +137,13 @@ def _run_parser(input_):
action="store_true",
help="Capture a difference in dmesg before and "
"after each test. Implies -1/--no-concurrency")
parser.add_argument("--abort-on-monitored-error",
action="store_true",
dest="monitored",
help="Enable monitoring according the rules defined "
"in piglit.conf, and stop the execution when a "
"monitored error is detected. Exit code 3. "
"Implies -1/--no-concurrency")
parser.add_argument("-s", "--sync",
action="store_true",
help="Sync results to disk after every test")
......@@ -225,7 +232,7 @@ def run(input_):
# If dmesg is requested we must have serial run, this is because dmesg
# isn't reliable with threaded run
if args.dmesg:
if args.dmesg or args.monitored:
args.concurrency = "none"
# Pass arguments into Options
......@@ -235,6 +242,7 @@ def run(input_):
options.OPTIONS.execute = args.execute
options.OPTIONS.valgrind = args.valgrind
options.OPTIONS.dmesg = args.dmesg
options.OPTIONS.monitored = args.monitored
options.OPTIONS.sync = args.sync
# Set the platform to pass to waffle
......@@ -284,6 +292,10 @@ def run(input_):
# Set the dmesg type
if args.dmesg:
profile.dmesg = args.dmesg
if args.monitored:
profile.monitoring = args.monitored
profile.run(args.log_level, backend)
results.time_elapsed.end = time.time()
......@@ -319,6 +331,7 @@ def resume(input_):
options.OPTIONS.execute = results.options['execute']
options.OPTIONS.valgrind = results.options['valgrind']
options.OPTIONS.dmesg = results.options['dmesg']
options.OPTIONS.monitored = results.options['monitored']
options.OPTIONS.sync = results.options['sync']
core.get_config(args.config_file)
......@@ -345,6 +358,9 @@ def resume(input_):
if options.OPTIONS.dmesg:
profile.dmesg = options.OPTIONS.dmesg
if options.OPTIONS.monitored:
profile.monitoring = options.OPTIONS.monitored
# This is resumed, don't bother with time since it won't be accurate anyway
profile.run(results.options['log_level'], backend)
......
......@@ -185,7 +185,7 @@ class Test(object):
assert isinstance(timeout, int)
self.timeout = timeout
def execute(self, path, log, dmesg):
def execute(self, path, log, dmesg, monitoring):
""" Run a test
Run a test, but with features. This times the test, uses dmesg checking
......@@ -195,6 +195,7 @@ class Test(object):
path -- the name of the test
log -- a log.Log instance
dmesg -- a dmesg.BaseDmesg derived class
monitoring -- a monitoring.Monitoring instance
"""
log.start(path)
......@@ -203,9 +204,11 @@ class Test(object):
try:
self.result.time.start = time.time()
dmesg.update_dmesg()
monitoring.update_monitoring()
self.run()
self.result.time.end = time.time()
self.result = dmesg.update_result(self.result)
monitoring.check_monitoring()
# This is a rare case where a bare exception is okay, since we're
# using it to log exceptions
except:
......
......@@ -162,3 +162,32 @@ run_test=./%(test_name)s
[expected-crashes]
; Like expected-failures, but specifies that a test is expected to
; crash.
[monitored-errors]
; Set the monitoring rules for checking when the system needs to be rebooted.
; Piglit must be launched with --abort-on-monitored-error
;
; For each activated monitoring rule a section must be created in this file that
; contains the type of monitoring (dmesg, file or locked_file).
; Depending on the type, the parameter 'parameters' is a filename or a list of
; options. The regex is the pattern that causes Piglit to abort when it is found.
; Examples :
;
;i915_error_state
;kmsg_monitoring
;dmesg_error
;
;[i915_error_state]
;type=file
;parameters=/sys/kernel/debug/dri/0/i915_error_state
;regex=^((?!no error state collected).)*$
;
;[kmsg_monitoring]
;type=locked_file
;parameters=/dev/kmsg
;regex=\*ERROR\* ring create req|\*ERROR\* Failed to reset chip
;
;[dmesg_error]
;type=dmesg
;parameters=--level emerg,alert,crit,err,warn,notice
;regex=\*ERROR\* ring create req|\*ERROR\* Failed to reset chip|BUG:|Oops:|turning off the locking correctness validator
......@@ -51,7 +51,7 @@ from framework.test.base import (
WindowResizeMixin,
)
from framework.options import _Options as Options
from framework import log, dmesg
from framework import log, dmesg, monitoring
# pylint: disable=invalid-name
......@@ -410,7 +410,8 @@ class TestExecuteTraceback(object):
test.execute(mock.Mock(spec=six.text_type),
mock.Mock(spec=log.BaseLog),
mock.Mock(spec=dmesg.BaseDmesg))
mock.Mock(spec=dmesg.BaseDmesg),
mock.Mock(spec=monitoring.Monitoring))
cls.test = test.result
......
# Copyright (c) 2016 Intel Corporation
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""Tests for the monitoring module.
This provides tests for the framework.monitoring module.
"""
from __future__ import (
absolute_import, division, print_function, unicode_literals
)
try:
from unittest import mock
except ImportError:
import mock
import nose.tools as nt
from . import utils
from framework import monitoring, exceptions
class TestMonitoring(object):
    """Tests for Monitoring methods."""

    def __init__(self):
        """Setup for TestMonitoring

        This creates a monitoring.Monitoring instance with monitoring disabled
        to avoid reading the rules in piglit.conf.

        """
        self.regex = r'\*ERROR\*|BUG:'
        # NOTE(review): these are raw strings, so each one ends with a literal
        # backslash followed by 'n' rather than a newline -- confirm intended.
        self.init_contents = r'foo bar\n'
        self.no_error_contents = r'foo bar\n'
        self.error_contents = r'BUG:bar\n'
        self.monitoring = monitoring.Monitoring(False)

    def test_Monitoring_delete_rule(self):
        """monitoring.Monitoring: add and delete rule."""
        with utils.tempfile(self.init_contents) as tfile:
            self.monitoring.add_rule('error_file',
                                     'file',
                                     tfile,
                                     self.regex)
            self.monitoring.update_monitoring()

            # Drop the rule before the error text is written, so the later
            # check is expected not to request an abort.
            self.monitoring.delete_rule('error_file')

            # Leaving the with block closes the file before the check runs.
            with open(tfile, 'w') as fp:
                fp.write(self.error_contents)
            self.monitoring.check_monitoring()

        nt.assert_equal(self.monitoring.abort_needed, False)

    @nt.raises(exceptions.PiglitFatalError)
    def test_Monitoring_add_rule_bad_format(self):
        """monitoring.Monitoring: add non existing type rule."""
        with utils.tempfile(self.init_contents) as tfile:
            self.monitoring.add_rule('error_file_bad_type',
                                     'bad_type',
                                     tfile,
                                     self.regex)

    def test_Monitoring_file_error(self):
        """monitoring.Monitoring: error found on a file."""
        with utils.tempfile(self.init_contents) as tfile:
            self.monitoring.add_rule('error_file',
                                     'file',
                                     tfile,
                                     self.regex)
            self.monitoring.update_monitoring()

            # Leaving the with block closes the file before the check runs.
            with open(tfile, 'w') as fp:
                fp.write(self.error_contents)
            self.monitoring.check_monitoring()

        nt.assert_equal(self.monitoring.abort_needed, True)

    def test_Monitoring_file_no_error(self):
        """monitoring.Monitoring: no error found on a file."""
        with utils.tempfile(self.init_contents) as tfile:
            self.monitoring.add_rule('no_error_file',
                                     'file',
                                     tfile,
                                     self.regex)
            self.monitoring.update_monitoring()

            # Leaving the with block closes the file before the check runs.
            with open(tfile, 'w') as fp:
                fp.write(self.no_error_contents)
            self.monitoring.check_monitoring()

        nt.assert_equal(self.monitoring.abort_needed, False)

    def test_Monitoring_locked_file_error(self):
        """monitoring.Monitoring: error found on a locked file."""
        with utils.tempfile(self.init_contents) as tfile:
            self.monitoring.add_rule('error_locked_file',
                                     'locked_file',
                                     tfile,
                                     self.regex)
            self.monitoring.update_monitoring()

            # Leaving the with block closes the file before the check runs.
            with open(tfile, 'w') as fp:
                fp.write(self.error_contents)
            self.monitoring.check_monitoring()

        nt.assert_equal(self.monitoring.abort_needed, True)

    def test_Monitoring_locked_file_no_error(self):
        """monitoring.Monitoring: no error found on a locked file."""
        with utils.tempfile(self.init_contents) as tfile:
            self.monitoring.add_rule('no_error_file',
                                     'locked_file',
                                     tfile,
                                     self.regex)
            self.monitoring.update_monitoring()

            # Leaving the with block closes the file before the check runs.
            with open(tfile, 'w') as fp:
                fp.write(self.no_error_contents)
            self.monitoring.check_monitoring()

        nt.assert_equal(self.monitoring.abort_needed, False)

    def test_Monitoring_dmesg_error(self):
        """monitoring.Monitoring: error found on the dmesg."""
        utils.platform_check('linux')

        # First fake dmesg output: seen while the rule is added and updated.
        mock_out = mock.Mock(return_value=b'[1.0]This\n[2.0]is\n[3.0]dmesg')
        with mock.patch('framework.dmesg.subprocess.check_output', mock_out):
            self.monitoring.add_rule('no_error_file',
                                     'dmesg',
                                     '--level emerg,alert,crit,err',
                                     self.regex)
            self.monitoring.update_monitoring()

        # Second fake dmesg output: contains '*ERROR*', which the regex
        # matches.
        mock_out.return_value = b'[4.0]foo\n[5.0]*ERROR* bar'
        with mock.patch('framework.dmesg.subprocess.check_output', mock_out):
            self.monitoring.check_monitoring()

        nt.assert_equal(self.monitoring.abort_needed, True)

    def test_Monitoring_dmesg_no_error(self):
        """monitoring.Monitoring: no error found on the dmesg."""
        utils.platform_check('linux')

        # First fake dmesg output: seen while the rule is added and updated.
        mock_out = mock.Mock(return_value=b'[1.0]This\n[2.0]is\n[3.0]dmesg')
        with mock.patch('framework.dmesg.subprocess.check_output', mock_out):
            self.monitoring.add_rule('no_error_file',
                                     'dmesg',
                                     '--level emerg,alert,crit,err',
                                     self.regex)
            self.monitoring.update_monitoring()

        # Second fake dmesg output: nothing in it matches the regex.
        mock_out.return_value = b'[4.0]foo\n[5.0] bar'
        with mock.patch('framework.dmesg.subprocess.check_output', mock_out):
            self.monitoring.check_monitoring()

        nt.assert_equal(self.monitoring.abort_needed, False)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment