1#!/usr/bin/env python
2
3# Copyright (C) 2018 Bocoup LLC All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions
7# are met:
8#
9# 1. Redistributions of source code must retain the above
10# copyright notice, this list of conditions and the following
11# disclaimer.
12# 2. Redistributions in binary form must reproduce the above
13# copyright notice, this list of conditions and the following
14# disclaimer in the documentation and/or other materials
15# provided with the distribution.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY
18# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
21# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
22# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
26# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
27# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28# SUCH DAMAGE.
29
30"""
31
This script runs run-webkit-tests and analyzes the results, then attempts to
update the -expected.txt files or the root TestExpectations file for each
failing test. This script is intended to be used after running
Tools/Scripts/import-w3c-tests to assist in creating a new test expectation
baseline after importing new tests from web-platform-tests.
37
38 The script will update the expectations files according to the following rules:
39
Initially the script runs Tools/Scripts/run-webkit-tests on the specified tests
or directories to generate a baseline.
42
43 - Missing tests will be re-run to ensure they are not flaky.
44
- Crashing or timing-out tests will be added to the root TestExpectations
  files with a [ Skip ] directive.
47
- Tests that pass but are expected to fail will have the failing
  directive removed from the TestExpectations file and will be run again.
50
- Failing ref tests will be added to the root TestExpectations files with
  an [ ImageOnlyFailure ] directive.
53
- Failing testharness tests will be run again with the --reset-results flag
  to reset the -expected.txt file. If testharness tests fail multiple times
  they will be added to the root TestExpectations files with a [ Failure ]
  directive.
58
- Flaky tests will be added to the root TestExpectations files with all of
  their failure state directives.
61"""
62
63import argparse
64import json
65from subprocess import Popen
66import io
67import os
68from sets import Set
69import logging
70
71from webkitpy.layout_tests.run_webkit_tests import parse_args
72from webkitpy.common.host import Host
73
74_log = logging.getLogger(__name__)
75
76
def configure_logging():
    """Install a stream handler on the root logger at INFO level.

    Records above INFO are prefixed with their level name; INFO and below
    are emitted as the bare message. Returns the installed handler so a
    caller can detach or reconfigure it later.
    """
    class LogHandler(logging.StreamHandler):

        def format(self, record):
            message = record.getMessage()
            if record.levelno <= logging.INFO:
                return message
            return "%s: %s" % (record.levelname, message)

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    log_handler = LogHandler()
    log_handler.setLevel(logging.INFO)
    root_logger.addHandler(log_handler)
    return log_handler
91
92
# TODO
# Add documentation of the algorithm for updating test expectations
# Clean up code to follow webkitpy standards
# Add unit tests
97
def main(_argv, _stdout, _stderr):
    """Entry point: set up logging, then run the expectation updater
    over the command-line arguments."""
    configure_logging()
    updater = TestExpecationUpdater(Host(), _argv)
    updater.do_update()
103
104
class TestExpecationUpdater(object):
    """Drives run-webkit-tests over a set of tests and rewrites the
    -expected.txt baselines or TestExpectations entries for the failures
    it finds, following the rules in the module docstring.

    (The missing 't' in the class name is preserved so existing callers
    keep working.)
    """

    def __init__(self, host, args):
        """Initialize the updater.

        host -- a webkitpy Host used for filesystem and port access.
        args -- raw run-webkit-tests command-line arguments; parse_args()
                splits them into options and test/directory paths.
        """
        self._host = host
        options, path_args = parse_args(args)
        self._options = options
        # Everything parse_args() did not consume as a test path is an
        # option argument; keep them in their original command-line order.
        path_arg_set = set(path_args)
        self._option_args = [arg for arg in args if arg not in path_arg_set]
        self._base_test = path_args
        self._port = host.port_factory.get(options.platform, options)

    def do_update(self):
        """Run the requested tests once to establish a baseline, then
        process every test whose result did not match its expectations."""
        self._run_webkit_tests(self._base_test)
        with open(self._results_file()) as f:
            data = json.load(f)
        tests = self._pre_process_tests(data['tests'])

        # enumerate() replaces the original tests.index(test) lookup,
        # which cost O(n) per iteration and is ambiguous for equal entries.
        for position, test in enumerate(tests):
            _log.info('%s/%s Processing test %s' % (position, len(tests), test['name']))
            self._update_expectation_for_failing_test(test)

    def _update_expectation_for_failing_test(self, test, post_reset_result=False, previous_result=None):
        """Dispatch a failing test result to the appropriate handler.

        post_reset_result -- True when this result came from a re-run
            after resetting the -expected.txt baseline.
        previous_result -- an earlier failing result for the same test,
            set when a handler re-ran the test and it failed again.
        Raises NotImplementedError for result combinations with no rule.
        """
        _log.debug(test)
        if test.get('report') == 'FLAKY' or previous_result:
            return self._flaky_test(test, previous_result)
        if test.get('report') == 'MISSING':
            return self._missing_test(test)
        if test.get('report') == 'REGRESSION' and test.get('expected') == 'CRASH':
            # No return: drop the stale CRASH expectation first, then let
            # the checks below classify the new actual result.
            self._unexpected_pass_test(test)
        if test.get('actual') == 'CRASH':
            return self._crash_test(test)
        if test.get('actual') == 'TIMEOUT':
            return self._timeout_test(test)
        if test.get('actual') == 'PASS':
            return self._unexpected_pass_test(test)
        if test.get('actual') == 'IMAGE':
            return self._failing_ref_test(test)
        if test.get('actual') == 'TEXT MISSING':
            return self._missing_test(test)
        if test.get('actual') == 'TEXT' and post_reset_result:
            return self._failing_testharness_test(test)
        if test.get('actual') == 'TEXT IMAGE+TEXT':
            return self._failing_testharness_test(test)
        if test.get('actual') == 'TEXT':
            return self._reset_testharness_test(test)
        raise NotImplementedError('The test updater decision engine could not figure out how to handle test: %s' % json.dumps(test))

    def _flaky_test(self, test, previous_result=None):
        """Record every failure state seen for a flaky test, then re-run
        it once to see whether it has settled into a consistent failure."""
        expectations = test['actual'].split(' ')
        if previous_result:
            expectations = expectations + previous_result['actual'].split(' ')
        # sorted() makes the emitted expectation line deterministic; the
        # original list(Set(...)) order varied between runs.
        expectations = sorted(set(expectations + ['FAIL']))
        self._update_test_expectation(test['name'], self._render_expectations(expectations))
        self._run_webkit_tests([test['name']])
        result = self._extract_failing_test_result(test)
        if result:
            self._update_expectation_for_failing_test(result, previous_result=test)

    def _missing_test(self, test):
        """Re-run a test with missing results to ensure it is not flaky
        before writing any expectation."""
        self._run_webkit_tests([test['name']])
        result = self._extract_failing_test_result(test)
        if result:
            # Test is still failing, attempt to re-classify
            self._update_expectation_for_failing_test(result)

    def _unexpected_pass_test(self, test):
        """Drop the stale failure expectation for a passing test and
        re-run it to confirm the pass."""
        self._remove_test_expectation(test['name'])

        self._run_webkit_tests([test['name']])
        result = self._extract_failing_test_result(test)
        if result:
            self._update_expectation_for_failing_test(result, previous_result=test)

    def _crash_test(self, test):
        """Skip a crashing test."""
        self._update_test_expectation(test['name'], 'Skip')

    def _timeout_test(self, test):
        """Skip a timing-out test."""
        self._update_test_expectation(test['name'], 'Skip')

    def _failing_ref_test(self, test):
        """Mark a failing reference test as ImageOnlyFailure, then re-run
        to confirm the classification."""
        self._update_test_expectation(test['name'], 'ImageOnlyFailure')
        self._run_webkit_tests([test['name']])
        result = self._extract_failing_test_result(test)
        if result:
            # Test is still failing, attempt to re-classify
            self._update_expectation_for_failing_test(result, previous_result=test)

    def _reset_testharness_test(self, test):
        """Reset the -expected.txt baseline for a text failure, then
        re-run to check whether the new baseline holds."""
        self._run_webkit_tests([test['name']], reset_results=True)
        self._run_webkit_tests([test['name']])
        result = self._extract_failing_test_result(test)
        if result:
            self._update_expectation_for_failing_test(result, post_reset_result=True)

    def _failing_testharness_test(self, test):
        """Mark a persistently failing testharness test as Failure, then
        re-run to confirm."""
        self._update_test_expectation(test['name'], 'Failure')
        self._run_webkit_tests([test['name']])
        result = self._extract_failing_test_result(test)
        if result:
            self._update_expectation_for_failing_test(result, previous_result=test)

    def _run_webkit_tests(self, test_files, reset_results=False):
        """Invoke Tools/Scripts/run-webkit-tests on test_files with the
        options this updater was constructed with; returns the exit code."""
        args = ['Tools/Scripts/run-webkit-tests'] + self._option_args
        if reset_results:
            args.append('--reset-results')

        args = args + test_files

        _log.info('Running webkit tests for: %s' % test_files)
        p = Popen(args)
        return p.wait()

    def _test_expectations_path(self):
        """Path to the generic (root) TestExpectations file."""
        return self._port.path_to_generic_test_expectations_file()

    def _results_file(self):
        """Path to the full-results.json written by the last test run."""
        options = self._options
        return self._host.filesystem.join(options.build_directory, options.configuration, 'layout-test-results/full-results.json')

    def _render_expectations(self, failures):
        """Translate full-results.json failure tokens into the directive
        names used in TestExpectations files."""
        expectation_map = {
            'TIMEOUT': 'Timeout',
            'FAIL': 'Failure',
            'TEXT': 'Failure',
            'IMAGE+TEXT': 'Failure',
            'MISSING': '',  # MISSING has no directive of its own
            'PASS': 'Pass',
            'IMAGE': 'ImageOnlyFailure',
        }
        return ' '.join([expectation_map[failure] for failure in failures])

    def _update_test_expectation(self, test, expectation):
        """Replace any existing expectation lines for test with a single
        new line carrying the given directive(s)."""
        self._remove_test_expectation(test)
        with open(self._test_expectations_path(), 'a') as myfile:
            _log.info('Updating TestExpectations %s [ %s ]' % (test, expectation))
            myfile.write('\n%s [ %s ]\n' % (test, expectation))

    def _remove_test_expectation(self, test_name):
        """Strip test_name from every expectations file of the port."""
        for path in self._port.expectations_files():
            if os.path.isfile(path):
                self._remove_test_expectation_from_path(path, test_name)

    def _remove_test_expectation_from_path(self, expectation_file, test_name):
        """Rewrite expectation_file without any line mentioning test_name.

        NOTE(review): this is a substring match, so it also drops lines
        for other tests whose names merely contain test_name -- confirm
        that is acceptable before reusing this helper.
        """
        with io.open(expectation_file, 'r', encoding="utf-8") as fd:
            lines = fd.readlines()

        with io.open(expectation_file, 'w', encoding="utf-8") as fd:
            for line in lines:
                if test_name not in line:
                    fd.write(line)

    def _extract_failing_test_result(self, test):
        """Return the latest result for test if it is still failing, or
        None if it now matches its expectations (or did not re-run)."""
        with open(self._results_file()) as f:
            data = json.load(f)
        tests = self._pre_process_tests(data['tests'])
        matching_tests = [t for t in tests if t['name'] == test['name']]

        if matching_tests and not self._results_match_expectation(matching_tests[0]):
            return matching_tests[0]
        return None

    def _pre_process_tests(self, test_dict):
        """Flatten the nested results tree into a list of result dicts
        (each tagged with its full path under 'name'), keeping only tests
        whose actual result did not match their expectations."""
        tests = self._flatten_path(test_dict)
        processed_tests = []
        for file_name, results in tests.items():
            results['name'] = file_name
            processed_tests.append(results)

        return [test for test in processed_tests if not self._results_match_expectation(test)]

    def _results_match_expectation(self, result):
        """Return True if the actual result is already covered by the
        test's expected result string."""
        expected_tokens = result['expected'].split()
        # A generic FAIL expectation covers a plain text failure.
        if 'FAIL' in expected_tokens and result['actual'] == 'TEXT':
            return True
        # Compare whole tokens: the original substring test wrongly
        # treated actual 'TEXT' as covered by expected 'IMAGE+TEXT'.
        return set(result['actual'].split()) <= set(expected_tokens)

    def _flatten_path(self, obj):
        """Collapse the nested directory trie from full-results.json into
        a flat {full-test-path: result-dict} mapping."""
        flattened = {}
        for key, value in obj.items():
            if 'expected' in value:
                # Leaf node: an individual test's result entry.
                flattened[key] = value
            else:
                for sub_key, sub_value in self._flatten_path(value).items():
                    flattened[key + '/' + sub_key] = sub_value
        return flattened