contrib/gcc-changelog/git_commit.py

fb8a8121Smrg#!/usr/bin/env python3
fb8a8121Smrg#
fb8a8121Smrg# This file is part of GCC.
fb8a8121Smrg#
fb8a8121Smrg# GCC is free software; you can redistribute it and/or modify it under
fb8a8121Smrg# the terms of the GNU General Public License as published by the Free
fb8a8121Smrg# Software Foundation; either version 3, or (at your option) any later
fb8a8121Smrg# version.
fb8a8121Smrg#
fb8a8121Smrg# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
fb8a8121Smrg# WARRANTY; without even the implied warranty of MERCHANTABILITY or
fb8a8121Smrg# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
fb8a8121Smrg# for more details.
fb8a8121Smrg#
fb8a8121Smrg# You should have received a copy of the GNU General Public License
fb8a8121Smrg# along with GCC; see the file COPYING3.  If not see
fb8a8121Smrg# <http://www.gnu.org/licenses/>.  */
fb8a8121Smrg
fb8a8121Smrgimport difflib
fb8a8121Smrgimport os
fb8a8121Smrgimport re
a448f87cSmrgimport sys
fb8a8121Smrg
# Directories that own a ChangeLog file.  GitCommit copies this set into a
# per-commit list (see init_changelog_locations) before adjusting it.
default_changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'liboffloadmic',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib',
}
fb8a8121Smrg
# Component names accepted in "PR component/NNNN" references.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web',
}
fb8a8121Smrg
# Path prefixes tested by GitCommit.in_ignored_location().
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libffi/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
}
fb8a8121Smrg
# Path prefixes consulted by GitCommit.check_file_patterns() for entries
# written with a trailing '*'.
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/',
}
fb8a8121Smrg
# Files that, like ChangeLog files, must be updated in commits of their own
# (see the project_files handling in GitCommit.__init__).
misc_files = {
    'gcc/BASE-VER',
    'gcc/DATESTAMP',
    'gcc/DEV-PHASE',
}
fb8a8121Smrg
# "YYYY-MM-DD  Name  <email>" header line of a ChangeLog entry.
author_line_regex = re.compile(
    r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.*  <.*>)')
# Tab-indented "Name  <email>" line; the number of spaces captured after the
# tab is validated by the caller.
additional_author_regex = re.compile(
    r'^\t(?P<spaces>\ *)?(?P<name>.*  <.*>)')
# "[for ]path/ChangeLog[:]" location line.  NOTE: '+-/' inside the character
# class is a range, i.e. it accepts '+', ',', '-', '.' and '/'.
changelog_regex = re.compile(
    r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# "PR component/NNNN" anywhere in the subject line.
subject_pr_regex = re.compile(
    r'(^|\W)PR\s+(?P<component>[a-zA-Z+-]+)/(?P<pr>\d{4,7})')
# "[PRNNNN]" or "(PRNNNN)" anywhere in the subject line.
subject_pr2_regex = re.compile(
    r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# Tab-indented "PR [component/]NNNN" line in the ChangeLog body.
pr_regex = re.compile(
    r'\tPR (?P<component>[a-z+-]+\/)?(?P<pr>[0-9]+)$')
# Tab-indented "DR NNNN" line in the ChangeLog body.
dr_regex = re.compile(
    r'\tDR ([0-9]+)$')
# Tab, asterisk, the spaces that follow it, and the rest of the line.
star_prefix_regex = re.compile(
    r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that terminates the file list of an entry line.
end_of_location_regex = re.compile(
    r'[\[<(:]')
# "(name):" item with nothing after the colon.
item_empty_regex = re.compile(
    r'\t(\* \S+ )?\(\S+\):\s*$')
# Line that opens a new item: "\t*" or "\t(name):".
item_parenthesis_regex = re.compile(
    r'\t(\*|\(\S+\):)')
# Line that identifies the commit being reverted.
revert_regex = re.compile(
    r'This reverts commit (?P<hash>[0-9a-f]+)\.$')
# Cherry-pick trailer.
cherry_pick_regex = re.compile(
    r'cherry picked from commit (?P<hash>\w+)')
fb8a8121Smrg
# Maximum ChangeLog line width, measured after expanding each tab to
# TAB_WIDTH spaces.
LINE_LIMIT = 100
TAB_WIDTH = 8

# Trailer prefixes; they are compared against the lower-cased line.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '
REVIEW_PREFIXES = (
    'reviewed-by: ',
    'reviewed-on: ',
    'signed-off-by: ',
    'acked-by: ',
    'tested-by: ',
    'reported-by: ',
    'suggested-by: ',
)

DATE_FORMAT = '%Y-%m-%d'
fb8a8121Smrg
fb8a8121Smrg
def decode_path(path):
    """Return PATH with git's C-style quoting undone.

    When core.quotepath is true (default value), utf8 chars are encoded like:
    "b/ko\\304\\215ka.txt"

    The upstream bug is fixed:
    https://github.com/gitpython-developers/GitPython/issues/1099

    but we still need a workaround for older versions of the library.
    Please take a look at the explanation of the transformation:
    https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string

    A path that is not wrapped in double quotes is returned unchanged.
    """
    if len(path) >= 2 and path.startswith('"') and path.endswith('"'):
        # Remove exactly one quote from each side.  str.strip('"') would also
        # eat the quote of a trailing escaped '\"', leaving a dangling
        # backslash that the 'unicode-escape' codec rejects.
        quoted = path[1:-1]
        # Octal escapes decode to one code point per byte; the latin-1 round
        # trip turns those code points back into bytes so they can be read
        # as UTF-8.
        return (quoted.encode('utf8').decode('unicode-escape')
                .encode('latin-1').decode('utf8'))
    return path
fb8a8121Smrg
fb8a8121Smrg
class Error:
    """A problem found in a commit: a message plus optional line and details."""

    def __init__(self, message, line=None, details=None):
        self.message, self.line, self.details = message, line, details

    def __repr__(self):
        # The offending line is appended in double quotes only when it is set
        # and non-empty.
        if not self.line:
            return self.message
        return self.message + ': "%s"' % self.line
fb8a8121Smrg
fb8a8121Smrg
class ChangeLogEntry:
    """One ChangeLog entry: its folder, author lines, PR lines and text."""

    def __init__(self, folder, authors, prs):
        self.folder = folder
        # Private copies: appending to this entry must not modify the
        # sequences passed in by the caller.
        self.author_lines = [*authors]
        self.initial_prs = [*prs]
        self.prs = [*prs]
        self.lines = []
        self.files = []
        self.file_patterns = []
        self.parentheses_stack = []

    def parse_file_names(self):
        """Fill self.files and self.file_patterns from self.lines."""
        # True while we are between a star prefix and the end of the file
        # list (a colon or an opening bracket); file lists may span lines.
        collecting = False

        for text in self.lines:
            # Only "\t* " with exactly one space starts a file list.  Macro
            # names in a continued item can also start with an asterisk:
            #
            # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
            # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
            # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
            #
            star = star_prefix_regex.match(text)
            if star is not None and len(star.group('spaces')) == 1:
                collecting = True
                text = star.group('content')

            if not collecting:
                continue

            # Drop everything from the first "(NAME)", "<PATTERN>", "[COND]"
            # or colon onwards; that also ends the file list.
            stop = end_of_location_regex.search(text)
            if stop is not None:
                text = text[:stop.start()]
                collecting = False

            # What remains is a comma separated list of file names.
            for chunk in text.split(','):
                name = chunk.strip()
                if not name:
                    continue
                if name.endswith('*'):
                    self.file_patterns.append(name[:-1])
                else:
                    self.files.append(name)

    @property
    def datetime(self):
        """First truthy date among the author lines, or None."""
        return next((item[1] for item in self.author_lines if item[1]), None)

    @property
    def authors(self):
        """Author names, i.e. the first element of every author line."""
        names = []
        for item in self.author_lines:
            names.append(item[0])
        return names

    @property
    def is_empty(self):
        """True when no text line and no PR beyond the initial ones was added."""
        if self.lines:
            return False
        return self.prs == self.initial_prs

    def contains_author(self, author):
        """Return True if AUTHOR is the name of one of the author lines."""
        return any(item[0] == author for item in self.author_lines)
fb8a8121Smrg
fb8a8121Smrg
class GitInfo:
    """Plain record with the data of one commit that GitCommit works on."""

    def __init__(self, hexsha, date, author, lines, modified_files):
        # Identification of the commit.
        self.hexsha, self.date, self.author = hexsha, date, author
        # Commit message lines; GitCommit treats lines[0] as the subject.
        self.lines = lines
        # Sequence of (path, status) pairs, status being 'A', 'D' or 'M'.
        self.modified_files = modified_files
fb8a8121Smrg
fb8a8121Smrg
fb8a8121Smrgclass GitCommit:
a448f87cSmrg    def __init__(self, info, commit_to_info_hook=None, ref_name=None):
fb8a8121Smrg        self.original_info = info
fb8a8121Smrg        self.info = info
fb8a8121Smrg        self.message = None
fb8a8121Smrg        self.changes = None
fb8a8121Smrg        self.changelog_entries = []
fb8a8121Smrg        self.errors = []
fb8a8121Smrg        self.top_level_authors = []
fb8a8121Smrg        self.co_authors = []
fb8a8121Smrg        self.top_level_prs = []
a448f87cSmrg        self.subject_prs = set()
fb8a8121Smrg        self.cherry_pick_commit = None
fb8a8121Smrg        self.revert_commit = None
fb8a8121Smrg        self.commit_to_info_hook = commit_to_info_hook
a448f87cSmrg        self.init_changelog_locations(ref_name)
fb8a8121Smrg
fb8a8121Smrg        # Skip Update copyright years commits
fb8a8121Smrg        if self.info.lines and self.info.lines[0] == 'Update copyright years.':
fb8a8121Smrg            return
fb8a8121Smrg
a448f87cSmrg        if self.info.lines and len(self.info.lines) > 1 and self.info.lines[1]:
a448f87cSmrg            self.errors.append(Error('Expected empty second line in commit message', info.lines[0]))
a448f87cSmrg
fb8a8121Smrg        # Identify first if the commit is a Revert commit
fb8a8121Smrg        for line in self.info.lines:
*b1e83836Smrg            m = revert_regex.fullmatch(line)
fb8a8121Smrg            if m:
fb8a8121Smrg                self.revert_commit = m.group('hash')
fb8a8121Smrg                break
fb8a8121Smrg        if self.revert_commit:
fb8a8121Smrg            self.info = self.commit_to_info_hook(self.revert_commit)
fb8a8121Smrg
a448f87cSmrg        # The following happens for get_email.py:
a448f87cSmrg        if not self.info:
a448f87cSmrg            return
a448f87cSmrg
a448f87cSmrg        self.check_commit_email()
a448f87cSmrg
a448f87cSmrg        # Extract PR numbers form the subject line
a448f87cSmrg        # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
a448f87cSmrg        if self.info.lines and not self.revert_commit:
a448f87cSmrg            self.subject_prs = {m.group('pr') for m in subject_pr2_regex.finditer(info.lines[0])}
a448f87cSmrg            for m in subject_pr_regex.finditer(info.lines[0]):
a448f87cSmrg                if not m.group('component') in bug_components:
a448f87cSmrg                    self.errors.append(Error('invalid PR component in subject', info.lines[0]))
a448f87cSmrg                self.subject_prs.add(m.group('pr'))
a448f87cSmrg
a448f87cSmrg        # Allow complete deletion of ChangeLog files in a commit
fb8a8121Smrg        project_files = [f for f in self.info.modified_files
a448f87cSmrg                         if (self.is_changelog_filename(f[0], allow_suffix=True) and f[1] != 'D')
fb8a8121Smrg                         or f[0] in misc_files]
fb8a8121Smrg        ignored_files = [f for f in self.info.modified_files
fb8a8121Smrg                         if self.in_ignored_location(f[0])]
fb8a8121Smrg        if len(project_files) == len(self.info.modified_files):
fb8a8121Smrg            # All modified files are only MISC files
fb8a8121Smrg            return
a448f87cSmrg        elif project_files:
a448f87cSmrg            err = 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
a448f87cSmrg                  'should be done separately from normal commits\n' \
a448f87cSmrg                  '(note: ChangeLog entries will be automatically ' \
a448f87cSmrg                  'added by a cron job)'
a448f87cSmrg            self.errors.append(Error(err))
fb8a8121Smrg            return
fb8a8121Smrg
fb8a8121Smrg        all_are_ignored = (len(project_files) + len(ignored_files)
fb8a8121Smrg                           == len(self.info.modified_files))
fb8a8121Smrg        self.parse_lines(all_are_ignored)
fb8a8121Smrg        if self.changes:
fb8a8121Smrg            self.parse_changelog()
fb8a8121Smrg            self.parse_file_names()
fb8a8121Smrg            self.check_for_empty_description()
a448f87cSmrg            self.check_for_broken_parentheses()
fb8a8121Smrg            self.deduce_changelog_locations()
fb8a8121Smrg            self.check_file_patterns()
fb8a8121Smrg            if not self.errors:
fb8a8121Smrg                self.check_mentioned_files()
fb8a8121Smrg                self.check_for_correct_changelog()
a448f87cSmrg        if self.subject_prs:
a448f87cSmrg            self.errors.append(Error('PR %s in subject but not in changelog' %
a448f87cSmrg                                     ', '.join(self.subject_prs), self.info.lines[0]))
fb8a8121Smrg
fb8a8121Smrg    @property
fb8a8121Smrg    def success(self):
fb8a8121Smrg        return not self.errors
fb8a8121Smrg
fb8a8121Smrg    @property
fb8a8121Smrg    def new_files(self):
fb8a8121Smrg        return [x[0] for x in self.info.modified_files if x[1] == 'A']
fb8a8121Smrg
fb8a8121Smrg    @classmethod
a448f87cSmrg    def is_changelog_filename(cls, path, allow_suffix=False):
a448f87cSmrg        basename = os.path.basename(path)
a448f87cSmrg        if basename == 'ChangeLog':
a448f87cSmrg            return True
a448f87cSmrg        elif allow_suffix and basename.startswith('ChangeLog'):
a448f87cSmrg            return True
a448f87cSmrg        else:
a448f87cSmrg            return False
fb8a8121Smrg
a448f87cSmrg    def find_changelog_location(self, name):
fb8a8121Smrg        if name.startswith('\t'):
fb8a8121Smrg            name = name[1:]
fb8a8121Smrg        if name.endswith(':'):
fb8a8121Smrg            name = name[:-1]
fb8a8121Smrg        if name.endswith('/'):
fb8a8121Smrg            name = name[:-1]
a448f87cSmrg        return name if name in self.changelog_locations else None
fb8a8121Smrg
fb8a8121Smrg    @classmethod
fb8a8121Smrg    def format_git_author(cls, author):
fb8a8121Smrg        assert '<' in author
fb8a8121Smrg        return author.replace('<', ' <')
fb8a8121Smrg
fb8a8121Smrg    @classmethod
fb8a8121Smrg    def parse_git_name_status(cls, string):
fb8a8121Smrg        modified_files = []
fb8a8121Smrg        for entry in string.split('\n'):
fb8a8121Smrg            parts = entry.split('\t')
fb8a8121Smrg            t = parts[0]
fb8a8121Smrg            if t == 'A' or t == 'D' or t == 'M':
fb8a8121Smrg                modified_files.append((parts[1], t))
fb8a8121Smrg            elif t.startswith('R'):
fb8a8121Smrg                modified_files.append((parts[1], 'D'))
fb8a8121Smrg                modified_files.append((parts[2], 'A'))
fb8a8121Smrg        return modified_files
fb8a8121Smrg
a448f87cSmrg    def init_changelog_locations(self, ref_name):
a448f87cSmrg        self.changelog_locations = list(default_changelog_locations)
a448f87cSmrg        if ref_name:
a448f87cSmrg            version = sys.maxsize
a448f87cSmrg            if 'releases/gcc-' in ref_name:
a448f87cSmrg                version = int(ref_name.split('-')[-1])
a448f87cSmrg            if version >= 12:
a448f87cSmrg                # HSA and BRIG were removed in GCC 12
a448f87cSmrg                self.changelog_locations.remove('gcc/brig')
a448f87cSmrg                self.changelog_locations.remove('libhsail-rt')
a448f87cSmrg
fb8a8121Smrg    def parse_lines(self, all_are_ignored):
fb8a8121Smrg        body = self.info.lines
fb8a8121Smrg
fb8a8121Smrg        for i, b in enumerate(body):
fb8a8121Smrg            if not b:
fb8a8121Smrg                continue
fb8a8121Smrg            if (changelog_regex.match(b) or self.find_changelog_location(b)
fb8a8121Smrg                    or star_prefix_regex.match(b) or pr_regex.match(b)
a448f87cSmrg                    or dr_regex.match(b) or author_line_regex.match(b)
a448f87cSmrg                    or b.lower().startswith(CO_AUTHORED_BY_PREFIX)):
fb8a8121Smrg                self.changes = body[i:]
fb8a8121Smrg                return
fb8a8121Smrg        if not all_are_ignored:
fb8a8121Smrg            self.errors.append(Error('cannot find a ChangeLog location in '
fb8a8121Smrg                                     'message'))
fb8a8121Smrg
    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Every line is validated (trailing whitespace, length, indentation,
        PR component, asterisk spacing, ...) and problems are appended to
        self.errors.  Entries are appended to self.changelog_entries;
        authors and PRs seen before the first entry are collected in
        self.top_level_authors and self.top_level_prs and are passed to
        every entry created afterwards.  Co-authors and the cherry-pick
        hash are stored in self.co_authors and self.cherry_pick_commit.
        """
        # Entry that currently receives lines; None before the first entry
        # and after a blank line that ends an entry without location.
        last_entry = None
        # Set once an entry without an explicit location has been created;
        # from then on a blank line closes the current entry.
        will_deduce = False
        for line in self.changes:
            if not line:
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                # support long filenames
                if not line.startswith('\t* ') or not line.endswith(':') or ' ' in line[3:-1]:
                    self.errors.append(Error('line exceeds %d character limit'
                                             % LINE_LIMIT, line))
            # An explicit location ("path/ChangeLog:" or a known directory
            # name) starts a new entry.
            m = changelog_regex.match(line)
            if m:
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                # Classify the line: author line, PR/DR line, or other.
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    m = pr_regex.match(line)
                    component = m.group('component')
                    pr = m.group('pr')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    elif not component[:-1] in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                    # The PR is mentioned in the body, so it no longer counts
                    # as "in subject only".  Not reached for the two invalid
                    # cases above because of their 'continue'.
                    if pr in self.subject_prs:
                        self.subject_prs.remove(pr)
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                # Trailers are never part of an entry.
                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                # From here on the line belongs to last_entry.
                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        # Inside an open parenthesis an asterisk may start a
                        # continued name, so the spacing rule is not applied.
                        if (len(m.group('spaces')) != 1 and
                                not last_entry.parentheses_stack):
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
a448f87cSmrg
a448f87cSmrg    def process_parentheses(self, last_entry, line):
a448f87cSmrg        for c in line:
a448f87cSmrg            if c == '(':
a448f87cSmrg                last_entry.parentheses_stack.append(line)
a448f87cSmrg            elif c == ')':
a448f87cSmrg                if not last_entry.parentheses_stack:
a448f87cSmrg                    msg = 'bad wrapping of parenthesis'
a448f87cSmrg                    self.errors.append(Error(msg, line))
a448f87cSmrg                else:
a448f87cSmrg                    del last_entry.parentheses_stack[-1]
fb8a8121Smrg
fb8a8121Smrg    def parse_file_names(self):
fb8a8121Smrg        for entry in self.changelog_entries:
fb8a8121Smrg            entry.parse_file_names()
fb8a8121Smrg
fb8a8121Smrg    def check_file_patterns(self):
fb8a8121Smrg        for entry in self.changelog_entries:
fb8a8121Smrg            for pattern in entry.file_patterns:
fb8a8121Smrg                name = os.path.join(entry.folder, pattern)
fb8a8121Smrg                if not [name.startswith(pr) for pr in wildcard_prefixes]:
fb8a8121Smrg                    msg = 'unsupported wildcard prefix'
fb8a8121Smrg                    self.errors.append(Error(msg, name))
fb8a8121Smrg
fb8a8121Smrg    def check_for_empty_description(self):
fb8a8121Smrg        for entry in self.changelog_entries:
fb8a8121Smrg            for i, line in enumerate(entry.lines):
fb8a8121Smrg                if (item_empty_regex.match(line) and
fb8a8121Smrg                    (i == len(entry.lines) - 1
fb8a8121Smrg                     or not entry.lines[i+1].strip()
fb8a8121Smrg                     or item_parenthesis_regex.match(entry.lines[i+1]))):
fb8a8121Smrg                    msg = 'missing description of a change'
fb8a8121Smrg                    self.errors.append(Error(msg, line))
fb8a8121Smrg
a448f87cSmrg    def check_for_broken_parentheses(self):
a448f87cSmrg        for entry in self.changelog_entries:
a448f87cSmrg            if entry.parentheses_stack:
a448f87cSmrg                msg = 'bad parentheses wrapping'
a448f87cSmrg                self.errors.append(Error(msg, entry.parentheses_stack[-1]))
a448f87cSmrg
fb8a8121Smrg    def get_file_changelog_location(self, changelog_file):
fb8a8121Smrg        for file in self.info.modified_files:
fb8a8121Smrg            if file[0] == changelog_file:
fb8a8121Smrg                # root ChangeLog file
fb8a8121Smrg                return ''
fb8a8121Smrg            index = file[0].find('/' + changelog_file)
fb8a8121Smrg            if index != -1:
fb8a8121Smrg                return file[0][:index]
fb8a8121Smrg        return None
fb8a8121Smrg
fb8a8121Smrg    def deduce_changelog_locations(self):
fb8a8121Smrg        for entry in self.changelog_entries:
fb8a8121Smrg            if not entry.folder:
fb8a8121Smrg                changelog = None
fb8a8121Smrg                for file in entry.files:
fb8a8121Smrg                    location = self.get_file_changelog_location(file)
fb8a8121Smrg                    if (location == ''
a448f87cSmrg                       or (location and location in self.changelog_locations)):
fb8a8121Smrg                        if changelog and changelog != location:
fb8a8121Smrg                            msg = 'could not deduce ChangeLog file, ' \
fb8a8121Smrg                                  'not unique location'
fb8a8121Smrg                            self.errors.append(Error(msg))
fb8a8121Smrg                            return
fb8a8121Smrg                        changelog = location
fb8a8121Smrg                if changelog is not None:
fb8a8121Smrg                    entry.folder = changelog
fb8a8121Smrg                else:
fb8a8121Smrg                    msg = 'could not deduce ChangeLog file'
fb8a8121Smrg                    self.errors.append(Error(msg))
fb8a8121Smrg
fb8a8121Smrg    @classmethod
fb8a8121Smrg    def in_ignored_location(cls, path):
fb8a8121Smrg        for ignored in ignored_prefixes:
fb8a8121Smrg            if path.startswith(ignored):
fb8a8121Smrg                return True
fb8a8121Smrg        return False
fb8a8121Smrg
a448f87cSmrg    def get_changelog_by_path(self, path):
fb8a8121Smrg        components = path.split('/')
fb8a8121Smrg        while components:
a448f87cSmrg            if '/'.join(components) in self.changelog_locations:
fb8a8121Smrg                break
fb8a8121Smrg            components = components[:-1]
fb8a8121Smrg        return '/'.join(components)
fb8a8121Smrg
fb8a8121Smrg    def check_mentioned_files(self):
fb8a8121Smrg        folder_count = len([x.folder for x in self.changelog_entries])
fb8a8121Smrg        assert folder_count == len(self.changelog_entries)
fb8a8121Smrg
fb8a8121Smrg        mentioned_files = set()
fb8a8121Smrg        mentioned_patterns = []
fb8a8121Smrg        used_patterns = set()
fb8a8121Smrg        for entry in self.changelog_entries:
fb8a8121Smrg            if not entry.files and not entry.file_patterns:
fb8a8121Smrg                msg = 'no files mentioned for ChangeLog in directory'
fb8a8121Smrg                self.errors.append(Error(msg, entry.folder))
fb8a8121Smrg            assert not entry.folder.endswith('/')
fb8a8121Smrg            for file in entry.files:
fb8a8121Smrg                if not self.is_changelog_filename(file):
a448f87cSmrg                    item = os.path.join(entry.folder, file)
a448f87cSmrg                    if item in mentioned_files:
a448f87cSmrg                        msg = 'same file specified multiple times'
a448f87cSmrg                        self.errors.append(Error(msg, file))
a448f87cSmrg                    else:
a448f87cSmrg                        mentioned_files.add(item)
fb8a8121Smrg            for pattern in entry.file_patterns:
fb8a8121Smrg                mentioned_patterns.append(os.path.join(entry.folder, pattern))
fb8a8121Smrg
fb8a8121Smrg        cand = [x[0] for x in self.info.modified_files
fb8a8121Smrg                if not self.is_changelog_filename(x[0])]
fb8a8121Smrg        changed_files = set(cand)
fb8a8121Smrg        for file in sorted(mentioned_files - changed_files):
fb8a8121Smrg            msg = 'unchanged file mentioned in a ChangeLog'
fb8a8121Smrg            candidates = difflib.get_close_matches(file, changed_files, 1)
a448f87cSmrg            details = None
fb8a8121Smrg            if candidates:
fb8a8121Smrg                msg += f' (did you mean "{candidates[0]}"?)'
a448f87cSmrg                details = '\n'.join(difflib.Differ().compare([file], [candidates[0]])).rstrip()
a448f87cSmrg            self.errors.append(Error(msg, file, details))
fb8a8121Smrg        for file in sorted(changed_files - mentioned_files):
fb8a8121Smrg            if not self.in_ignored_location(file):
fb8a8121Smrg                if file in self.new_files:
fb8a8121Smrg                    changelog_location = self.get_changelog_by_path(file)
fb8a8121Smrg                    # Python2: we cannot use next(filter(...))
fb8a8121Smrg                    entries = filter(lambda x: x.folder == changelog_location,
fb8a8121Smrg                                     self.changelog_entries)
fb8a8121Smrg                    entries = list(entries)
fb8a8121Smrg                    entry = entries[0] if entries else None
fb8a8121Smrg                    if not entry:
fb8a8121Smrg                        prs = self.top_level_prs
fb8a8121Smrg                        if not prs:
fb8a8121Smrg                            # if all ChangeLog entries have identical PRs
fb8a8121Smrg                            # then use them
fb8a8121Smrg                            prs = self.changelog_entries[0].prs
fb8a8121Smrg                            for entry in self.changelog_entries:
fb8a8121Smrg                                if entry.prs != prs:
fb8a8121Smrg                                    prs = []
fb8a8121Smrg                                    break
fb8a8121Smrg                        entry = ChangeLogEntry(changelog_location,
fb8a8121Smrg                                               self.top_level_authors,
fb8a8121Smrg                                               prs)
fb8a8121Smrg                        self.changelog_entries.append(entry)
fb8a8121Smrg                    # strip prefix of the file
fb8a8121Smrg                    assert file.startswith(entry.folder)
a448f87cSmrg                    # do not allow auto-addition of New files
a448f87cSmrg                    # for the top-level folder
a448f87cSmrg                    if entry.folder:
fb8a8121Smrg                        file = file[len(entry.folder):].lstrip('/')
fb8a8121Smrg                        entry.lines.append('\t* %s: New file.' % file)
fb8a8121Smrg                        entry.files.append(file)
fb8a8121Smrg                    else:
a448f87cSmrg                        msg = 'new file in the top-level folder not mentioned in a ChangeLog'
a448f87cSmrg                        self.errors.append(Error(msg, file))
a448f87cSmrg                else:
fb8a8121Smrg                    used_pattern = [p for p in mentioned_patterns
fb8a8121Smrg                                    if file.startswith(p)]
fb8a8121Smrg                    used_pattern = used_pattern[0] if used_pattern else None
fb8a8121Smrg                    if used_pattern:
fb8a8121Smrg                        used_patterns.add(used_pattern)
fb8a8121Smrg                    else:
fb8a8121Smrg                        msg = 'changed file not mentioned in a ChangeLog'
fb8a8121Smrg                        self.errors.append(Error(msg, file))
fb8a8121Smrg
fb8a8121Smrg        for pattern in mentioned_patterns:
fb8a8121Smrg            if pattern not in used_patterns:
fb8a8121Smrg                error = "pattern doesn't match any changed files"
fb8a8121Smrg                self.errors.append(Error(error, pattern))
fb8a8121Smrg
fb8a8121Smrg    def check_for_correct_changelog(self):
fb8a8121Smrg        for entry in self.changelog_entries:
fb8a8121Smrg            for file in entry.files:
fb8a8121Smrg                full_path = os.path.join(entry.folder, file)
fb8a8121Smrg                changelog_location = self.get_changelog_by_path(full_path)
fb8a8121Smrg                if changelog_location != entry.folder:
fb8a8121Smrg                    msg = 'wrong ChangeLog location "%s", should be "%s"'
fb8a8121Smrg                    err = Error(msg % (entry.folder, changelog_location), file)
fb8a8121Smrg                    self.errors.append(err)
fb8a8121Smrg
fb8a8121Smrg    @classmethod
fb8a8121Smrg    def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
fb8a8121Smrg        output = ''
fb8a8121Smrg        for i, author in enumerate(authors):
fb8a8121Smrg            if i == 0:
fb8a8121Smrg                output += '%s%s  %s\n' % (prefix, timestamp, author)
fb8a8121Smrg            else:
fb8a8121Smrg                output += '%s\t    %s\n' % (prefix, author)
fb8a8121Smrg        output += '\n'
fb8a8121Smrg        return output
fb8a8121Smrg
fb8a8121Smrg    def to_changelog_entries(self, use_commit_ts=False):
fb8a8121Smrg        current_timestamp = self.info.date.strftime(DATE_FORMAT)
fb8a8121Smrg        for entry in self.changelog_entries:
fb8a8121Smrg            output = ''
fb8a8121Smrg            timestamp = entry.datetime
fb8a8121Smrg            if self.revert_commit:
fb8a8121Smrg                timestamp = current_timestamp
fb8a8121Smrg                orig_date = self.original_info.date
fb8a8121Smrg                current_timestamp = orig_date.strftime(DATE_FORMAT)
fb8a8121Smrg            elif self.cherry_pick_commit:
fb8a8121Smrg                info = self.commit_to_info_hook(self.cherry_pick_commit)
fb8a8121Smrg                # it can happen that it is a cherry-pick for a different
fb8a8121Smrg                # repository
fb8a8121Smrg                if info:
fb8a8121Smrg                    timestamp = info.date.strftime(DATE_FORMAT)
fb8a8121Smrg                else:
fb8a8121Smrg                    timestamp = current_timestamp
fb8a8121Smrg            elif not timestamp or use_commit_ts:
fb8a8121Smrg                timestamp = current_timestamp
fb8a8121Smrg            authors = entry.authors if entry.authors else [self.info.author]
fb8a8121Smrg            # add Co-Authored-By authors to all ChangeLog entries
fb8a8121Smrg            for author in self.co_authors:
fb8a8121Smrg                if author not in authors:
fb8a8121Smrg                    authors.append(author)
fb8a8121Smrg
fb8a8121Smrg            if self.cherry_pick_commit or self.revert_commit:
fb8a8121Smrg                original_author = self.original_info.author
fb8a8121Smrg                output += self.format_authors_in_changelog([original_author],
fb8a8121Smrg                                                           current_timestamp)
fb8a8121Smrg                if self.revert_commit:
fb8a8121Smrg                    output += '\tRevert:\n'
fb8a8121Smrg                else:
fb8a8121Smrg                    output += '\tBackported from master:\n'
fb8a8121Smrg                output += self.format_authors_in_changelog(authors,
fb8a8121Smrg                                                           timestamp, '\t')
fb8a8121Smrg            else:
fb8a8121Smrg                output += self.format_authors_in_changelog(authors, timestamp)
fb8a8121Smrg            for pr in entry.prs:
fb8a8121Smrg                output += '\t%s\n' % pr
fb8a8121Smrg            for line in entry.lines:
fb8a8121Smrg                output += line + '\n'
fb8a8121Smrg            yield (entry.folder, output.rstrip())
fb8a8121Smrg
fb8a8121Smrg    def print_output(self):
fb8a8121Smrg        for entry, output in self.to_changelog_entries():
fb8a8121Smrg            print('------ %s/ChangeLog ------ ' % entry)
fb8a8121Smrg            print(output)
fb8a8121Smrg
fb8a8121Smrg    def print_errors(self):
fb8a8121Smrg        print('Errors:')
fb8a8121Smrg        for error in self.errors:
fb8a8121Smrg            print(error)
a448f87cSmrg
a448f87cSmrg    def check_commit_email(self):
a448f87cSmrg        # Parse 'Martin Liska  <mliska@suse.cz>'
a448f87cSmrg        email = self.info.author.split(' ')[-1].strip('<>')
a448f87cSmrg
a448f87cSmrg        # Verify that all characters are ASCII
a448f87cSmrg        # TODO: Python 3.7 provides a nicer function: isascii
a448f87cSmrg        if len(email) != len(email.encode()):
a448f87cSmrg            self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))