xref: /netbsd-src/external/gpl3/gcc.old/dist/contrib/gcc-changelog/git_commit.py (revision 4c3eb207d36f67d31994830c0a694161fc1ca39b)
1#!/usr/bin/env python3
2#
3# This file is part of GCC.
4#
5# GCC is free software; you can redistribute it and/or modify it under
6# the terms of the GNU General Public License as published by the Free
7# Software Foundation; either version 3, or (at your option) any later
8# version.
9#
10# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11# WARRANTY; without even the implied warranty of MERCHANTABILITY or
12# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13# for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with GCC; see the file COPYING3.  If not see
17# <http://www.gnu.org/licenses/>.  */
18
19import difflib
20import os
21import re
22import sys
23
# Directories (relative to the repository root) that are accepted as
# ChangeLog locations.  GitCommit.init_changelog_locations() copies this set
# into a per-commit list and may drop entries depending on the branch.
default_changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'liboffloadmic',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib'}

# Component names accepted in 'PR component/NNNN' references, both in the
# subject line and in ChangeLog PR lines; anything else is reported as an
# invalid PR component.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web'}

# Path prefixes tested by GitCommit.in_ignored_location().  Changed files
# below these prefixes do not have to be mentioned in a ChangeLog entry.
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libffi/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
    }

# Path prefixes consulted by GitCommit.check_file_patterns() when a ChangeLog
# entry uses a wildcard (a file name ending with '*').
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/'
    }

# Files that, like ChangeLog files themselves, must be updated in a commit of
# their own (see the 'project_files' handling in GitCommit.__init__).
misc_files = {
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE'
    }
155
# 'YYYY-MM-DD  Name  <email>' author line starting a ChangeLog entry.
author_line_regex = \
        re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.*  <.*>)')
# Additional author line: a tab, some spaces and 'Name  <email>'.
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.*  <.*>)')
# '[For ]path/to/ChangeLog[:]' header; group 1 is the directory part.
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# 'PR component/NNNN' anywhere in the subject line.
subject_pr_regex = re.compile(r'(^|\W)PR\s+(?P<component>[a-zA-Z+-]+)/(?P<pr>\d{4,7})')
# '[PRNNNN]' or '(PRNNNN)' in the subject line.
subject_pr2_regex = re.compile(r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# Tab-indented 'PR component/NNNN' line; the component is optional here so
# that a missing component can be diagnosed by the caller.
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?(?P<pr>[0-9]+)$')
# Tab-indented 'DR NNNN' line.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# Tab, asterisk, optional spaces and the rest of the line.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that terminates the list of file names in an entry.
end_of_location_regex = re.compile(r'[\[<(:]')
# An item such as '\t* file (func):' or '\t(func):' without any description.
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
# A line that starts a new item: '\t*' or '\t(name):'.
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
# 'This reverts commit <hash>.'  The trailing period is matched literally:
# with an unescaped '.', a line lacking the period still matched and the last
# character of the hash was consumed as the "period", truncating the hash.
revert_regex = re.compile(r'This reverts commit (?P<hash>\w+)\.$')
# '(cherry picked from commit <hash>)' marker.
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')

# Maximum width of a ChangeLog line once tabs are expanded to TAB_WIDTH.
LINE_LIMIT = 100
TAB_WIDTH = 8
# Lower-case prefix of a 'Co-Authored-By: ' trailer.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

# Lower-case prefixes of trailer lines that are skipped while parsing.
REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
                   'acked-by: ', 'tested-by: ', 'reported-by: ',
                   'suggested-by: ')
# strftime() format used for the dates emitted into ChangeLog entries.
DATE_FORMAT = '%Y-%m-%d'
179
180
def decode_path(path):
    """Return 'path' with git's C-style quoting undone.

    When core.quotepath is true (default value), utf8 chars are encoded like:
    "b/ko\\304\\215ka.txt"

    The upstream bug is fixed:
    https://github.com/gitpython-developers/GitPython/issues/1099

    but we still need a workaround for older versions of the library.
    Please take a look at the explanation of the transformation:
    https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string

    A path that is not wrapped in double quotes is returned unchanged.
    """
    if path.startswith('"') and path.endswith('"'):
        # Drop exactly one quote at each end.  str.strip('"') would also
        # remove the quote belonging to an escaped '\"' at the end of the
        # name, leaving a dangling backslash that cannot be decoded.
        quoted = path[1:-1]
        return (quoted.encode('utf8').decode('unicode-escape')
                .encode('latin-1').decode('utf8'))
    return path
197
198
class Error:
    """A problem found in a commit: a message plus optional context.

    'line' is the offending text (appended in quotes by __repr__) and
    'details' is free-form extra information kept alongside the message.
    """

    def __init__(self, message, line=None, details=None):
        self.message, self.line, self.details = message, line, details

    def __repr__(self):
        # Without an offending line the bare message is the representation.
        if not self.line:
            return self.message
        return self.message + ': "%s"' % self.line
210
211
class ChangeLogEntry:
    """One ChangeLog entry: its folder, authors, PRs and text lines.

    'authors' is a sequence of (name, date-or-None) tuples and 'prs' a
    sequence of PR lines; both are copied so the entry can grow its own
    lists independently of the caller's.
    """

    def __init__(self, folder, authors, prs):
        self.folder = folder
        self.lines = []
        self.files = []
        self.file_patterns = []
        self.parentheses_stack = []
        # The 'list.copy()' function is not available before Python 3.3
        self.author_lines = list(authors)
        self.initial_prs = list(prs)
        self.prs = list(prs)

    def parse_file_names(self):
        """Fill 'files' and 'file_patterns' from the entry's text lines."""
        # True while scanning between a star prefix and the end of the file
        # list, which is marked by a colon or an opening bracket.
        collecting = False

        for raw_line in self.lines:
            text = raw_line
            # A star prefix followed by exactly one space starts a new file
            # list.  Macro names such as the following must be skipped:
            #
            # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
            # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
            # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
            #
            star = star_prefix_regex.match(raw_line)
            if star is not None and len(star.group('spaces')) == 1:
                collecting = True
                text = star.group('content')

            if not collecting:
                continue

            # Cut off everything that is not a file name: entities "(NAME)",
            # cases "<PATTERN>", conditions "[COND]" and the entry text
            # introduced by a colon.
            terminator = end_of_location_regex.search(text)
            if terminator is not None:
                text = text[:terminator.start()]
                collecting = False

            # What remains is a comma-separated list of file names.
            for chunk in text.split(','):
                name = chunk.strip()
                if not name:
                    continue
                if name.endswith('*'):
                    self.file_patterns.append(name[:-1])
                else:
                    self.files.append(name)

    @property
    def datetime(self):
        """Date of the first author line that carries one, else None."""
        return next((item[1] for item in self.author_lines if item[1]), None)

    @property
    def authors(self):
        """Author names, in the order the author lines were recorded."""
        names = []
        for item in self.author_lines:
            names.append(item[0])
        return names

    @property
    def is_empty(self):
        """True when no text line and no PR beyond the initial ones exist."""
        if self.lines:
            return False
        return self.prs == self.initial_prs

    def contains_author(self, author):
        """Tell whether 'author' is already among the entry's authors."""
        return any(item[0] == author for item in self.author_lines)
283
284
class GitInfo:
    """Plain record with the commit data consumed by GitCommit."""

    def __init__(self, hexsha, date, author, lines, modified_files):
        # Identification of the commit.
        self.hexsha, self.date, self.author = hexsha, date, author
        # Commit message lines and the (path, status) records of the commit.
        self.lines, self.modified_files = lines, modified_files
292
293
294class GitCommit:
    def __init__(self, info, commit_to_info_hook=None, ref_name=None):
        """Parse the commit described by 'info' and collect all problems.

        info -- object providing 'lines', 'author', 'date' and
            'modified_files' (see GitInfo).
        commit_to_info_hook -- callable mapping a commit hash to such an
            info object; called here for revert commits.
        ref_name -- optional ref name, passed to
            init_changelog_locations().

        All work happens in the constructor: afterwards 'errors' holds the
        problems found and 'changelog_entries' the parsed entries.
        """
        # 'original_info' always describes the commit itself, whereas 'info'
        # is replaced below by the reverted commit for revert commits.
        self.original_info = info
        self.info = info
        self.message = None
        self.changes = None
        self.changelog_entries = []
        self.errors = []
        self.top_level_authors = []
        self.co_authors = []
        self.top_level_prs = []
        self.subject_prs = set()
        self.cherry_pick_commit = None
        self.revert_commit = None
        self.commit_to_info_hook = commit_to_info_hook
        self.init_changelog_locations(ref_name)

        # Skip Update copyright years commits
        if self.info.lines and self.info.lines[0] == 'Update copyright years.':
            return

        # The subject must be separated from the body by an empty line.
        if self.info.lines and len(self.info.lines) > 1 and self.info.lines[1]:
            self.errors.append(Error('Expected empty second line in commit message', info.lines[0]))

        # Identify first if the commit is a Revert commit
        for line in self.info.lines:
            m = revert_regex.match(line)
            if m:
                self.revert_commit = m.group('hash')
                break
        if self.revert_commit:
            # From here on the reverted commit is the one being analyzed.
            self.info = self.commit_to_info_hook(self.revert_commit)

        # The following happens for get_email.py:
        if not self.info:
            return

        self.check_commit_email()

        # Extract PR numbers from the subject line
        # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
        if self.info.lines and not self.revert_commit:
            self.subject_prs = {m.group('pr') for m in subject_pr2_regex.finditer(info.lines[0])}
            for m in subject_pr_regex.finditer(info.lines[0]):
                if not m.group('component') in bug_components:
                    self.errors.append(Error('invalid PR component in subject', info.lines[0]))
                self.subject_prs.add(m.group('pr'))

        # Allow complete deletion of ChangeLog files in a commit
        project_files = [f for f in self.info.modified_files
                         if (self.is_changelog_filename(f[0], allow_suffix=True) and f[1] != 'D')
                         or f[0] in misc_files]
        ignored_files = [f for f in self.info.modified_files
                         if self.in_ignored_location(f[0])]
        if len(project_files) == len(self.info.modified_files):
            # All modified files are only MISC files
            return
        elif project_files:
            # Mixing ChangeLog/MISC updates with other changes is an error.
            err = 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
                  'should be done separately from normal commits\n' \
                  '(note: ChangeLog entries will be automatically ' \
                  'added by a cron job)'
            self.errors.append(Error(err))
            return

        # When every modified file is in an ignored location, a missing
        # ChangeLog in the message is not reported by parse_lines().
        all_are_ignored = (len(project_files) + len(ignored_files)
                           == len(self.info.modified_files))
        self.parse_lines(all_are_ignored)
        if self.changes:
            self.parse_changelog()
            self.parse_file_names()
            self.check_for_empty_description()
            self.check_for_broken_parentheses()
            self.deduce_changelog_locations()
            self.check_file_patterns()
            # The remaining checks run only when nothing failed so far.
            if not self.errors:
                self.check_mentioned_files()
                self.check_for_correct_changelog()
        # parse_changelog() removes every subject PR it finds in a PR line;
        # whatever is left was mentioned in the subject only.
        if self.subject_prs:
            self.errors.append(Error('PR %s in subject but not in changelog' %
                                     ', '.join(self.subject_prs), self.info.lines[0]))
375
376    @property
377    def success(self):
378        return not self.errors
379
380    @property
381    def new_files(self):
382        return [x[0] for x in self.info.modified_files if x[1] == 'A']
383
384    @classmethod
385    def is_changelog_filename(cls, path, allow_suffix=False):
386        basename = os.path.basename(path)
387        if basename == 'ChangeLog':
388            return True
389        elif allow_suffix and basename.startswith('ChangeLog'):
390            return True
391        else:
392            return False
393
394    def find_changelog_location(self, name):
395        if name.startswith('\t'):
396            name = name[1:]
397        if name.endswith(':'):
398            name = name[:-1]
399        if name.endswith('/'):
400            name = name[:-1]
401        return name if name in self.changelog_locations else None
402
403    @classmethod
404    def format_git_author(cls, author):
405        assert '<' in author
406        return author.replace('<', ' <')
407
408    @classmethod
409    def parse_git_name_status(cls, string):
410        modified_files = []
411        for entry in string.split('\n'):
412            parts = entry.split('\t')
413            t = parts[0]
414            if t == 'A' or t == 'D' or t == 'M':
415                modified_files.append((parts[1], t))
416            elif t.startswith('R'):
417                modified_files.append((parts[1], 'D'))
418                modified_files.append((parts[2], 'A'))
419        return modified_files
420
421    def init_changelog_locations(self, ref_name):
422        self.changelog_locations = list(default_changelog_locations)
423        if ref_name:
424            version = sys.maxsize
425            if 'releases/gcc-' in ref_name:
426                version = int(ref_name.split('-')[-1])
427            if version >= 12:
428                # HSA and BRIG were removed in GCC 12
429                self.changelog_locations.remove('gcc/brig')
430                self.changelog_locations.remove('libhsail-rt')
431
432    def parse_lines(self, all_are_ignored):
433        body = self.info.lines
434
435        for i, b in enumerate(body):
436            if not b:
437                continue
438            if (changelog_regex.match(b) or self.find_changelog_location(b)
439                    or star_prefix_regex.match(b) or pr_regex.match(b)
440                    or dr_regex.match(b) or author_line_regex.match(b)
441                    or b.lower().startswith(CO_AUTHORED_BY_PREFIX)):
442                self.changes = body[i:]
443                return
444        if not all_are_ignored:
445            self.errors.append(Error('cannot find a ChangeLog location in '
446                                     'message'))
447
    def parse_changelog(self):
        """Split 'changes' into ChangeLogEntry objects and validate lines.

        The lines are processed one by one.  'last_entry' is the entry that
        currently receives lines (None when no entry is open) and
        'will_deduce' records that an entry without an explicit location
        was opened, in which case an empty line closes the current entry.
        Errors are appended to 'errors'; authors, PRs, co-authors and the
        cherry-pick hash are collected along the way.
        """
        last_entry = None
        will_deduce = False
        for line in self.changes:
            if not line:
                # An empty line ends an entry only in deduction mode.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                # support long filenames
                if not line.startswith('\t* ') or not line.endswith(':') or ' ' in line[3:-1]:
                    self.errors.append(Error('line exceeds %d character limit'
                                             % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                # Explicit 'path/ChangeLog:' header.
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                # The line itself names a known ChangeLog location.
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                # Classify the line: author line, PR/DR line or something
                # else (both variables stay None in the last case).
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    m = pr_regex.match(line)
                    component = m.group('component')
                    pr = m.group('pr')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    elif not component[:-1] in bug_components:
                        # 'component' still carries its trailing slash.
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                    # The PR from the subject is accounted for.
                    if pr in self.subject_prs:
                        self.subject_prs.remove(pr)
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                # Trailer lines: collect co-authors, skip review tags and
                # remember the cherry-picked commit.
                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        # Any other line opens an entry without a folder.
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    # Author line inside an open entry: add it once.
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                # From here on the line belongs to 'last_entry'.
                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        # A star not followed by exactly one space is only
                        # tolerated inside an open parenthesis.
                        if (len(m.group('spaces')) != 1 and
                                not last_entry.parentheses_stack):
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                # Look for empty groups before the colon.
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
                    else:
                        # Continuation line: not allowed as the first line
                        # of an entry.
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
577
578    def process_parentheses(self, last_entry, line):
579        for c in line:
580            if c == '(':
581                last_entry.parentheses_stack.append(line)
582            elif c == ')':
583                if not last_entry.parentheses_stack:
584                    msg = 'bad wrapping of parenthesis'
585                    self.errors.append(Error(msg, line))
586                else:
587                    del last_entry.parentheses_stack[-1]
588
589    def parse_file_names(self):
590        for entry in self.changelog_entries:
591            entry.parse_file_names()
592
593    def check_file_patterns(self):
594        for entry in self.changelog_entries:
595            for pattern in entry.file_patterns:
596                name = os.path.join(entry.folder, pattern)
597                if not [name.startswith(pr) for pr in wildcard_prefixes]:
598                    msg = 'unsupported wildcard prefix'
599                    self.errors.append(Error(msg, name))
600
601    def check_for_empty_description(self):
602        for entry in self.changelog_entries:
603            for i, line in enumerate(entry.lines):
604                if (item_empty_regex.match(line) and
605                    (i == len(entry.lines) - 1
606                     or not entry.lines[i+1].strip()
607                     or item_parenthesis_regex.match(entry.lines[i+1]))):
608                    msg = 'missing description of a change'
609                    self.errors.append(Error(msg, line))
610
611    def check_for_broken_parentheses(self):
612        for entry in self.changelog_entries:
613            if entry.parentheses_stack:
614                msg = 'bad parentheses wrapping'
615                self.errors.append(Error(msg, entry.parentheses_stack[-1]))
616
617    def get_file_changelog_location(self, changelog_file):
618        for file in self.info.modified_files:
619            if file[0] == changelog_file:
620                # root ChangeLog file
621                return ''
622            index = file[0].find('/' + changelog_file)
623            if index != -1:
624                return file[0][:index]
625        return None
626
627    def deduce_changelog_locations(self):
628        for entry in self.changelog_entries:
629            if not entry.folder:
630                changelog = None
631                for file in entry.files:
632                    location = self.get_file_changelog_location(file)
633                    if (location == ''
634                       or (location and location in self.changelog_locations)):
635                        if changelog and changelog != location:
636                            msg = 'could not deduce ChangeLog file, ' \
637                                  'not unique location'
638                            self.errors.append(Error(msg))
639                            return
640                        changelog = location
641                if changelog is not None:
642                    entry.folder = changelog
643                else:
644                    msg = 'could not deduce ChangeLog file'
645                    self.errors.append(Error(msg))
646
647    @classmethod
648    def in_ignored_location(cls, path):
649        for ignored in ignored_prefixes:
650            if path.startswith(ignored):
651                return True
652        return False
653
654    def get_changelog_by_path(self, path):
655        components = path.split('/')
656        while components:
657            if '/'.join(components) in self.changelog_locations:
658                break
659            components = components[:-1]
660        return '/'.join(components)
661
    def check_mentioned_files(self):
        """Cross-check the files named in entries against the changed files.

        Errors are recorded for entries without files, files mentioned
        twice, mentioned files that did not change, changed files that are
        neither mentioned nor covered by a wildcard pattern, and patterns
        that match nothing.  Unmentioned new files get a 'New file.' line
        added automatically, except in the top-level folder.
        """
        folder_count = len([x.folder for x in self.changelog_entries])
        assert folder_count == len(self.changelog_entries)

        # Full paths (folder + name) of the files and patterns mentioned in
        # entries, and the patterns that matched at least one changed file.
        mentioned_files = set()
        mentioned_patterns = []
        used_patterns = set()
        for entry in self.changelog_entries:
            if not entry.files and not entry.file_patterns:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    item = os.path.join(entry.folder, file)
                    if item in mentioned_files:
                        msg = 'same file specified multiple times'
                        self.errors.append(Error(msg, file))
                    else:
                        mentioned_files.add(item)
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        # ChangeLog files themselves are left out of the comparison.
        cand = [x[0] for x in self.info.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            # Suggest the closest changed file name, with a diff as details.
            candidates = difflib.get_close_matches(file, changed_files, 1)
            details = None
            if candidates:
                msg += f' (did you mean "{candidates[0]}"?)'
                details = '\n'.join(difflib.Differ().compare([file], [candidates[0]])).rstrip()
            self.errors.append(Error(msg, file, details))
        for file in sorted(changed_files - mentioned_files):
            if not self.in_ignored_location(file):
                if file in self.new_files:
                    # A new file is added to the entry of its ChangeLog
                    # location, creating that entry when necessary.
                    changelog_location = self.get_changelog_by_path(file)
                    # Python2: we cannot use next(filter(...))
                    entries = filter(lambda x: x.folder == changelog_location,
                                     self.changelog_entries)
                    entries = list(entries)
                    entry = entries[0] if entries else None
                    if not entry:
                        prs = self.top_level_prs
                        if not prs:
                            # if all ChangeLog entries have identical PRs
                            # then use them
                            prs = self.changelog_entries[0].prs
                            for entry in self.changelog_entries:
                                if entry.prs != prs:
                                    prs = []
                                    break
                        entry = ChangeLogEntry(changelog_location,
                                               self.top_level_authors,
                                               prs)
                        self.changelog_entries.append(entry)
                    # strip prefix of the file
                    assert file.startswith(entry.folder)
                    # do not allow auto-addition of New files
                    # for the top-level folder
                    if entry.folder:
                        file = file[len(entry.folder):].lstrip('/')
                        entry.lines.append('\t* %s: New file.' % file)
                        entry.files.append(file)
                    else:
                        msg = 'new file in the top-level folder not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))
                else:
                    # A changed file may be covered by a wildcard pattern;
                    # the first matching pattern is marked as used.
                    used_pattern = [p for p in mentioned_patterns
                                    if file.startswith(p)]
                    used_pattern = used_pattern[0] if used_pattern else None
                    if used_pattern:
                        used_patterns.add(used_pattern)
                    else:
                        msg = 'changed file not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))

        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))
744
745    def check_for_correct_changelog(self):
746        for entry in self.changelog_entries:
747            for file in entry.files:
748                full_path = os.path.join(entry.folder, file)
749                changelog_location = self.get_changelog_by_path(full_path)
750                if changelog_location != entry.folder:
751                    msg = 'wrong ChangeLog location "%s", should be "%s"'
752                    err = Error(msg % (entry.folder, changelog_location), file)
753                    self.errors.append(err)
754
755    @classmethod
756    def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
757        output = ''
758        for i, author in enumerate(authors):
759            if i == 0:
760                output += '%s%s  %s\n' % (prefix, timestamp, author)
761            else:
762                output += '%s\t    %s\n' % (prefix, author)
763        output += '\n'
764        return output
765
766    def to_changelog_entries(self, use_commit_ts=False):
767        current_timestamp = self.info.date.strftime(DATE_FORMAT)
768        for entry in self.changelog_entries:
769            output = ''
770            timestamp = entry.datetime
771            if self.revert_commit:
772                timestamp = current_timestamp
773                orig_date = self.original_info.date
774                current_timestamp = orig_date.strftime(DATE_FORMAT)
775            elif self.cherry_pick_commit:
776                info = self.commit_to_info_hook(self.cherry_pick_commit)
777                # it can happen that it is a cherry-pick for a different
778                # repository
779                if info:
780                    timestamp = info.date.strftime(DATE_FORMAT)
781                else:
782                    timestamp = current_timestamp
783            elif not timestamp or use_commit_ts:
784                timestamp = current_timestamp
785            authors = entry.authors if entry.authors else [self.info.author]
786            # add Co-Authored-By authors to all ChangeLog entries
787            for author in self.co_authors:
788                if author not in authors:
789                    authors.append(author)
790
791            if self.cherry_pick_commit or self.revert_commit:
792                original_author = self.original_info.author
793                output += self.format_authors_in_changelog([original_author],
794                                                           current_timestamp)
795                if self.revert_commit:
796                    output += '\tRevert:\n'
797                else:
798                    output += '\tBackported from master:\n'
799                output += self.format_authors_in_changelog(authors,
800                                                           timestamp, '\t')
801            else:
802                output += self.format_authors_in_changelog(authors, timestamp)
803            for pr in entry.prs:
804                output += '\t%s\n' % pr
805            for line in entry.lines:
806                output += line + '\n'
807            yield (entry.folder, output.rstrip())
808
809    def print_output(self):
810        for entry, output in self.to_changelog_entries():
811            print('------ %s/ChangeLog ------ ' % entry)
812            print(output)
813
814    def print_errors(self):
815        print('Errors:')
816        for error in self.errors:
817            print(error)
818
819    def check_commit_email(self):
820        # Parse 'Martin Liska  <mliska@suse.cz>'
821        email = self.info.author.split(' ')[-1].strip('<>')
822
823        # Verify that all characters are ASCII
824        # TODO: Python 3.7 provides a nicer function: isascii
825        if len(email) != len(email.encode()):
826            self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))
827