xref: /netbsd-src/external/gpl3/gcc/dist/contrib/gcc-changelog/git_commit.py (revision 82d56013d7b633d116a93943de88e08335357a7c)
1#!/usr/bin/env python3
2#
3# This file is part of GCC.
4#
5# GCC is free software; you can redistribute it and/or modify it under
6# the terms of the GNU General Public License as published by the Free
7# Software Foundation; either version 3, or (at your option) any later
8# version.
9#
10# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11# WARRANTY; without even the implied warranty of MERCHANTABILITY or
12# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13# for more details.
14#
15# You should have received a copy of the GNU General Public License
16# along with GCC; see the file COPYING3.  If not see
17# <http://www.gnu.org/licenses/>.  */
18
19import difflib
20import os
21import re
22
# Directories (relative to the repository root) that are accepted as
# ChangeLog locations.  GitCommit.find_changelog_location,
# GitCommit.get_changelog_by_path and
# GitCommit.deduce_changelog_locations test membership in this set.
changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'liboffloadmic',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib'}

# Component names accepted in a "PR component/NNNN" line;
# GitCommit.parse_changelog reports 'invalid PR component' for anything
# that is not in this set.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web'}

# Path prefixes tested by GitCommit.in_ignored_location: changed files
# below them need not be mentioned in a ChangeLog entry.
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
    }

# Path prefixes consulted by GitCommit.check_file_patterns when a
# ChangeLog entry uses a trailing-'*' file pattern.
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/'
    }

# Files that GitCommit.__init__ groups together with ChangeLog files:
# commits touching only such files are not checked any further.
misc_files = {
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE'
    }
153
# 'YYYY-MM-DD', two spaces, then 'Name  <email>' at the start of a line.
author_line_regex = \
        re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.*  <.*>)')
# A tab, optional spaces (captured as 'spaces'), then 'Name  <email>'.
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.*  <.*>)')
# Optional 'for '/'For ' prefix, a path (group 1), 'ChangeLog' and an
# optional colon.  Inside the character class '+-/' is a range ('+'
# through '/'), so ',', '-' and '.' are accepted as well.
# NOTE(review): confirm whether that range is intentional.
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# A tab, 'PR ', an optional 'component/' and the bug number up to the end
# of the line.
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?([0-9]+)$')
# A tab, 'DR ' and a number up to the end of the line.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# A tab and an asterisk; the spaces after the asterisk and the rest of the
# line are captured separately.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that ends the file list of an entry: '[', '<', '(' or ':'.
end_of_location_regex = re.compile(r'[\[<(:]')
# An item '(name):' (optionally preceded by '* file ') with nothing but
# whitespace after the colon.
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
# A line that starts a new item: tab followed by '*' or by '(name):'.
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
# 'This reverts commit <hash>' followed by exactly one more character (the
# '.' in the pattern is unescaped, so any character matches).
revert_regex = re.compile(r'This reverts commit (?P<hash>\w+).$')
# 'cherry picked from commit <hash>' anywhere in a line (used with search).
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')

# Maximum line width checked by GitCommit.parse_changelog; each tab counts
# as TAB_WIDTH columns.
LINE_LIMIT = 100
TAB_WIDTH = 8
# Trailer prefixes; they are compared against the lower-cased line.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
                   'acked-by: ', 'tested-by: ', 'reported-by: ',
                   'suggested-by: ')
# strftime format used for timestamps in generated ChangeLog entries.
DATE_FORMAT = '%Y-%m-%d'
175
176
def decode_path(path):
    """Return PATH with git's quoted octal escapes decoded.

    When core.quotepath is true (default value), utf8 chars are encoded
    like "b/ko\\304\\215ka.txt" and the whole path is wrapped in double
    quotes.  Paths that are not wrapped in double quotes are returned
    unchanged.
    """
    # The upstream bug is fixed:
    # https://github.com/gitpython-developers/GitPython/issues/1099
    #
    # but we still need a workaround for older versions of the library.
    # Please take a look at the explanation of the transformation:
    # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string
    is_quoted = path.startswith('"') and path.endswith('"')
    if not is_quoted:
        return path

    escaped = path.strip('"').encode('utf8')
    # Each escape becomes one code point below 256; latin-1 maps these
    # back to the raw bytes, which are then read as UTF-8.
    raw_bytes = escaped.decode('unicode-escape').encode('latin-1')
    return raw_bytes.decode('utf8')
193
194
class Error:
    """A problem found while checking a commit.

    message: text describing the problem.
    line: optional offending text; when set it is shown in double quotes
    after the message.
    """

    def __init__(self, message, line=None):
        self.line = line
        self.message = message

    def __repr__(self):
        if not self.line:
            return self.message
        return self.message + ':"%s"' % self.line
205
206
class ChangeLogEntry:
    """One ChangeLog entry collected from a commit message.

    folder: ChangeLog directory of the entry (None until it is known).
    author_lines: (name, date-or-None) tuples.
    prs / initial_prs: PR lines; initial_prs keeps the state the entry
    was created with.
    lines: raw ChangeLog lines of the entry.
    files / file_patterns: names filled in by parse_file_names.
    """

    def __init__(self, folder, authors, prs):
        self.folder = folder
        # The 'list.copy()' function is not available before Python 3.3
        self.author_lines = list(authors)
        self.initial_prs = list(prs)
        self.prs = list(prs)
        self.lines = []
        self.files = []
        self.file_patterns = []

    def parse_file_names(self):
        """Collect the file names and '*' patterns mentioned in self.lines."""
        # True while we are between a star prefix and the end of the file
        # list (a colon, an open paren, '[' or '<').
        collecting = False

        for raw_line in self.lines:
            text = raw_line
            # A star prefix starts a new file list right after the star.
            star = star_prefix_regex.match(text)
            if star:
                collecting = True
                text = star.group('content')

            if not collecting:
                continue

            # Cut off everything that is not a filename: entities
            # "(NAME)", cases "<PATTERN>", conditions "[COND]" and the
            # entry text (the colon and anything that follows it).
            stop = end_of_location_regex.search(text)
            if stop:
                text = text[:stop.start()]
                collecting = False

            # What remains is a comma separated list of filenames.
            for chunk in text.split(','):
                name = chunk.strip()
                if not name:
                    continue
                if name.endswith('*'):
                    self.file_patterns.append(name[:-1])
                else:
                    self.files.append(name)

    @property
    def datetime(self):
        """First non-empty date among the author lines, or None."""
        return next((item[1] for item in self.author_lines if item[1]),
                    None)

    @property
    def authors(self):
        """Names of all author lines, as a new list."""
        names = []
        for item in self.author_lines:
            names.append(item[0])
        return names

    @property
    def is_empty(self):
        """True when no line was added and the PRs are still the initial ones."""
        if self.lines:
            return False
        return self.prs == self.initial_prs

    def contains_author(self, author):
        """Return True when AUTHOR is the name of one of the author lines."""
        return any(item[0] == author for item in self.author_lines)
271
272
class GitInfo:
    """Plain record with the data of one commit.

    The constructor stores its five arguments unchanged as attributes of
    the same name.
    """

    def __init__(self, hexsha, date, author, lines, modified_files):
        self.hexsha, self.date, self.author = hexsha, date, author
        self.lines, self.modified_files = lines, modified_files
280
281
282class GitCommit:
283    def __init__(self, info, strict=True, commit_to_info_hook=None):
284        self.original_info = info
285        self.info = info
286        self.message = None
287        self.changes = None
288        self.changelog_entries = []
289        self.errors = []
290        self.top_level_authors = []
291        self.co_authors = []
292        self.top_level_prs = []
293        self.cherry_pick_commit = None
294        self.revert_commit = None
295        self.commit_to_info_hook = commit_to_info_hook
296
297        # Skip Update copyright years commits
298        if self.info.lines and self.info.lines[0] == 'Update copyright years.':
299            return
300
301        # Identify first if the commit is a Revert commit
302        for line in self.info.lines:
303            m = revert_regex.match(line)
304            if m:
305                self.revert_commit = m.group('hash')
306                break
307        if self.revert_commit:
308            self.info = self.commit_to_info_hook(self.revert_commit)
309
310        project_files = [f for f in self.info.modified_files
311                         if self.is_changelog_filename(f[0])
312                         or f[0] in misc_files]
313        ignored_files = [f for f in self.info.modified_files
314                         if self.in_ignored_location(f[0])]
315        if len(project_files) == len(self.info.modified_files):
316            # All modified files are only MISC files
317            return
318        elif project_files and strict:
319            self.errors.append(Error('ChangeLog, DATESTAMP, BASE-VER and '
320                                     'DEV-PHASE updates should be done '
321                                     'separately from normal commits'))
322            return
323
324        all_are_ignored = (len(project_files) + len(ignored_files)
325                           == len(self.info.modified_files))
326        self.parse_lines(all_are_ignored)
327        if self.changes:
328            self.parse_changelog()
329            self.parse_file_names()
330            self.check_for_empty_description()
331            self.deduce_changelog_locations()
332            self.check_file_patterns()
333            if not self.errors:
334                self.check_mentioned_files()
335                self.check_for_correct_changelog()
336
337    @property
338    def success(self):
339        return not self.errors
340
341    @property
342    def new_files(self):
343        return [x[0] for x in self.info.modified_files if x[1] == 'A']
344
345    @classmethod
346    def is_changelog_filename(cls, path):
347        return path.endswith('/ChangeLog') or path == 'ChangeLog'
348
349    @classmethod
350    def find_changelog_location(cls, name):
351        if name.startswith('\t'):
352            name = name[1:]
353        if name.endswith(':'):
354            name = name[:-1]
355        if name.endswith('/'):
356            name = name[:-1]
357        return name if name in changelog_locations else None
358
359    @classmethod
360    def format_git_author(cls, author):
361        assert '<' in author
362        return author.replace('<', ' <')
363
364    @classmethod
365    def parse_git_name_status(cls, string):
366        modified_files = []
367        for entry in string.split('\n'):
368            parts = entry.split('\t')
369            t = parts[0]
370            if t == 'A' or t == 'D' or t == 'M':
371                modified_files.append((parts[1], t))
372            elif t.startswith('R'):
373                modified_files.append((parts[1], 'D'))
374                modified_files.append((parts[2], 'A'))
375        return modified_files
376
377    def parse_lines(self, all_are_ignored):
378        body = self.info.lines
379
380        for i, b in enumerate(body):
381            if not b:
382                continue
383            if (changelog_regex.match(b) or self.find_changelog_location(b)
384                    or star_prefix_regex.match(b) or pr_regex.match(b)
385                    or dr_regex.match(b) or author_line_regex.match(b)):
386                self.changes = body[i:]
387                return
388        if not all_are_ignored:
389            self.errors.append(Error('cannot find a ChangeLog location in '
390                                     'message'))
391
    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Walks the lines of self.changes once and fills
        self.changelog_entries, self.top_level_authors,
        self.top_level_prs, self.co_authors and self.cherry_pick_commit.
        Formatting problems are appended to self.errors.
        """
        # Entry that currently receives lines; None when no entry is open.
        last_entry = None
        # Set (and never cleared) once an entry without an explicit
        # location has been created.
        will_deduce = False
        for line in self.changes:
            if not line:
                # Once will_deduce is set, a blank line closes the
                # current entry.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            # Tabs count as TAB_WIDTH columns for the width check.
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                self.errors.append(Error('line exceeds %d character limit'
                                         % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                # Explicit '<path>/ChangeLog' header.
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                # A bare known location also starts an entry.
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                # Classify the line: author line, PR/DR line or other.
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    component = pr_regex.match(line).group('component')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    # The captured component ends with '/', hence [:-1].
                    elif not component[:-1] in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                # Trailers are matched case-insensitively; they never
                # become part of an entry.
                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        # Any other line opens an entry without a folder.
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    # Author inside an open entry: add it once.
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                # From here on the line belongs to last_entry.
                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        if len(m.group('spaces')) != 1:
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            # Only the text before the first colon is
                            # searched for empty groups.
                            if len(parts) > 1:
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                    else:
                        # A continuation line is only valid after the
                        # entry has received some content.
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
512
513    def parse_file_names(self):
514        for entry in self.changelog_entries:
515            entry.parse_file_names()
516
517    def check_file_patterns(self):
518        for entry in self.changelog_entries:
519            for pattern in entry.file_patterns:
520                name = os.path.join(entry.folder, pattern)
521                if not [name.startswith(pr) for pr in wildcard_prefixes]:
522                    msg = 'unsupported wildcard prefix'
523                    self.errors.append(Error(msg, name))
524
525    def check_for_empty_description(self):
526        for entry in self.changelog_entries:
527            for i, line in enumerate(entry.lines):
528                if (item_empty_regex.match(line) and
529                    (i == len(entry.lines) - 1
530                     or not entry.lines[i+1].strip()
531                     or item_parenthesis_regex.match(entry.lines[i+1]))):
532                    msg = 'missing description of a change'
533                    self.errors.append(Error(msg, line))
534
535    def get_file_changelog_location(self, changelog_file):
536        for file in self.info.modified_files:
537            if file[0] == changelog_file:
538                # root ChangeLog file
539                return ''
540            index = file[0].find('/' + changelog_file)
541            if index != -1:
542                return file[0][:index]
543        return None
544
545    def deduce_changelog_locations(self):
546        for entry in self.changelog_entries:
547            if not entry.folder:
548                changelog = None
549                for file in entry.files:
550                    location = self.get_file_changelog_location(file)
551                    if (location == ''
552                       or (location and location in changelog_locations)):
553                        if changelog and changelog != location:
554                            msg = 'could not deduce ChangeLog file, ' \
555                                  'not unique location'
556                            self.errors.append(Error(msg))
557                            return
558                        changelog = location
559                if changelog is not None:
560                    entry.folder = changelog
561                else:
562                    msg = 'could not deduce ChangeLog file'
563                    self.errors.append(Error(msg))
564
565    @classmethod
566    def in_ignored_location(cls, path):
567        for ignored in ignored_prefixes:
568            if path.startswith(ignored):
569                return True
570        return False
571
572    @classmethod
573    def get_changelog_by_path(cls, path):
574        components = path.split('/')
575        while components:
576            if '/'.join(components) in changelog_locations:
577                break
578            components = components[:-1]
579        return '/'.join(components)
580
    def check_mentioned_files(self):
        """Compare the files named in the entries with the changed files.

        Records an error for an entry without files, for a mentioned file
        that was not changed, for a changed file that is neither
        mentioned, ignored, new nor covered by a pattern, and for a
        pattern that covers no changed file.  For new (added) files that
        are not mentioned, a '* <file>: New file.' line is generated in
        the matching entry, which is created when it does not exist.
        """
        # NOTE(review): the list has one item per entry regardless of the
        # folder value, so this assertion cannot fail - confirm the intent.
        folder_count = len([x.folder for x in self.changelog_entries])
        assert folder_count == len(self.changelog_entries)

        mentioned_files = set()
        mentioned_patterns = []
        used_patterns = set()
        for entry in self.changelog_entries:
            if not entry.files and not entry.file_patterns:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            # Mentioned names are relative to the folder of the entry.
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    mentioned_files.add(os.path.join(entry.folder, file))
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        # ChangeLog files themselves are left out of the comparison.
        cand = [x[0] for x in self.info.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            # Suggest the closest changed file name, if there is one.
            candidates = difflib.get_close_matches(file, changed_files, 1)
            if candidates:
                msg += f' (did you mean "{candidates[0]}"?)'
            self.errors.append(Error(msg, file))
        for file in sorted(changed_files - mentioned_files):
            if not self.in_ignored_location(file):
                if file in self.new_files:
                    # New files get a generated 'New file.' line.
                    changelog_location = self.get_changelog_by_path(file)
                    # Python2: we cannot use next(filter(...))
                    entries = filter(lambda x: x.folder == changelog_location,
                                     self.changelog_entries)
                    entries = list(entries)
                    entry = entries[0] if entries else None
                    if not entry:
                        prs = self.top_level_prs
                        if not prs:
                            # if all ChangeLog entries have identical PRs
                            # then use them
                            prs = self.changelog_entries[0].prs
                            for entry in self.changelog_entries:
                                if entry.prs != prs:
                                    prs = []
                                    break
                        entry = ChangeLogEntry(changelog_location,
                                               self.top_level_authors,
                                               prs)
                        self.changelog_entries.append(entry)
                    # strip prefix of the file
                    assert file.startswith(entry.folder)
                    file = file[len(entry.folder):].lstrip('/')
                    entry.lines.append('\t* %s: New file.' % file)
                    entry.files.append(file)
                else:
                    # The first pattern that is a prefix of the file
                    # accounts for it.
                    used_pattern = [p for p in mentioned_patterns
                                    if file.startswith(p)]
                    used_pattern = used_pattern[0] if used_pattern else None
                    if used_pattern:
                        used_patterns.add(used_pattern)
                    else:
                        msg = 'changed file not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))

        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))
650
651    def check_for_correct_changelog(self):
652        for entry in self.changelog_entries:
653            for file in entry.files:
654                full_path = os.path.join(entry.folder, file)
655                changelog_location = self.get_changelog_by_path(full_path)
656                if changelog_location != entry.folder:
657                    msg = 'wrong ChangeLog location "%s", should be "%s"'
658                    err = Error(msg % (entry.folder, changelog_location), file)
659                    self.errors.append(err)
660
661    @classmethod
662    def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
663        output = ''
664        for i, author in enumerate(authors):
665            if i == 0:
666                output += '%s%s  %s\n' % (prefix, timestamp, author)
667            else:
668                output += '%s\t    %s\n' % (prefix, author)
669        output += '\n'
670        return output
671
672    def to_changelog_entries(self, use_commit_ts=False):
673        current_timestamp = self.info.date.strftime(DATE_FORMAT)
674        for entry in self.changelog_entries:
675            output = ''
676            timestamp = entry.datetime
677            if self.revert_commit:
678                timestamp = current_timestamp
679                orig_date = self.original_info.date
680                current_timestamp = orig_date.strftime(DATE_FORMAT)
681            elif self.cherry_pick_commit:
682                info = self.commit_to_info_hook(self.cherry_pick_commit)
683                # it can happen that it is a cherry-pick for a different
684                # repository
685                if info:
686                    timestamp = info.date.strftime(DATE_FORMAT)
687                else:
688                    timestamp = current_timestamp
689            elif not timestamp or use_commit_ts:
690                timestamp = current_timestamp
691            authors = entry.authors if entry.authors else [self.info.author]
692            # add Co-Authored-By authors to all ChangeLog entries
693            for author in self.co_authors:
694                if author not in authors:
695                    authors.append(author)
696
697            if self.cherry_pick_commit or self.revert_commit:
698                original_author = self.original_info.author
699                output += self.format_authors_in_changelog([original_author],
700                                                           current_timestamp)
701                if self.revert_commit:
702                    output += '\tRevert:\n'
703                else:
704                    output += '\tBackported from master:\n'
705                output += self.format_authors_in_changelog(authors,
706                                                           timestamp, '\t')
707            else:
708                output += self.format_authors_in_changelog(authors, timestamp)
709            for pr in entry.prs:
710                output += '\t%s\n' % pr
711            for line in entry.lines:
712                output += line + '\n'
713            yield (entry.folder, output.rstrip())
714
715    def print_output(self):
716        for entry, output in self.to_changelog_entries():
717            print('------ %s/ChangeLog ------ ' % entry)
718            print(output)
719
720    def print_errors(self):
721        print('Errors:')
722        for error in self.errors:
723            print(error)
724