xref: /netbsd-src/external/gpl3/gcc/dist/contrib/gcc-changelog/git_commit.py (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1fb8a8121Smrg#!/usr/bin/env python3
2fb8a8121Smrg#
3fb8a8121Smrg# This file is part of GCC.
4fb8a8121Smrg#
5fb8a8121Smrg# GCC is free software; you can redistribute it and/or modify it under
6fb8a8121Smrg# the terms of the GNU General Public License as published by the Free
7fb8a8121Smrg# Software Foundation; either version 3, or (at your option) any later
8fb8a8121Smrg# version.
9fb8a8121Smrg#
10fb8a8121Smrg# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11fb8a8121Smrg# WARRANTY; without even the implied warranty of MERCHANTABILITY or
12fb8a8121Smrg# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13fb8a8121Smrg# for more details.
14fb8a8121Smrg#
15fb8a8121Smrg# You should have received a copy of the GNU General Public License
16fb8a8121Smrg# along with GCC; see the file COPYING3.  If not see
17fb8a8121Smrg# <http://www.gnu.org/licenses/>.  */
18fb8a8121Smrg
19fb8a8121Smrgimport difflib
20fb8a8121Smrgimport os
21fb8a8121Smrgimport re
22a448f87cSmrgimport sys
23fb8a8121Smrg
# Default set of ChangeLog locations.  GitCommit.init_changelog_locations()
# copies it into a per-commit list and may drop entries depending on the
# branch the commit is checked against.
default_changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'liboffloadmic',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib'}
80fb8a8121Smrg
# Component names accepted in 'PR component/NNNN' references.  Both the
# subject-line check in GitCommit.__init__ and the ChangeLog 'PR' line check
# in GitCommit.parse_changelog test membership in this set.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web'}
131fb8a8121Smrg
# Path prefixes for which GitCommit.in_ignored_location() returns True.  A
# commit that touches only such paths is not required to contain a ChangeLog
# location (see the 'all_are_ignored' handling in GitCommit).
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libffi/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
    }

# Path prefixes consulted by GitCommit.check_file_patterns() when an entry
# mentions a file pattern ending in '*'.
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/'
    }

# Files that, like ChangeLog files, are counted as "project files" by
# GitCommit.__init__ and must not be mixed with other changes in one commit.
misc_files = {
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE'
    }
155fb8a8121Smrg
# 'YYYY-MM-DD  Name  <email>' author line (date, two spaces, name, two
# spaces, address in angle brackets).
author_line_regex = \
        re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.*  <.*>)')
# Tab-indented additional author; parse_changelog requires the 'spaces'
# group to be exactly four characters long.
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.*  <.*>)')
# '[for ]<path>ChangeLog[:]' header; group 1 is the path part.  Note that
# '+-/' inside the character class is a range (from '+' to '/'), so ',' and
# '.' are accepted as well.
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# 'PR component/NNNN' appearing anywhere in the subject line.
subject_pr_regex = re.compile(r'(^|\W)PR\s+(?P<component>[a-zA-Z+-]+)/(?P<pr>\d{4,7})')
# '[PRnnnn]' / '(PRnnnn)' appearing in the subject line.
subject_pr2_regex = re.compile(r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# Tab-indented 'PR [component/]NNNN' line; the 'component' group, when
# present, includes the trailing slash.
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?(?P<pr>[0-9]+)$')
# Tab-indented 'DR NNNN' line.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# Tab, asterisk, any number of spaces ('spaces'), then the rest ('content').
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that terminates the file list of an entry.
end_of_location_regex = re.compile(r'[\[<(:]')
# An item of the form '(name):' (optionally preceded by '* file ') with
# nothing but whitespace after the colon.
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
# A line starting a new item: either '\t*' or '\t(name):'.
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
# Matched with fullmatch() against each line of the commit message.
revert_regex = re.compile(r'This reverts commit (?P<hash>[0-9a-f]+)\.$')
# Searched for anywhere within a line of the ChangeLog part of the message.
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')
170fb8a8121Smrg
# Maximum ChangeLog line length, measured after expanding each tab to
# TAB_WIDTH spaces (see GitCommit.parse_changelog).
LINE_LIMIT = 100
TAB_WIDTH = 8
# Compared against the lower-cased line, hence the lower-case spelling.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

# Lower-case trailer prefixes; lines starting with any of them are skipped
# by GitCommit.parse_changelog.
REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
                   'acked-by: ', 'tested-by: ', 'reported-by: ',
                   'suggested-by: ')
# strftime/strptime-style date format; not referenced in the part of the
# file visible here.
DATE_FORMAT = '%Y-%m-%d'
179fb8a8121Smrg
180fb8a8121Smrg
def decode_path(path):
    """Return PATH with git's core.quotepath quoting removed.

    A path that is not wrapped in double quotes is returned unchanged.
    Otherwise the surrounding pair of quotes is dropped and the backslash
    escapes inside (octal-encoded UTF-8 bytes, escaped quotes, ...) are
    decoded.
    """
    # When core.quotepath is true (default value), utf8 chars are encoded like:
    # "b/ko\304\215ka.txt"
    #
    # The upstream bug is fixed:
    # https://github.com/gitpython-developers/GitPython/issues/1099
    #
    # but we still need a workaround for older versions of the library.
    # Please take a look at the explanation of the transformation:
    # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string

    if path.startswith('"') and path.endswith('"'):
        # Remove exactly one quote on each side.  str.strip('"') would also
        # eat an escaped quote at the end of the name and leave a dangling
        # backslash that the unicode-escape codec rejects.
        quoted = path[1:-1]
        return (quoted.encode('utf8').decode('unicode-escape')
                .encode('latin-1').decode('utf8'))
    else:
        return path
197fb8a8121Smrg
198fb8a8121Smrg
class Error:
    """A problem found while checking a commit.

    Holds the message, optionally the offending line, and optional details.
    """

    def __init__(self, message, line=None, details=None):
        self.message, self.line, self.details = message, line, details

    def __repr__(self):
        # The offending line, when present, is appended in double quotes.
        if not self.line:
            return self.message
        return self.message + ': "%s"' % self.line
210fb8a8121Smrg
211fb8a8121Smrg
class ChangeLogEntry:
    """One ChangeLog entry: its folder, author lines, PR lines and text."""

    def __init__(self, folder, authors, prs):
        self.folder = folder
        # Private copies: the caller keeps appending to its own lists.
        # (list(x) is used because 'list.copy()' is not available before
        # Python 3.3.)
        self.author_lines = list(authors)
        self.initial_prs = list(prs)
        self.prs = list(prs)
        self.lines = []
        self.files = []
        self.file_patterns = []
        self.parentheses_stack = []

    def parse_file_names(self):
        """Fill self.files and self.file_patterns from self.lines."""
        # True while we are between a star prefix and the end of the file
        # list (a colon or an opening bracket of any kind).
        collecting = False

        for text in self.lines:
            # A star prefix followed by exactly one space starts a new file
            # list.  Requiring one space skips macro names such as:
            #
            # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
            # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
            # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
            #
            star = star_prefix_regex.match(text)
            if star and len(star.group('spaces')) == 1:
                collecting = True
                text = star.group('content')

            if not collecting:
                continue

            # Cut away everything that is not a filename: entities
            # "(NAME)", cases "<PATTERN>", conditions "[COND]" and the entry
            # text (the colon, if present, and whatever follows it).
            stop = end_of_location_regex.search(text)
            if stop:
                text = text[:stop.start()]
                collecting = False

            # What remains is a comma/whitespace separated list of names.
            for name in (part.strip() for part in text.split(',')):
                if not name:
                    continue
                if name.endswith('*'):
                    self.file_patterns.append(name[:-1])
                else:
                    self.files.append(name)

    @property
    def datetime(self):
        """First non-empty date among the author lines, or None."""
        return next((item[1] for item in self.author_lines if item[1]), None)

    @property
    def authors(self):
        """Names of all authors, in order."""
        return [item[0] for item in self.author_lines]

    @property
    def is_empty(self):
        """True when no line was added and the PR list is still the initial one."""
        return not (self.lines or self.prs != self.initial_prs)

    def contains_author(self, author):
        """Tell whether AUTHOR is already among the author lines."""
        return any(item[0] == author for item in self.author_lines)
283fb8a8121Smrg
284fb8a8121Smrg
class GitInfo:
    """Plain record with the data of one git commit.

    Stores the hash, date, author, message lines and the list of modified
    files exactly as they are passed in.
    """

    def __init__(self, hexsha, date, author, lines, modified_files):
        self.hexsha, self.date, self.author = hexsha, date, author
        self.lines, self.modified_files = lines, modified_files
292fb8a8121Smrg
293fb8a8121Smrg
294fb8a8121Smrgclass GitCommit:
    def __init__(self, info, commit_to_info_hook=None, ref_name=None):
        """Check the commit described by INFO and collect problems in self.errors.

        INFO provides at least 'lines' (the commit message lines) and
        'modified_files' (pairs of path and status letter).
        COMMIT_TO_INFO_HOOK is called with the hash of a reverted commit and
        its result replaces self.info.  REF_NAME is forwarded to
        init_changelog_locations().
        """
        self.original_info = info
        self.info = info
        self.message = None
        self.changes = None
        self.changelog_entries = []
        self.errors = []
        self.top_level_authors = []
        self.co_authors = []
        self.top_level_prs = []
        self.subject_prs = set()
        self.cherry_pick_commit = None
        self.revert_commit = None
        self.commit_to_info_hook = commit_to_info_hook
        self.init_changelog_locations(ref_name)

        # Skip Update copyright years commits
        if self.info.lines and self.info.lines[0] == 'Update copyright years.':
            return

        if self.info.lines and len(self.info.lines) > 1 and self.info.lines[1]:
            self.errors.append(Error('Expected empty second line in commit message', info.lines[0]))

        # Identify first if the commit is a Revert commit
        for line in self.info.lines:
            m = revert_regex.fullmatch(line)
            if m:
                self.revert_commit = m.group('hash')
                break
        if self.revert_commit:
            # From here on self.info describes the reverted commit, while
            # self.original_info still refers to the revert itself.
            # NOTE(review): the hook defaults to None but is called
            # unconditionally here - presumably callers always supply it
            # when revert commits can occur; confirm.
            self.info = self.commit_to_info_hook(self.revert_commit)

        # The following happens for get_email.py:
        if not self.info:
            return

        self.check_commit_email()

        # Extract PR numbers from the subject line
        # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
        if self.info.lines and not self.revert_commit:
            self.subject_prs = {m.group('pr') for m in subject_pr2_regex.finditer(info.lines[0])}
            for m in subject_pr_regex.finditer(info.lines[0]):
                if not m.group('component') in bug_components:
                    self.errors.append(Error('invalid PR component in subject', info.lines[0]))
                self.subject_prs.add(m.group('pr'))

        # Allow complete deletion of ChangeLog files in a commit
        project_files = [f for f in self.info.modified_files
                         if (self.is_changelog_filename(f[0], allow_suffix=True) and f[1] != 'D')
                         or f[0] in misc_files]
        ignored_files = [f for f in self.info.modified_files
                         if self.in_ignored_location(f[0])]
        if len(project_files) == len(self.info.modified_files):
            # All modified files are only MISC files
            return
        elif project_files:
            err = 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
                  'should be done separately from normal commits\n' \
                  '(note: ChangeLog entries will be automatically ' \
                  'added by a cron job)'
            self.errors.append(Error(err))
            return

        all_are_ignored = (len(project_files) + len(ignored_files)
                           == len(self.info.modified_files))
        self.parse_lines(all_are_ignored)
        if self.changes:
            self.parse_changelog()
            self.parse_file_names()
            self.check_for_empty_description()
            self.check_for_broken_parentheses()
            self.deduce_changelog_locations()
            self.check_file_patterns()
            # The remaining checks run only when nothing has failed so far.
            if not self.errors:
                self.check_mentioned_files()
                self.check_for_correct_changelog()
        # parse_changelog() removes every PR it finds in a 'PR' line from
        # subject_prs, so whatever is left was mentioned in the subject only.
        if self.subject_prs:
            self.errors.append(Error('PR %s in subject but not in changelog' %
                                     ', '.join(self.subject_prs), self.info.lines[0]))
375fb8a8121Smrg
376fb8a8121Smrg    @property
377fb8a8121Smrg    def success(self):
378fb8a8121Smrg        return not self.errors
379fb8a8121Smrg
380fb8a8121Smrg    @property
381fb8a8121Smrg    def new_files(self):
382fb8a8121Smrg        return [x[0] for x in self.info.modified_files if x[1] == 'A']
383fb8a8121Smrg
384fb8a8121Smrg    @classmethod
385a448f87cSmrg    def is_changelog_filename(cls, path, allow_suffix=False):
386a448f87cSmrg        basename = os.path.basename(path)
387a448f87cSmrg        if basename == 'ChangeLog':
388a448f87cSmrg            return True
389a448f87cSmrg        elif allow_suffix and basename.startswith('ChangeLog'):
390a448f87cSmrg            return True
391a448f87cSmrg        else:
392a448f87cSmrg            return False
393fb8a8121Smrg
394a448f87cSmrg    def find_changelog_location(self, name):
395fb8a8121Smrg        if name.startswith('\t'):
396fb8a8121Smrg            name = name[1:]
397fb8a8121Smrg        if name.endswith(':'):
398fb8a8121Smrg            name = name[:-1]
399fb8a8121Smrg        if name.endswith('/'):
400fb8a8121Smrg            name = name[:-1]
401a448f87cSmrg        return name if name in self.changelog_locations else None
402fb8a8121Smrg
403fb8a8121Smrg    @classmethod
404fb8a8121Smrg    def format_git_author(cls, author):
405fb8a8121Smrg        assert '<' in author
406fb8a8121Smrg        return author.replace('<', ' <')
407fb8a8121Smrg
408fb8a8121Smrg    @classmethod
409fb8a8121Smrg    def parse_git_name_status(cls, string):
410fb8a8121Smrg        modified_files = []
411fb8a8121Smrg        for entry in string.split('\n'):
412fb8a8121Smrg            parts = entry.split('\t')
413fb8a8121Smrg            t = parts[0]
414fb8a8121Smrg            if t == 'A' or t == 'D' or t == 'M':
415fb8a8121Smrg                modified_files.append((parts[1], t))
416fb8a8121Smrg            elif t.startswith('R'):
417fb8a8121Smrg                modified_files.append((parts[1], 'D'))
418fb8a8121Smrg                modified_files.append((parts[2], 'A'))
419fb8a8121Smrg        return modified_files
420fb8a8121Smrg
421a448f87cSmrg    def init_changelog_locations(self, ref_name):
422a448f87cSmrg        self.changelog_locations = list(default_changelog_locations)
423a448f87cSmrg        if ref_name:
424a448f87cSmrg            version = sys.maxsize
425a448f87cSmrg            if 'releases/gcc-' in ref_name:
426a448f87cSmrg                version = int(ref_name.split('-')[-1])
427a448f87cSmrg            if version >= 12:
428a448f87cSmrg                # HSA and BRIG were removed in GCC 12
429a448f87cSmrg                self.changelog_locations.remove('gcc/brig')
430a448f87cSmrg                self.changelog_locations.remove('libhsail-rt')
431a448f87cSmrg
432fb8a8121Smrg    def parse_lines(self, all_are_ignored):
433fb8a8121Smrg        body = self.info.lines
434fb8a8121Smrg
435fb8a8121Smrg        for i, b in enumerate(body):
436fb8a8121Smrg            if not b:
437fb8a8121Smrg                continue
438fb8a8121Smrg            if (changelog_regex.match(b) or self.find_changelog_location(b)
439fb8a8121Smrg                    or star_prefix_regex.match(b) or pr_regex.match(b)
440a448f87cSmrg                    or dr_regex.match(b) or author_line_regex.match(b)
441a448f87cSmrg                    or b.lower().startswith(CO_AUTHORED_BY_PREFIX)):
442fb8a8121Smrg                self.changes = body[i:]
443fb8a8121Smrg                return
444fb8a8121Smrg        if not all_are_ignored:
445fb8a8121Smrg            self.errors.append(Error('cannot find a ChangeLog location in '
446fb8a8121Smrg                                     'message'))
447fb8a8121Smrg
    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Walks the ChangeLog part of the message line by line, creating an
        entry for every ChangeLog header (or an entry without a folder when
        content appears before any header), and collecting authors,
        co-authors, PR/DR lines and a cherry-pick hash on the way.  Format
        problems are appended to self.errors.
        """
        # Entry that currently receives lines; None until one is started.
        last_entry = None
        # True once an entry without an explicit folder has been created.
        will_deduce = False
        for line in self.changes:
            if not line:
                # An empty line closes the current entry once a folder-less
                # entry has been seen.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                # support long filenames
                if not line.startswith('\t* ') or not line.endswith(':') or ' ' in line[3:-1]:
                    self.errors.append(Error('line exceeds %d character limit'
                                             % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                # Explicit '<path>ChangeLog' header.
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                # Bare location name used as a header.
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                # Classify the line: at most one of author_tuple / pr_line
                # is set below.
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    m = pr_regex.match(line)
                    component = m.group('component')
                    pr = m.group('pr')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    # 'component' still carries its trailing slash.
                    elif not component[:-1] in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                    # A PR found here no longer counts as "subject only".
                    if pr in self.subject_prs:
                        self.subject_prs.remove(pr)
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                # Trailers are matched case-insensitively.
                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                # From here on the line belongs to last_entry.
                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        # A star not followed by exactly one space is only
                        # accepted inside an open parenthesis.
                        if (len(m.group('spaces')) != 1 and
                                not last_entry.parentheses_stack):
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                # Only the part before the first colon is
                                # checked for empty groups.
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
577a448f87cSmrg
578a448f87cSmrg    def process_parentheses(self, last_entry, line):
579a448f87cSmrg        for c in line:
580a448f87cSmrg            if c == '(':
581a448f87cSmrg                last_entry.parentheses_stack.append(line)
582a448f87cSmrg            elif c == ')':
583a448f87cSmrg                if not last_entry.parentheses_stack:
584a448f87cSmrg                    msg = 'bad wrapping of parenthesis'
585a448f87cSmrg                    self.errors.append(Error(msg, line))
586a448f87cSmrg                else:
587a448f87cSmrg                    del last_entry.parentheses_stack[-1]
588fb8a8121Smrg
589fb8a8121Smrg    def parse_file_names(self):
590fb8a8121Smrg        for entry in self.changelog_entries:
591fb8a8121Smrg            entry.parse_file_names()
592fb8a8121Smrg
593fb8a8121Smrg    def check_file_patterns(self):
594fb8a8121Smrg        for entry in self.changelog_entries:
595fb8a8121Smrg            for pattern in entry.file_patterns:
596fb8a8121Smrg                name = os.path.join(entry.folder, pattern)
597fb8a8121Smrg                if not [name.startswith(pr) for pr in wildcard_prefixes]:
598fb8a8121Smrg                    msg = 'unsupported wildcard prefix'
599fb8a8121Smrg                    self.errors.append(Error(msg, name))
600fb8a8121Smrg
601fb8a8121Smrg    def check_for_empty_description(self):
602fb8a8121Smrg        for entry in self.changelog_entries:
603fb8a8121Smrg            for i, line in enumerate(entry.lines):
604fb8a8121Smrg                if (item_empty_regex.match(line) and
605fb8a8121Smrg                    (i == len(entry.lines) - 1
606fb8a8121Smrg                     or not entry.lines[i+1].strip()
607fb8a8121Smrg                     or item_parenthesis_regex.match(entry.lines[i+1]))):
608fb8a8121Smrg                    msg = 'missing description of a change'
609fb8a8121Smrg                    self.errors.append(Error(msg, line))
610fb8a8121Smrg
611a448f87cSmrg    def check_for_broken_parentheses(self):
612a448f87cSmrg        for entry in self.changelog_entries:
613a448f87cSmrg            if entry.parentheses_stack:
614a448f87cSmrg                msg = 'bad parentheses wrapping'
615a448f87cSmrg                self.errors.append(Error(msg, entry.parentheses_stack[-1]))
616a448f87cSmrg
617fb8a8121Smrg    def get_file_changelog_location(self, changelog_file):
618fb8a8121Smrg        for file in self.info.modified_files:
619fb8a8121Smrg            if file[0] == changelog_file:
620fb8a8121Smrg                # root ChangeLog file
621fb8a8121Smrg                return ''
622fb8a8121Smrg            index = file[0].find('/' + changelog_file)
623fb8a8121Smrg            if index != -1:
624fb8a8121Smrg                return file[0][:index]
625fb8a8121Smrg        return None
626fb8a8121Smrg
627fb8a8121Smrg    def deduce_changelog_locations(self):
628fb8a8121Smrg        for entry in self.changelog_entries:
629fb8a8121Smrg            if not entry.folder:
630fb8a8121Smrg                changelog = None
631fb8a8121Smrg                for file in entry.files:
632fb8a8121Smrg                    location = self.get_file_changelog_location(file)
633fb8a8121Smrg                    if (location == ''
634a448f87cSmrg                       or (location and location in self.changelog_locations)):
635fb8a8121Smrg                        if changelog and changelog != location:
636fb8a8121Smrg                            msg = 'could not deduce ChangeLog file, ' \
637fb8a8121Smrg                                  'not unique location'
638fb8a8121Smrg                            self.errors.append(Error(msg))
639fb8a8121Smrg                            return
640fb8a8121Smrg                        changelog = location
641fb8a8121Smrg                if changelog is not None:
642fb8a8121Smrg                    entry.folder = changelog
643fb8a8121Smrg                else:
644fb8a8121Smrg                    msg = 'could not deduce ChangeLog file'
645fb8a8121Smrg                    self.errors.append(Error(msg))
646fb8a8121Smrg
647fb8a8121Smrg    @classmethod
648fb8a8121Smrg    def in_ignored_location(cls, path):
649fb8a8121Smrg        for ignored in ignored_prefixes:
650fb8a8121Smrg            if path.startswith(ignored):
651fb8a8121Smrg                return True
652fb8a8121Smrg        return False
653fb8a8121Smrg
654a448f87cSmrg    def get_changelog_by_path(self, path):
655fb8a8121Smrg        components = path.split('/')
656fb8a8121Smrg        while components:
657a448f87cSmrg            if '/'.join(components) in self.changelog_locations:
658fb8a8121Smrg                break
659fb8a8121Smrg            components = components[:-1]
660fb8a8121Smrg        return '/'.join(components)
661fb8a8121Smrg
    def check_mentioned_files(self):
        """Cross-check files named in ChangeLog entries against changed files.

        Errors are appended to ``self.errors`` for: entries naming no files
        or patterns, a file named more than once, a named file that was not
        changed (with a close-match suggestion when one exists), a changed
        file that is neither named nor covered by a pattern, and a pattern
        that covers no changed file.

        Side effect: a changed file that is listed in ``self.new_files`` but
        not named gets a '* <file>: New file.' line added to the entry of its
        ChangeLog location (a new entry is created and appended to
        ``self.changelog_entries`` if none exists), unless that location is
        the top-level folder, which is reported as an error instead.
        """
        # NOTE(review): the comprehension has no filter, so both lengths are
        # always equal and this assertion cannot fail.
        folder_count = len([x.folder for x in self.changelog_entries])
        assert folder_count == len(self.changelog_entries)

        mentioned_files = set()
        mentioned_patterns = []
        used_patterns = set()
        # Pass 1: collect every explicitly named file and every pattern,
        # both prefixed with the entry's folder.
        for entry in self.changelog_entries:
            if not entry.files and not entry.file_patterns:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    item = os.path.join(entry.folder, file)
                    if item in mentioned_files:
                        msg = 'same file specified multiple times'
                        self.errors.append(Error(msg, file))
                    else:
                        mentioned_files.add(item)
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        # Element 0 of each modified_files item is used as the file path;
        # ChangeLog files themselves are excluded from the comparison.
        cand = [x[0] for x in self.info.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        # Pass 2: files named in a ChangeLog that the commit did not change.
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            candidates = difflib.get_close_matches(file, changed_files, 1)
            details = None
            if candidates:
                msg += f' (did you mean "{candidates[0]}"?)'
                details = '\n'.join(difflib.Differ().compare([file], [candidates[0]])).rstrip()
            self.errors.append(Error(msg, file, details))
        # Pass 3: files the commit changed that no ChangeLog names.
        for file in sorted(changed_files - mentioned_files):
            if not self.in_ignored_location(file):
                if file in self.new_files:
                    changelog_location = self.get_changelog_by_path(file)
                    # Python2: we cannot use next(filter(...))
                    entries = filter(lambda x: x.folder == changelog_location,
                                     self.changelog_entries)
                    entries = list(entries)
                    entry = entries[0] if entries else None
                    if not entry:
                        prs = self.top_level_prs
                        if not prs:
                            # if all ChangeLog entries have identical PRs
                            # then use them
                            prs = self.changelog_entries[0].prs
                            # NOTE: this loop rebinds 'entry'; it is assigned
                            # again right after the loop.
                            for entry in self.changelog_entries:
                                if entry.prs != prs:
                                    prs = []
                                    break
                        entry = ChangeLogEntry(changelog_location,
                                               self.top_level_authors,
                                               prs)
                        self.changelog_entries.append(entry)
                    # strip prefix of the file
                    assert file.startswith(entry.folder)
                    # do not allow auto-addition of New files
                    # for the top-level folder
                    if entry.folder:
                        file = file[len(entry.folder):].lstrip('/')
                        entry.lines.append('\t* %s: New file.' % file)
                        entry.files.append(file)
                    else:
                        msg = 'new file in the top-level folder not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))
                else:
                    # A pattern covers a file when the file path starts with
                    # it; only the first covering pattern is marked as used.
                    used_pattern = [p for p in mentioned_patterns
                                    if file.startswith(p)]
                    used_pattern = used_pattern[0] if used_pattern else None
                    if used_pattern:
                        used_patterns.add(used_pattern)
                    else:
                        msg = 'changed file not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))

        # Pass 4: patterns that were never marked as used above.
        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))
744fb8a8121Smrg
745fb8a8121Smrg    def check_for_correct_changelog(self):
746fb8a8121Smrg        for entry in self.changelog_entries:
747fb8a8121Smrg            for file in entry.files:
748fb8a8121Smrg                full_path = os.path.join(entry.folder, file)
749fb8a8121Smrg                changelog_location = self.get_changelog_by_path(full_path)
750fb8a8121Smrg                if changelog_location != entry.folder:
751fb8a8121Smrg                    msg = 'wrong ChangeLog location "%s", should be "%s"'
752fb8a8121Smrg                    err = Error(msg % (entry.folder, changelog_location), file)
753fb8a8121Smrg                    self.errors.append(err)
754fb8a8121Smrg
755fb8a8121Smrg    @classmethod
756fb8a8121Smrg    def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
757fb8a8121Smrg        output = ''
758fb8a8121Smrg        for i, author in enumerate(authors):
759fb8a8121Smrg            if i == 0:
760fb8a8121Smrg                output += '%s%s  %s\n' % (prefix, timestamp, author)
761fb8a8121Smrg            else:
762fb8a8121Smrg                output += '%s\t    %s\n' % (prefix, author)
763fb8a8121Smrg        output += '\n'
764fb8a8121Smrg        return output
765fb8a8121Smrg
766fb8a8121Smrg    def to_changelog_entries(self, use_commit_ts=False):
767fb8a8121Smrg        current_timestamp = self.info.date.strftime(DATE_FORMAT)
768fb8a8121Smrg        for entry in self.changelog_entries:
769fb8a8121Smrg            output = ''
770fb8a8121Smrg            timestamp = entry.datetime
771fb8a8121Smrg            if self.revert_commit:
772fb8a8121Smrg                timestamp = current_timestamp
773fb8a8121Smrg                orig_date = self.original_info.date
774fb8a8121Smrg                current_timestamp = orig_date.strftime(DATE_FORMAT)
775fb8a8121Smrg            elif self.cherry_pick_commit:
776fb8a8121Smrg                info = self.commit_to_info_hook(self.cherry_pick_commit)
777fb8a8121Smrg                # it can happen that it is a cherry-pick for a different
778fb8a8121Smrg                # repository
779fb8a8121Smrg                if info:
780fb8a8121Smrg                    timestamp = info.date.strftime(DATE_FORMAT)
781fb8a8121Smrg                else:
782fb8a8121Smrg                    timestamp = current_timestamp
783fb8a8121Smrg            elif not timestamp or use_commit_ts:
784fb8a8121Smrg                timestamp = current_timestamp
785fb8a8121Smrg            authors = entry.authors if entry.authors else [self.info.author]
786fb8a8121Smrg            # add Co-Authored-By authors to all ChangeLog entries
787fb8a8121Smrg            for author in self.co_authors:
788fb8a8121Smrg                if author not in authors:
789fb8a8121Smrg                    authors.append(author)
790fb8a8121Smrg
791fb8a8121Smrg            if self.cherry_pick_commit or self.revert_commit:
792fb8a8121Smrg                original_author = self.original_info.author
793fb8a8121Smrg                output += self.format_authors_in_changelog([original_author],
794fb8a8121Smrg                                                           current_timestamp)
795fb8a8121Smrg                if self.revert_commit:
796fb8a8121Smrg                    output += '\tRevert:\n'
797fb8a8121Smrg                else:
798fb8a8121Smrg                    output += '\tBackported from master:\n'
799fb8a8121Smrg                output += self.format_authors_in_changelog(authors,
800fb8a8121Smrg                                                           timestamp, '\t')
801fb8a8121Smrg            else:
802fb8a8121Smrg                output += self.format_authors_in_changelog(authors, timestamp)
803fb8a8121Smrg            for pr in entry.prs:
804fb8a8121Smrg                output += '\t%s\n' % pr
805fb8a8121Smrg            for line in entry.lines:
806fb8a8121Smrg                output += line + '\n'
807fb8a8121Smrg            yield (entry.folder, output.rstrip())
808fb8a8121Smrg
809fb8a8121Smrg    def print_output(self):
810fb8a8121Smrg        for entry, output in self.to_changelog_entries():
811fb8a8121Smrg            print('------ %s/ChangeLog ------ ' % entry)
812fb8a8121Smrg            print(output)
813fb8a8121Smrg
814fb8a8121Smrg    def print_errors(self):
815fb8a8121Smrg        print('Errors:')
816fb8a8121Smrg        for error in self.errors:
817fb8a8121Smrg            print(error)
818a448f87cSmrg
819a448f87cSmrg    def check_commit_email(self):
820a448f87cSmrg        # Parse 'Martin Liska  <mliska@suse.cz>'
821a448f87cSmrg        email = self.info.author.split(' ')[-1].strip('<>')
822a448f87cSmrg
823a448f87cSmrg        # Verify that all characters are ASCII
824a448f87cSmrg        # TODO: Python 3.7 provides a nicer function: isascii
825a448f87cSmrg        if len(email) != len(email.encode()):
826a448f87cSmrg            self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))
827