xref: /netbsd-src/external/gpl3/gcc.old/dist/contrib/mklog.py (revision 4c3eb207d36f67d31994830c0a694161fc1ca39b)
1*4c3eb207Smrg#!/usr/bin/env python3
2*4c3eb207Smrg
3*4c3eb207Smrg# Copyright (C) 2020 Free Software Foundation, Inc.
4*4c3eb207Smrg#
5*4c3eb207Smrg# This file is part of GCC.
6*4c3eb207Smrg#
7*4c3eb207Smrg# GCC is free software; you can redistribute it and/or modify
8*4c3eb207Smrg# it under the terms of the GNU General Public License as published by
9*4c3eb207Smrg# the Free Software Foundation; either version 3, or (at your option)
10*4c3eb207Smrg# any later version.
11*4c3eb207Smrg#
12*4c3eb207Smrg# GCC is distributed in the hope that it will be useful,
13*4c3eb207Smrg# but WITHOUT ANY WARRANTY; without even the implied warranty of
14*4c3eb207Smrg# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15*4c3eb207Smrg# GNU General Public License for more details.
16*4c3eb207Smrg#
17*4c3eb207Smrg# You should have received a copy of the GNU General Public License
18*4c3eb207Smrg# along with GCC; see the file COPYING.  If not, write to
19*4c3eb207Smrg# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20*4c3eb207Smrg# Boston, MA 02110-1301, USA.
21*4c3eb207Smrg
22*4c3eb207Smrg# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
23*4c3eb207Smrg# and adds a skeleton ChangeLog file to the file. It does not try to be
24*4c3eb207Smrg# too smart when parsing function names, but it produces a reasonable
25*4c3eb207Smrg# approximation.
26*4c3eb207Smrg#
27*4c3eb207Smrg# Author: Martin Liska <mliska@suse.cz>
28*4c3eb207Smrg
29*4c3eb207Smrgimport argparse
30*4c3eb207Smrgimport datetime
31*4c3eb207Smrgimport os
32*4c3eb207Smrgimport re
33*4c3eb207Smrgimport subprocess
34*4c3eb207Smrgimport sys
35*4c3eb207Smrgfrom itertools import takewhile
36*4c3eb207Smrg
37*4c3eb207Smrgimport requests
38*4c3eb207Smrg
39*4c3eb207Smrgfrom unidiff import PatchSet
40*4c3eb207Smrg
# A bug reference inside a comment: a comment leader ('//', '/*', or one of
# the single characters 'C', 'c', '*', '!'), whitespace, and then
# 'PR <component>/<number>'.
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits a 'PR <component>/<number>' reference into its two named parts.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# Same comment leaders as pr_regex, but for 'DR <number>' references.
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# An opening brace, whitespace, and a dg-error or dg-warning directive.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# A PR number embedded in a file name: 'pr' (either case) followed by at
# least four digits, at the start of the name or right after a non-word
# character or an underscore.
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
# A line whose very first character is a letter, a digit, '_' or '#';
# the single group captures the whole line.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# A line that starts with '/*'.
comment_regex = re.compile(r'^\/\*')
# 'class', 'struct', 'union' or 'enum' at the start of a line, an optional
# 'GTY(...)' marker, and then the type name (group 3).
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
# '#define NAME' or '#undef NAME'; the name is group 2.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# 'DEF<UPPERCASE_NAME> (' at the start of a line; group 1 is the first
# argument of the macro call.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# A name followed by '(' and one character other than '*'.
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# A '<...>' group that contains no further angle brackets.
template_and_param_regex = re.compile(r'<[^<>]*>')
# '(define... "name"' as found in .md files; group 1 is the quoted name.
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
# Query URL used to fetch the summary and component of one bug; the bug
# id is substituted for '%s'.
bugzilla_url = ('https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&'
                'include_fields=summary,component')

# File extensions for which function names are looked up in hunks.
function_extensions = {
    '.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md',
}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {
    'aclocal.m4', 'config.h.in', 'configure',
}

help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# NB: despite its name, script_folder is the resolved path of this script
# itself, so the two dirname() calls below yield the directory above the
# one that holds the script.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))

# First PR reference found by generate_changelog(); the command-line entry
# point reads it to propose a commit subject line.
firstpr = ''
73*4c3eb207Smrg
74*4c3eb207Smrg
def find_changelog(path):
    """Return the closest directory above *path* that has a ChangeLog.

    Starting at the directory part of *path*, each ancestor directory is
    checked for a 'ChangeLog' file located under the module-level ``root``.
    The first directory that has one is returned.  When the top of the
    path is reached without a hit, '' is returned.

    The walk stops as soon as os.path.dirname() no longer shortens the
    directory.  That covers relative paths (which end at '') as well as
    absolute ones (which end at the file system root), so an absolute path
    without any ChangeLog above it cannot loop forever.
    """
    folder = os.path.dirname(path)
    while True:
        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        if parent == folder:
            # Nothing left to strip: no ChangeLog covers this path.
            return ''
        folder = parent
84*4c3eb207Smrg
85*4c3eb207Smrg
def extract_function_name(line):
    """Return the name of the entity declared on *line*, or None.

    Lines that start a '/*' comment never yield a name.  Otherwise the
    line is tried, in this order, as a struct-like declaration
    ('<keyword> <name>'), a macro definition, a DEF* "supermacro" call and
    finally a function; the first pattern that matches decides the result.
    """
    if comment_regex.match(line):
        return None

    # Patterns whose result is built directly from the match groups.
    simple_patterns = (
        # Struct declaration
        (struct_regex, lambda hit: hit.group(1) + ' ' + hit.group(3)),
        # Macro definition
        (macro_regex, lambda hit: hit.group(2)),
        # Supermacro
        (super_macro_regex, lambda hit: hit.group(1)),
    )
    for pattern, build_name in simple_patterns:
        hit = pattern.search(line)
        if hit:
            return build_name(hit)

    hit = fn_regex.search(line)
    if not hit:
        return None
    # Discard template and function parameters.
    return template_and_param_regex.sub('', hit.group(1)).rstrip()
108*4c3eb207Smrg
109*4c3eb207Smrg
def try_add_function(functions, line):
    """Record the name declared on *line* in *functions*.

    The name comes from extract_function_name() and is appended only if
    the list does not contain it yet.  Returns True when a name was found
    on the line (whether or not it was already listed), False otherwise.
    """
    name = extract_function_name(line)
    if not name:
        return False
    if name not in functions:
        functions.append(name)
    return True
115*4c3eb207Smrg
116*4c3eb207Smrg
def sort_changelog_files(changed_file):
    """Sort key for the files of one ChangeLog.

    The key is the pair (is_added_file, is_removed_file), so files that
    are neither added nor removed sort first, removed files next and
    added files last.
    """
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return added, removed
119*4c3eb207Smrg
120*4c3eb207Smrg
# Seconds to wait for Bugzilla before giving up on a request.
_BUGZILLA_TIMEOUT = 30


def get_pr_titles(prs):
    """Fetch the titles of *prs* from Bugzilla and return them as text.

    The bug id of an entry is whatever follows its last '/'; a leading
    'PR ' (as in the component-less entry 'PR 12345') is dropped so that
    only the id itself is sent and written back.  Every entry whose query
    returns exactly one bug is rewritten in place, inside *prs*, as
    'PR <component>/<id>' using the component reported by Bugzilla.

    Returns '' when no title was obtained.  Otherwise the result holds one
    '<PR> - <summary>' line per distinct title, each followed by an empty
    line.

    A request that does not complete within _BUGZILLA_TIMEOUT seconds
    raises the corresponding requests exception instead of blocking.
    """
    output = []
    for idx, pr in enumerate(prs):
        pr_id = pr.split('/')[-1]
        if pr_id.startswith('PR '):
            pr_id = pr_id[len('PR '):]
        r = requests.get(bugzilla_url % pr_id, timeout=_BUGZILLA_TIMEOUT)
        # A reply without a 'bugs' list is treated like "no such bug".
        bugs = r.json().get('bugs') or []
        if len(bugs) == 1:
            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
            if out not in output:
                output.append(out)
    if output:
        # Makes the joined text end with the blank separator line.
        output.append('')
    return '\n'.join(output)
135*4c3eb207Smrg
136*4c3eb207Smrg
def _collect_test_prs(file, prs):
    """Append the PR/DR references of the newly added test *file* to *prs*.

    References are taken from the first ten lines of the first hunk and
    from a 'prNNNN' number in the file name.  A file without any hunk
    contributes only its file name.
    """
    numbers_in_file = []
    hunks = list(file)
    if hunks:
        # Only search the first ten lines, as later lines may contain
        # commented-out code with a note that it has not been tested due
        # to a certain PR or DR.
        for line in hunks[0][0:10]:
            m = pr_regex.search(line.value)
            if m:
                pr = m.group('pr')
                # Remember the number even if another file already
                # contributed this PR; otherwise the file-name check
                # below would add a second, component-less entry.
                numbers_in_file.append(pr.split('/')[-1])
                if pr not in prs:
                    prs.append(pr)
                continue
            m = dr_regex.search(line.value)
            if m:
                dr = m.group('dr')
                if dr not in prs:
                    prs.append(dr)
            elif dg_regex.search(line.value):
                # Found dg-warning/dg-error line
                break

    # PR number in the file name
    m = pr_filename_regex.search(os.path.basename(file.path))
    if m:
        number = m.group('pr')
        pr = 'PR ' + number
        if number not in numbers_in_file and pr not in prs:
            prs.append(pr)


def _scan_hunk_for_functions(hunk, extension, functions):
    """Append the names of the functions touched by *hunk* to *functions*.

    A name is taken from a changed line that itself declares something,
    or from the closest preceding context line that does; *extension*
    enables the special handling of '(define... "name"' lines in .md
    files.  If nothing is found this way, the hunk's section header is
    tried instead.
    """
    last_fn = None
    modified_visited = False
    success = False
    for line in hunk:
        m = identifier_regex.match(line.value)
        if line.is_added or line.is_removed:
            # special-case definition in .md files
            m2 = md_def_regex.match(line.value)
            if extension == '.md' and m2:
                fn = m2.group(1)
                if fn not in functions:
                    functions.append(fn)
                    last_fn = None
                    success = True

            if not line.value.strip():
                continue
            modified_visited = True
            if m and try_add_function(functions, m.group(1)):
                last_fn = None
                success = True
        elif line.is_context:
            if last_fn and modified_visited:
                try_add_function(functions, last_fn)
                last_fn = None
                modified_visited = False
                success = True
            elif m:
                last_fn = m.group(1)
                modified_visited = False
    if not success:
        try_add_function(functions, hunk.section_header)


def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return a ChangeLog skeleton for the patch *data*.

    *data* is handed to unidiff's PatchSet.  The result starts with the
    list of PR/DR references (preceded by their Bugzilla titles when
    *fill_pr_titles* is set), followed by one section per affected
    ChangeLog.  Sections whose directory contains 'testsuite' come last.

    no_functions    -- do not list function names for modified files.
    fill_pr_titles  -- fetch PR titles via get_pr_titles().
    additional_prs  -- references to list ahead of those found in the
                       patch; repeated entries are listed once.

    Side effect: the module-level ``firstpr`` is set to the first
    reference, if there is any.
    """
    global firstpr

    changelogs = {}
    changelog_list = []
    prs = []
    diff = PatchSet(data)

    for pr in additional_prs or []:
        if pr not in prs:
            prs.append(pr)

    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            _collect_test_prs(file, prs)

    if prs:
        firstpr = prs[0]

    titles = get_pr_titles(prs) if fill_pr_titles else ''
    out = [titles]

    # print list of PR entries before ChangeLog entries
    if prs:
        if not titles:
            out.append('\n')
        out.extend('\t%s\n' % pr for pr in prs)
        out.append('\n')

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        out.append('%s:\n' % os.path.join(changelog, 'ChangeLog'))
        out.append('\n')
        # new and deleted files should be at the end
        for file in sorted(changelogs[changelog], key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = file.path[len(changelog):].lstrip('/')
            if file.is_added_file:
                msg = 'New test' if in_tests else 'New file'
                out.append('\t* %s: %s.\n' % (relative_path, msg))
            elif file.is_removed_file:
                out.append('\t* %s: Removed.\n' % relative_path)
            elif getattr(file, 'is_rename', False):
                out.append('\t* %s: Moved to...\n' % relative_path)
                new_path = file.target_file[2:]
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                if new_path.startswith(changelog):
                    new_path = new_path[len(changelog):].lstrip('/')
                out.append('\t* %s: ...here.\n' % new_path)
            elif os.path.basename(file.path) in generated_files:
                out.append('\t* %s: Regenerate.\n' % relative_path)
            else:
                functions = []
                extension = os.path.splitext(relative_path)[1]
                # Do not add function names for testsuite files
                if (not no_functions and not in_tests
                        and extension in function_extensions):
                    for hunk in file:
                        _scan_hunk_for_functions(hunk, extension, functions)
                if functions:
                    out.append('\t* %s (%s):\n'
                               % (relative_path, functions[0]))
                    out.extend('\t(%s):\n' % fn for fn in functions[1:])
                else:
                    out.append('\t* %s:\n' % relative_path)
        out.append('\n')
    return ''.join(out)
278*4c3eb207Smrg
279*4c3eb207Smrg
def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to the affected ChangeLogs.

    *data* is handed to unidiff's PatchSet.  For every file of the patch
    the covering ChangeLog is determined with find_changelog(), and each
    such ChangeLog is rewritten once with a new entry at its top.  The
    entry header is made of today's date and the name and e-mail address
    reported by 'git config'.

    Patch entries whose path is '/dev/null' are skipped, as
    generate_changelog() does.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    # The commands are fixed, so they are run directly rather than
    # through a shell.
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()

    changelogs = set()
    diff = PatchSet(data)

    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
        if changelog in changelogs:
            continue
        changelogs.add(changelog)
        with open(changelog) as f:
            content = f.read()
        with open(changelog, 'w') as f:
            f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
            f.write('\tUpdate copyright years.\n\n')
            f.write(content)
300*4c3eb207Smrg
301*4c3eb207Smrg
# Command-line entry point: parse the options, read the patch and either
# update the copyright entries, print the ChangeLog skeleton, or insert it
# into a git commit message file.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                             'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    # Read the patch up front so that the input file is closed again;
    # PatchSet only needs an iterable of lines.
    if args.input:
        with open(args.input) as patch:
            data = patch.readlines()
    else:
        data = sys.stdin.readlines()

    if args.update_copyright:
        update_copyright(data)
    else:
        output = generate_changelog(data, args.no_functions,
                                    args.fill_up_bug_titles, args.pr_numbers)
        if args.changelog:
            with open(args.changelog) as f:
                lines = f.read().split('\n')
            # Everything before the first '#' line is the message text;
            # the '#' line and all that follows is kept after the output.
            start = list(takewhile(lambda line: not line.startswith('#'),
                                   lines))
            end = lines[len(start):]
            with open(args.changelog, 'w') as f:
                if not start or not start[0]:
                    # initial commit subject line 'component: [PRnnnnn]'
                    m = prnum_regex.match(firstpr)
                    if m:
                        title = f'{m.group("comp")}: [PR{m.group("num")}]'
                        start.insert(0, title)
                if start:
                    # append empty line
                    if start[-1] != '':
                        start.append('')
                else:
                    # append 2 empty lines
                    start = 2 * ['']
                f.write('\n'.join(start))
                f.write('\n')
                f.write(output)
                f.write('\n'.join(end))
        else:
            print(output, end='')
357