#!/usr/bin/env python3

# Copyright (C) 2020 Free Software Foundation, Inc.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING. If not, write to
# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.

# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and adds a skeleton ChangeLog file to the file. It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
#
# Author: Martin Liska <mliska@suse.cz>

import argparse
import datetime
import os
import re
import subprocess
import sys
from itertools import takewhile

import requests

from unidiff import PatchSet

# Maximum width of a generated ChangeLog line; a tab counts as TAB_WIDTH
# columns when the width is measured.
LINE_LIMIT = 100
TAB_WIDTH = 8
# Lower-case prefix of the commit-message trailer lines that end the
# leading part of a commit message.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

# 'PR component/number' following a comment leader ('//', '/*', or one of
# the characters 'C', 'c', '*', '!') and whitespace.
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits a 'PR component/number' string into its two parts.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# 'DR number' following the same comment leaders as pr_regex.
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# A '{ dg-error' or '{ dg-warning' directive.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# 'pr'/'PR' followed by at least four digits, at the start of the name or
# after a non-alphanumeric character.
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
# A line whose first character is alphanumeric, '_' or '#'.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# A line that starts with '/*'.
comment_regex = re.compile(r'^\/\*')
# class/struct/union/enum declaration with an optional GTY(...) marker.
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
# '#define NAME' or '#undef NAME'.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# 'DEF<UPPERCASE> (name' at the start of a line.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# A name followed by '(' and a character other than '*'.
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# An innermost '<...>' group (no nested angle brackets).
template_and_param_regex = re.compile(r'<[^<>]*>')
# '(define... "name"' as used for definitions in .md files.
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
# Bugzilla REST query; '%s' is replaced with the bug id.
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
    'include_fields=summary,component'

# File extensions for which function names are extracted from hunks.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# NOTE: despite its name this is the resolved path of this script itself;
# 'root' is therefore the directory two levels above the script file.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))

# First PR collected by the most recent generate_changelog() call.
firstpr = ''


def find_changelog(path):
    """Return the folder (relative to ``root``) whose ChangeLog covers PATH.

    Starting at the directory part of PATH, walk towards the top and return
    the first folder for which ``<root>/<folder>/ChangeLog`` exists.  The
    empty string is returned when the walk runs out of parent folders.
    """
    folder = os.path.split(path)[0]
    while True:
        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        # dirname() yields '' at the top of a relative path, but returns
        # its argument unchanged at a filesystem root; stop in both cases
        # so that an absolute path cannot loop forever.
        if parent in ('', folder):
            return ''
        folder = parent


def extract_function_name(line):
    """Return the name declared on LINE, or None when nothing is recognised.

    The checks are tried in this order: struct-like declaration, macro
    definition, DEF* "supermacro", function.  Lines starting a '/*' comment
    never yield a name.
    """
    if comment_regex.match(line):
        return None
    m = struct_regex.search(line)
    if m:
        # Struct declaration
        return m.group(1) + ' ' + m.group(3)
    m = macro_regex.search(line)
    if m:
        # Macro definition
        return m.group(2)
    m = super_macro_regex.search(line)
    if m:
        # Supermacro
        return m.group(1)
    m = fn_regex.search(line)
    if m:
        # Discard template and function parameters.
        fn = template_and_param_regex.sub('', m.group(1))
        return fn.rstrip()
    return None


def try_add_function(functions, line):
    """Append the name found on LINE to FUNCTIONS unless already present.

    Returns True when LINE yielded a name (even one that was already in
    the list), False otherwise.
    """
    fn = extract_function_name(line)
    if fn and fn not in functions:
        functions.append(fn)
    return bool(fn)


def sort_changelog_files(changed_file):
    """Sort key that places added and removed files after modified ones."""
    return (changed_file.is_added_file, changed_file.is_removed_file)


def get_pr_titles(prs):
    """Return 'PR component/id - summary' lines for the entries of PRS.

    Each entry is looked up through the Bugzilla REST interface.  When
    exactly one bug is returned, the entry in PRS is rewritten IN PLACE to
    'PR <component>/<id>' using the component reported by Bugzilla.  The
    result is the empty string when no title could be fetched.
    """
    output = []
    for idx, pr in enumerate(prs):
        pr_id = pr.split('/')[-1]
        r = requests.get(bugzilla_url % pr_id)
        bugs = r.json()['bugs']
        if len(bugs) == 1:
            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
            if out not in output:
                output.append(out)
    if output:
        # A trailing empty item makes join() end the text with a newline.
        output.append('')
    return '\n'.join(output)


def append_changelog_line(out, relative_path, text):
    """Return OUT extended by a ChangeLog entry for RELATIVE_PATH.

    TEXT is put on the same line as the path when the result fits into
    LINE_LIMIT columns (a tab counting as TAB_WIDTH columns); otherwise it
    is moved to a tab-indented line of its own.
    """
    line = f'\t* {relative_path}:'
    if len(line.replace('\t', ' ' * TAB_WIDTH) + ' ' + text) <= LINE_LIMIT:
        out += f'{line} {text}\n'
    else:
        out += f'{line}\n'
        out += f'\t{text}\n'
    return out


def get_rel_path_if_prefixed(path, folder):
    """Return PATH without the leading FOLDER (and following slashes).

    PATH is returned unchanged when it does not start with FOLDER.
    """
    if path.startswith(folder):
        return path[len(folder):].lstrip('/')
    return path


def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return a ChangeLog skeleton for the patch read from DATA.

    DATA is handed to unidiff's PatchSet.
    NO_FUNCTIONS suppresses the list of changed function names.
    FILL_PR_TITLES prepends the PR summaries fetched by get_pr_titles().
    ADDITIONAL_PRS is an optional iterable of PR strings; an entry that
    contains '/' but lacks the 'PR ' prefix gets the prefix added.

    As a side effect the module-level ``firstpr`` is set to the first PR
    collected, when there is one.
    """
    global firstpr

    changelogs = {}
    changelog_list = []
    prs = []
    out = ''
    diff = PatchSet(data)

    if additional_prs:
        for apr in additional_prs:
            if not apr.startswith('PR ') and '/' in apr:
                apr = 'PR ' + apr
            if apr not in prs:
                prs.append(apr)
    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            # Only search first ten lines as later lines may
            # contains commented code which a note that it
            # has not been tested due to a certain PR or DR.
            this_file_prs = []
            hunks = list(file)
            # A newly added file without any hunk has no lines to scan.
            first_lines = hunks[0][0:10] if hunks else []
            for line in first_lines:
                m = pr_regex.search(line.value)
                if m:
                    pr = m.group('pr')
                    if pr not in prs:
                        prs.append(pr)
                        this_file_prs.append(pr.split('/')[-1])
                else:
                    m = dr_regex.search(line.value)
                    if m:
                        dr = m.group('dr')
                        if dr not in prs:
                            prs.append(dr)
                            this_file_prs.append(dr.split('/')[-1])
                    elif dg_regex.search(line.value):
                        # Found dg-warning/dg-error line
                        break
            # PR number in the file name
            fname = os.path.basename(file.path)
            m = pr_filename_regex.search(fname)
            if m:
                pr = m.group('pr')
                pr2 = 'PR ' + pr
                if pr not in this_file_prs and pr2 not in prs:
                    prs.append(pr2)

    if prs:
        firstpr = prs[0]

    if fill_pr_titles:
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = get_rel_path_if_prefixed(file.path, changelog)
            functions = []
            if file.is_added_file:
                msg = 'New test.' if in_tests else 'New file.'
                out = append_changelog_line(out, relative_path, msg)
            elif file.is_removed_file:
                out = append_changelog_line(out, relative_path, 'Removed.')
            elif hasattr(file, 'is_rename') and file.is_rename:
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog. Let user fix it.
                #
                # Since unidiff 0.7.0, path.file == path.target_file[2:],
                # it used to be path.source_file[2:]
                relative_path = get_rel_path_if_prefixed(file.source_file[2:],
                                                         changelog)
                out = append_changelog_line(out, relative_path, 'Moved to...')
                new_path = get_rel_path_if_prefixed(file.target_file[2:],
                                                    changelog)
                out += f'\t* {new_path}: ...here.\n'
            elif os.path.basename(file.path) in generated_files:
                # Emit the entry exactly once, through the wrapping helper
                # whose result must be assigned back to 'out'.
                out = append_changelog_line(out, relative_path, 'Regenerate.')
            else:
                if not no_functions:
                    for hunk in file:
                        # Do not add function names for testsuite files
                        extension = os.path.splitext(relative_path)[1]
                        if not in_tests and extension in function_extensions:
                            last_fn = None
                            modified_visited = False
                            success = False
                            for line in hunk:
                                m = identifier_regex.match(line.value)
                                if line.is_added or line.is_removed:
                                    # special-case definition in .md files
                                    m2 = md_def_regex.match(line.value)
                                    if extension == '.md' and m2:
                                        fn = m2.group(1)
                                        if fn not in functions:
                                            functions.append(fn)
                                            last_fn = None
                                            success = True

                                    if not line.value.strip():
                                        continue
                                    modified_visited = True
                                    if m and try_add_function(functions,
                                                              m.group(1)):
                                        last_fn = None
                                        success = True
                                elif line.is_context:
                                    if last_fn and modified_visited:
                                        try_add_function(functions, last_fn)
                                        last_fn = None
                                        modified_visited = False
                                        success = True
                                    elif m:
                                        last_fn = m.group(1)
                                        modified_visited = False
                            if not success:
                                # Fall back to the hunk's section header.
                                try_add_function(functions,
                                                 hunk.section_header)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out


def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to the touched ChangeLogs.

    For every file of the patch read from DATA the covering ChangeLog is
    located with find_changelog() and rewritten once with a new dated
    entry, attributed to git's user.name / user.email, placed in front of
    its previous content.  The ChangeLog path is opened as given, i.e.
    relative to the current working directory.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    # An argument list avoids spawning a shell for a fixed command.
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()

    changelogs = set()
    diff = PatchSet(data)

    for file in diff:
        changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
        if changelog not in changelogs:
            changelogs.add(changelog)
            with open(changelog) as f:
                content = f.read()
            with open(changelog, 'w') as f:
                f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
                f.write('\tUpdate copyright years.\n\n')
                f.write(content)


def skip_line_in_changelog(line):
    """Predicate for takewhile(): is LINE part of the leading message text?

    Returns False for the first 'Co-authored-by: ' trailer (matched case
    insensitively) or '#' comment line, True for every other line.
    """
    return not (line.lower().startswith(CO_AUTHORED_BY_PREFIX)
                or line.startswith('#'))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                        'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    data = open(args.input) if args.input else sys.stdin
    try:
        if args.update_copyright:
            update_copyright(data)
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
    finally:
        # Close the patch file we opened ourselves; leave stdin alone.
        if data is not sys.stdin:
            data.close()

    if not args.update_copyright:
        if args.changelog:
            with open(args.changelog) as f:
                lines = f.read().split('\n')
            # 'start' is the message text in front of the first trailer or
            # comment line, 'end' is everything from that line on; the
            # generated ChangeLog goes between the two.
            start = list(takewhile(skip_line_in_changelog, lines))
            end = lines[len(start):]
            with open(args.changelog, 'w') as f:
                if not start or not start[0]:
                    # initial commit subject line 'component: [PRnnnnn]'
                    m = prnum_regex.match(firstpr)
                    if m:
                        title = f'{m.group("comp")}: [PR{m.group("num")}]'
                        start.insert(0, title)
                if start:
                    # append empty line
                    if start[-1] != '':
                        start.append('')
                else:
                    # append 2 empty lines
                    start = 2 * ['']
                f.write('\n'.join(start))
                f.write('\n')
                f.write(output)
                f.write('\n'.join(end))
        else:
            print(output, end='')