#!/usr/bin/env python3

# Copyright (C) 2020 Free Software Foundation, Inc.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING.  If not, write to
# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.

# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and adds a skeleton ChangeLog file to the file. It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
#
# Author: Martin Liska <mliska@suse.cz>

import argparse
import datetime
import os
import re
import subprocess
import sys
from itertools import takewhile

import requests

from unidiff import PatchSet

pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
dg_regex = re.compile(r'{\s+dg-(error|warning)')
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
comment_regex = re.compile(r'^\/\*')
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
template_and_param_regex = re.compile(r'<[^<>]*>')
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
               'include_fields=summary,component'

function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# NB: despite its name, script_folder is the resolved path of this file;
# root is therefore the directory two levels above the script.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))

# First PR/DR entry seen by the most recent generate_changelog() call;
# used to build a commit subject line when -c is given.
firstpr = ''

# Seconds to wait for Bugzilla before giving up on a PR title lookup.
bugzilla_timeout = 30


def find_changelog(path):
    """Return the closest ancestor folder of PATH that holds a ChangeLog.

    The search walks from the folder of PATH upwards and tests for
    '<root>/<folder>/ChangeLog'.  An empty string is returned when the
    walk reaches the top without finding one.
    """
    folder = os.path.split(path)[0]
    while True:
        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        # dirname() of an absolute path never becomes '', it stays at the
        # filesystem root; treat that fixed point as "not found" as well
        # instead of looping forever.
        if parent == '' or parent == folder:
            return ''
        folder = parent


def extract_function_name(line):
    """Return the entity name (struct, macro, function) found in LINE.

    Returns None when LINE is empty, starts a C comment or contains
    nothing that looks like a definition.
    """
    if not line or comment_regex.match(line):
        return None
    m = struct_regex.search(line)
    if m:
        # Struct declaration
        return m.group(1) + ' ' + m.group(3)
    m = macro_regex.search(line)
    if m:
        # Macro definition
        return m.group(2)
    m = super_macro_regex.search(line)
    if m:
        # Supermacro
        return m.group(1)
    m = fn_regex.search(line)
    if m:
        # Discard template and function parameters.
        fn = m.group(1)
        fn = re.sub(template_and_param_regex, '', fn)
        return fn.rstrip()
    return None


def try_add_function(functions, line):
    """Append the name extracted from LINE to FUNCTIONS if it is new.

    Returns True when a name could be extracted (even if it was already
    present in FUNCTIONS), False otherwise.
    """
    fn = extract_function_name(line)
    if fn and fn not in functions:
        functions.append(fn)
    return bool(fn)


def sort_changelog_files(changed_file):
    """Sort key that places added and removed files after modified ones."""
    return (changed_file.is_added_file, changed_file.is_removed_file)


def get_pr_titles(prs):
    """Download the Bugzilla summaries of the entries in PRS.

    Entries of PRS that Bugzilla knows are rewritten in place to
    'PR <component>/<number>'.  The returned text contains one
    '<entry> - <summary>' line per bug, or is empty if nothing was found.
    """
    output = []
    for idx, pr in enumerate(prs):
        # DR entries are not Bugzilla bugs.
        if pr.startswith('DR '):
            continue
        # Take the trailing number so that entries without a component
        # (e.g. 'PR 12345' derived from a file name) are queried correctly.
        m = re.search(r'[0-9]+$', pr)
        if not m:
            continue
        pr_id = m.group(0)
        r = requests.get(bugzilla_url % pr_id, timeout=bugzilla_timeout)
        # An error reply carries no 'bugs' key; treat it as "not found".
        bugs = r.json().get('bugs', [])
        if len(bugs) == 1:
            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
            if out not in output:
                output.append(out)
    if output:
        output.append('')
    return '\n'.join(output)


def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return a ChangeLog skeleton for the patch DATA.

    DATA is whatever unidiff.PatchSet accepts.  NO_FUNCTIONS suppresses
    the list of touched functions, FILL_PR_TITLES downloads PR summaries
    from Bugzilla and ADDITIONAL_PRS is an optional list of extra PR
    entries to mention.  As a side effect the module-level 'firstpr' is
    set to the first PR/DR entry, if there is any.
    """
    global firstpr

    changelogs = {}
    changelog_list = []
    prs = []
    out = ''
    diff = PatchSet(data)

    if additional_prs:
        # Copy (so the caller's list is never modified) and de-duplicate.
        for pr in additional_prs:
            if pr not in prs:
                prs.append(pr)
    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            # Only search first ten lines as later lines may
            # contains commented code which a note that it
            # has not been tested due to a certain PR or DR.
            this_file_prs = []
            # An added file may carry no hunk at all (e.g. an empty file).
            hunks = list(file)
            if hunks:
                for line in hunks[0][0:10]:
                    m = pr_regex.search(line.value)
                    if m:
                        pr = m.group('pr')
                        if pr not in prs:
                            prs.append(pr)
                        # Remember the number even if an earlier file
                        # already recorded this PR, so that the file name
                        # check below does not add it once more.
                        this_file_prs.append(pr.split('/')[-1])
                    else:
                        m = dr_regex.search(line.value)
                        if m:
                            dr = m.group('dr')
                            if dr not in prs:
                                prs.append(dr)
                            this_file_prs.append(dr.split('/')[-1])
                        elif dg_regex.search(line.value):
                            # Found dg-warning/dg-error line
                            break
            # PR number in the file name
            fname = os.path.basename(file.path)
            m = pr_filename_regex.search(fname)
            if m:
                pr = m.group('pr')
                pr2 = 'PR ' + pr
                if pr not in this_file_prs and pr2 not in prs:
                    prs.append(pr2)

    if prs:
        firstpr = prs[0]

    if fill_pr_titles:
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = file.path[len(changelog):].lstrip('/')
            functions = []
            if file.is_added_file:
                msg = 'New test' if in_tests else 'New file'
                out += '\t* %s: %s.\n' % (relative_path, msg)
            elif file.is_removed_file:
                out += '\t* %s: Removed.\n' % (relative_path)
            elif hasattr(file, 'is_rename') and file.is_rename:
                out += '\t* %s: Moved to...\n' % (relative_path)
                # Strip the 'b/' prefix of the target name.
                new_path = file.target_file[2:]
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                if new_path.startswith(changelog):
                    new_path = new_path[len(changelog):].lstrip('/')
                out += '\t* %s: ...here.\n' % (new_path)
            elif os.path.basename(file.path) in generated_files:
                out += '\t* %s: Regenerate.\n' % (relative_path)
            else:
                if not no_functions:
                    for hunk in file:
                        # Do not add function names for testsuite files
                        extension = os.path.splitext(relative_path)[1]
                        if not in_tests and extension in function_extensions:
                            # last_fn: most recent candidate seen on a
                            # context line; modified_visited: a changed
                            # line was seen since then; success: at least
                            # one name was recorded for this hunk.
                            last_fn = None
                            modified_visited = False
                            success = False
                            for line in hunk:
                                m = identifier_regex.match(line.value)
                                if line.is_added or line.is_removed:
                                    # special-case definition in .md files
                                    m2 = md_def_regex.match(line.value)
                                    if extension == '.md' and m2:
                                        fn = m2.group(1)
                                        if fn not in functions:
                                            functions.append(fn)
                                            last_fn = None
                                            success = True

                                    if not line.value.strip():
                                        continue
                                    modified_visited = True
                                    if m and try_add_function(functions,
                                                              m.group(1)):
                                        last_fn = None
                                        success = True
                                elif line.is_context:
                                    if last_fn and modified_visited:
                                        try_add_function(functions, last_fn)
                                        last_fn = None
                                        modified_visited = False
                                        success = True
                                    elif m:
                                        last_fn = m.group(1)
                                        modified_visited = False
                            if not success:
                                # Fall back to the name diff put in the
                                # hunk header.
                                try_add_function(functions,
                                                 hunk.section_header)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out


def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to affected ChangeLogs.

    Every ChangeLog file that covers a file touched by the patch DATA is
    rewritten once with a new entry, dated today and attributed to the
    user configured in git, placed in front of its previous content.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    # No shell is needed for fixed git commands; pass an argument list.
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()

    changelogs = set()
    diff = PatchSet(data)

    for file in diff:
        changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
        if changelog not in changelogs:
            changelogs.add(changelog)
            # find_changelog() looked below 'root'; open the same file
            # rather than one relative to the current directory.
            changelog_path = os.path.join(root, changelog)
            with open(changelog_path) as f:
                content = f.read()
            with open(changelog_path, 'w') as f:
                f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
                f.write('\tUpdate copyright years.\n\n')
                f.write(content)


def main():
    """Command line entry point."""
    global root

    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                        'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    data = open(args.input) if args.input else sys.stdin
    try:
        if args.update_copyright:
            update_copyright(data)
            return
        output = generate_changelog(data, args.no_functions,
                                    args.fill_up_bug_titles, args.pr_numbers)
    finally:
        # Close the patch file, but leave standard input alone.
        if data is not sys.stdin:
            data.close()

    if not args.changelog:
        print(output, end='')
        return

    with open(args.changelog) as f:
        lines = f.read().split('\n')
    # Everything before the first '#' line is the user's message; the
    # remainder is the comment block git put into the template.
    start = list(takewhile(lambda l: not l.startswith('#'), lines))
    end = lines[len(start):]
    with open(args.changelog, 'w') as f:
        if not start or not start[0]:
            # initial commit subject line 'component: [PRnnnnn]'
            m = prnum_regex.match(firstpr)
            if m:
                title = f'{m.group("comp")}: [PR{m.group("num")}]'
                start.insert(0, title)
        if start:
            # append empty line
            if start[-1] != '':
                start.append('')
        else:
            # append 2 empty lines
            start = 2 * ['']
        f.write('\n'.join(start))
        f.write('\n')
        f.write(output)
        f.write('\n'.join(end))


if __name__ == '__main__':
    main()