#!/usr/bin/env python3
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.

import difflib
import os
import re
import sys

# Directories (relative to the repository root) that are accepted as
# ChangeLog locations.  GitCommit copies this set into a per-instance list
# and may drop entries depending on the branch being checked.
default_changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'liboffloadmic',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib'}

# Component names that are accepted in a "PR component/number" reference,
# both in the commit subject and in the ChangeLog part of the message.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web'}

# Changes to files under these prefixes do not have to be mentioned in a
# ChangeLog entry (see GitCommit.in_ignored_location).
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libffi/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
    }

# Path prefixes that GitCommit.check_file_patterns tests wildcard entries
# ("prefix*") against.
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/'
    }

# Files that, like ChangeLog files themselves, must be updated in commits
# of their own and not together with normal changes.
misc_files = {
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE'
    }

# "YYYY-MM-DD  Name <email>": a date, exactly two spaces, then the author.
# NOTE(review): this listing collapses runs of whitespace, so the gap
# between the name and '<' in the two author patterns below may have been
# two spaces in the pristine file -- confirm against the upstream source.
author_line_regex = \
    re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.* <.*>)')
# A further author on a line of its own: a tab, some spaces, "Name <email>".
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.* <.*>)')
# "gcc/ChangeLog:" or "for gcc/ChangeLog"; group 1 is the directory prefix.
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# "PR component/NNNN" anywhere in the subject line.
subject_pr_regex = re.compile(r'(^|\W)PR\s+(?P<component>[a-zA-Z+-]+)/(?P<pr>\d{4,7})')
# "[PRNNNN]" or "(PRNNNN)" in the subject line.
subject_pr2_regex = re.compile(r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# A "\tPR component/NNNN" line; the component part (with its trailing
# slash) is optional in the pattern so that its absence can be reported.
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?(?P<pr>[0-9]+)$')
# A "\tDR NNNN" line.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# "\t*", the spaces that follow the asterisk, and the rest of the line.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that terminates the list of file names in an item.
end_of_location_regex = re.compile(r'[\[<(:]')
# An item of the form "(function):" (optionally preceded by "* file ")
# with nothing after the colon.
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
# A line that starts a new item: "\t*" or "\t(function):".
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
revert_regex = re.compile(r'This reverts commit (?P<hash>[0-9a-f]+)\.$')
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')

# Maximum width of a ChangeLog line once tabs are expanded to TAB_WIDTH.
LINE_LIMIT = 100
TAB_WIDTH = 8
# Compared against the lower-cased line.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '
# Trailer lines (compared lower-cased) that are skipped when the ChangeLog
# part of a commit message is parsed.
REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
                   'acked-by: ', 'tested-by: ', 'reported-by: ',
                   'suggested-by: ')
DATE_FORMAT = '%Y-%m-%d'


def decode_path(path):
    """Return PATH with git's C-style quoting undone.

    A path that is not wrapped in double quotes is returned unchanged.
    """
    # When core.quotepath is true (default value), utf8 chars are encoded like:
    # "b/ko\304\215ka.txt"
    #
    # The upstream bug is fixed:
    # https://github.com/gitpython-developers/GitPython/issues/1099
    #
    # but we still need a workaround for older versions of the library.
    # Please take a look at the explanation of the transformation:
    # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string

    if path.startswith('"') and path.endswith('"'):
        return (path.strip('"').encode('utf8').decode('unicode-escape')
                .encode('latin-1').decode('utf8'))
    else:
        return path


class Error:
    """A single problem found in a commit.

    message -- text of the problem
    line    -- optional offending line (or file name) shown after the message
    details -- optional extra text; stored only, not part of the repr
    """

    def __init__(self, message, line=None, details=None):
        self.message = message
        self.line = line
        self.details = details

    def __repr__(self):
        s = self.message
        if self.line:
            s += ': "%s"' % self.line
        return s


class ChangeLogEntry:
    """The part of a commit message that belongs to one ChangeLog file.

    folder  -- directory of the ChangeLog, '' for the top-level one, or
               None when it still has to be deduced from the file names
    authors -- iterable of (name, date-or-None) tuples
    prs     -- iterable of PR/DR lines inherited from the top of the message
    """

    def __init__(self, folder, authors, prs):
        self.folder = folder
        # The 'list.copy()' function is not available before Python 3.3
        self.author_lines = list(authors)
        self.initial_prs = list(prs)
        self.prs = list(prs)
        self.lines = []
        self.files = []
        self.file_patterns = []
        # One element per '(' that is still open, see
        # GitCommit.process_parentheses.
        self.parentheses_stack = []

    def parse_file_names(self):
        """Fill self.files and self.file_patterns from self.lines."""
        # Whether the content currently processed is between a star prefix the
        # end of the file list: a colon or an open paren.
        in_location = False

        for line in self.lines:
            # If this line matches the star prefix, start the location
            # processing on the information that follows the star.
            # Note that we need to skip macro names that can be in form of:
            #
            # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
            # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
            # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
            #
            m = star_prefix_regex.match(line)
            if m and len(m.group('spaces')) == 1:
                in_location = True
                line = m.group('content')

            if in_location:
                # Strip everything that is not a filename in "line":
                # entities "(NAME)", cases "<PATTERN>", conditions
                # "[COND]", entry text (the colon, if present, and
                # anything that follows it).
                m = end_of_location_regex.search(line)
                if m:
                    line = line[:m.start()]
                    in_location = False

                # At this point, all that's left is a list of filenames
                # separated by commas and whitespaces.
                for file in line.split(','):
                    file = file.strip()
                    if file:
                        if file.endswith('*'):
                            self.file_patterns.append(file[:-1])
                        else:
                            self.files.append(file)

    @property
    def datetime(self):
        """Date of the first author line that carries one, else None."""
        for author in self.author_lines:
            if author[1]:
                return author[1]
        return None

    @property
    def authors(self):
        """A fresh list with the names of all authors of the entry."""
        return [author_line[0] for author_line in self.author_lines]

    @property
    def is_empty(self):
        """True while no line and no PR of its own was added to the entry."""
        return not self.lines and self.prs == self.initial_prs

    def contains_author(self, author):
        """Return True if AUTHOR is already one of the entry's authors."""
        return any(author_line[0] == author
                   for author_line in self.author_lines)


class GitInfo:
    """Plain record describing a commit.

    lines          -- the commit message as a list of lines
    modified_files -- list of (path, status) tuples in the shape produced
                      by GitCommit.parse_git_name_status
    date           -- object with a strftime method
    author         -- 'Name <email>' string
    """

    def __init__(self, hexsha, date, author, lines, modified_files):
        self.hexsha = hexsha
        self.date = date
        self.author = author
        self.lines = lines
        self.modified_files = modified_files


class GitCommit:
    """Verify the ChangeLog part of a commit message.

    All checks run in the constructor; problems are collected as Error
    objects in self.errors and the parsed entries in
    self.changelog_entries.
    """

    def __init__(self, info, commit_to_info_hook=None, ref_name=None):
        """Parse and check the commit described by INFO.

        info                -- GitInfo of the commit
        commit_to_info_hook -- callable taking a commit hash and returning
                               the info of that commit (or a false value);
                               called for reverts and for cherry-picks
        ref_name            -- optional ref the commit belongs to, used to
                               select the valid ChangeLog locations
        """
        self.original_info = info
        self.info = info
        self.message = None
        self.changes = None
        self.changelog_entries = []
        self.errors = []
        self.top_level_authors = []
        self.co_authors = []
        self.top_level_prs = []
        self.subject_prs = set()
        self.cherry_pick_commit = None
        self.revert_commit = None
        self.commit_to_info_hook = commit_to_info_hook
        self.init_changelog_locations(ref_name)

        # Skip Update copyright years commits
        if self.info.lines and self.info.lines[0] == 'Update copyright years.':
            return

        if len(self.info.lines) > 1 and self.info.lines[1]:
            self.errors.append(Error('Expected empty second line in commit message', info.lines[0]))

        # Identify first if the commit is a Revert commit
        for line in self.info.lines:
            m = revert_regex.fullmatch(line)
            if m:
                self.revert_commit = m.group('hash')
                break
        if self.revert_commit:
            # From here on the reverted commit is the one being analysed;
            # the reverting commit stays available as self.original_info.
            self.info = self.commit_to_info_hook(self.revert_commit)

        # The following happens for get_email.py:
        if not self.info:
            return

        self.check_commit_email()

        # Extract PR numbers form the subject line
        # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
        if self.info.lines and not self.revert_commit:
            self.subject_prs = {m.group('pr') for m in subject_pr2_regex.finditer(info.lines[0])}
            for m in subject_pr_regex.finditer(info.lines[0]):
                if m.group('component') not in bug_components:
                    self.errors.append(Error('invalid PR component in subject', info.lines[0]))
                self.subject_prs.add(m.group('pr'))

        # Allow complete deletion of ChangeLog files in a commit
        project_files = [f for f in self.info.modified_files
                         if (self.is_changelog_filename(f[0], allow_suffix=True) and f[1] != 'D')
                         or f[0] in misc_files]
        ignored_files = [f for f in self.info.modified_files
                         if self.in_ignored_location(f[0])]
        if len(project_files) == len(self.info.modified_files):
            # All modified files are only MISC files
            return
        elif project_files:
            err = 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
                  'should be done separately from normal commits\n' \
                  '(note: ChangeLog entries will be automatically ' \
                  'added by a cron job)'
            self.errors.append(Error(err))
            return

        all_are_ignored = (len(project_files) + len(ignored_files)
                           == len(self.info.modified_files))
        self.parse_lines(all_are_ignored)
        if self.changes:
            self.parse_changelog()
            self.parse_file_names()
            self.check_for_empty_description()
            self.check_for_broken_parentheses()
            self.deduce_changelog_locations()
            self.check_file_patterns()
            if not self.errors:
                self.check_mentioned_files()
                self.check_for_correct_changelog()
            if self.subject_prs:
                # parse_changelog removed every PR it met in the ChangeLog
                # part; sort what is left so the message is reproducible.
                self.errors.append(Error('PR %s in subject but not in changelog' %
                                         ', '.join(sorted(self.subject_prs)),
                                         self.info.lines[0]))

    @property
    def success(self):
        """True when no error was recorded."""
        return not self.errors

    @property
    def new_files(self):
        """Paths of the files whose status is 'A'."""
        return [x[0] for x in self.info.modified_files if x[1] == 'A']

    @classmethod
    def is_changelog_filename(cls, path, allow_suffix=False):
        """Return True if the basename of PATH is 'ChangeLog'.

        With ALLOW_SUFFIX any basename starting with 'ChangeLog' counts.
        """
        basename = os.path.basename(path)
        if basename == 'ChangeLog':
            return True
        return allow_suffix and basename.startswith('ChangeLog')

    def find_changelog_location(self, name):
        """Return NAME without its leading tab and trailing ':' and '/' if
        the result is a known ChangeLog location, otherwise None."""
        if name.startswith('\t'):
            name = name[1:]
        if name.endswith(':'):
            name = name[:-1]
        if name.endswith('/'):
            name = name[:-1]
        return name if name in self.changelog_locations else None

    @classmethod
    def format_git_author(cls, author):
        """Insert an additional space in front of '<' in AUTHOR."""
        assert '<' in author
        return author.replace('<', ' <')

    @classmethod
    def parse_git_name_status(cls, string):
        """Turn tab-separated name-status text into (path, status) tuples.

        Only the statuses A, D and M and renames (R...) are handled; a
        rename is reported as deletion of the old path followed by
        addition of the new one.  Other lines are skipped.
        """
        modified_files = []
        for entry in string.split('\n'):
            parts = entry.split('\t')
            t = parts[0]
            if t in ('A', 'D', 'M'):
                modified_files.append((parts[1], t))
            elif t.startswith('R'):
                modified_files.append((parts[1], 'D'))
                modified_files.append((parts[2], 'A'))
        return modified_files

    def init_changelog_locations(self, ref_name):
        """Set self.changelog_locations according to REF_NAME.

        Without a REF_NAME all default locations are used.  A ref that
        does not name a release branch is treated as the newest version.
        """
        self.changelog_locations = list(default_changelog_locations)
        if ref_name:
            version = sys.maxsize
            # Take only the leading number after "releases/gcc-" so that
            # names such as "releases/gcc-12.1.0" do not raise ValueError.
            m = re.search(r'releases/gcc-(\d+)', ref_name)
            if m:
                version = int(m.group(1))
            if version >= 12:
                # HSA and BRIG were removed in GCC 12
                self.changelog_locations.remove('gcc/brig')
                self.changelog_locations.remove('libhsail-rt')

    def parse_lines(self, all_are_ignored):
        """Set self.changes to the message starting at the first line that
        looks like part of a ChangeLog.

        If there is no such line an error is recorded, unless
        ALL_ARE_IGNORED is true.
        """
        body = self.info.lines

        for i, b in enumerate(body):
            if not b:
                continue
            if (changelog_regex.match(b) or self.find_changelog_location(b)
                    or star_prefix_regex.match(b) or pr_regex.match(b)
                    or dr_regex.match(b) or author_line_regex.match(b)
                    or b.lower().startswith(CO_AUTHORED_BY_PREFIX)):
                self.changes = body[i:]
                return
        if not all_are_ignored:
            self.errors.append(Error('cannot find a ChangeLog location in '
                                     'message'))

    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Author, PR/DR, Co-Authored-By and cherry-pick lines are collected
        on the way and formatting problems are recorded in self.errors.
        """
        last_entry = None
        will_deduce = False
        for line in self.changes:
            if not line:
                # An empty line ends an entry whose location is deduced.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                # support long filenames
                if not line.startswith('\t* ') or not line.endswith(':') or ' ' in line[3:-1]:
                    self.errors.append(Error('line exceeds %d character limit'
                                             % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            location = None if m else self.find_changelog_location(line)
            if m:
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif location:
                last_entry = ChangeLogEntry(location,
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                author_tuple = None
                pr_line = None
                author_match = author_line_regex.match(line)
                extra_author_match = (None if author_match
                                      else additional_author_regex.match(line))
                pr_match = (None if author_match or extra_author_match
                            else pr_regex.match(line))
                if author_match:
                    author_tuple = (author_match.group('name'),
                                    author_match.group('datetime'))
                elif extra_author_match:
                    if len(extra_author_match.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (extra_author_match.group('name'), None)
                elif pr_match:
                    component = pr_match.group('component')
                    pr = pr_match.group('pr')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    elif component[:-1] not in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                    # The PR of the subject is mentioned here: tick it off.
                    if pr in self.subject_prs:
                        self.subject_prs.remove(pr)
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        # Inside an open parenthesis a "*name" is a macro
                        # name, not the start of a new item.
                        if (len(m.group('spaces')) != 1 and
                                not last_entry.parentheses_stack):
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)

    def process_parentheses(self, last_entry, line):
        """Track the open parentheses of LAST_ENTRY across LINE.

        Every '(' pushes LINE on the entry's stack and every ')' pops one
        element; a ')' without a matching '(' is recorded as an error.
        """
        for c in line:
            if c == '(':
                last_entry.parentheses_stack.append(line)
            elif c == ')':
                if not last_entry.parentheses_stack:
                    msg = 'bad wrapping of parenthesis'
                    self.errors.append(Error(msg, line))
                else:
                    del last_entry.parentheses_stack[-1]

    def parse_file_names(self):
        """Extract the file names of every entry."""
        for entry in self.changelog_entries:
            entry.parse_file_names()

    def check_file_patterns(self):
        """Record an error for each wildcard pattern whose full path does
        not start with one of wildcard_prefixes."""
        for entry in self.changelog_entries:
            if entry.folder is None:
                # The location could not be deduced; an error was already
                # recorded for that and there is no path to build.
                continue
            for pattern in entry.file_patterns:
                name = os.path.join(entry.folder, pattern)
                if not any(name.startswith(pr) for pr in wildcard_prefixes):
                    msg = 'unsupported wildcard prefix'
                    self.errors.append(Error(msg, name))

    def check_for_empty_description(self):
        """Record an error for items that end at the colon and are not
        continued by a description on the following line."""
        for entry in self.changelog_entries:
            for i, line in enumerate(entry.lines):
                if (item_empty_regex.match(line) and
                    (i == len(entry.lines) - 1
                     or not entry.lines[i+1].strip()
                     or item_parenthesis_regex.match(entry.lines[i+1]))):
                    msg = 'missing description of a change'
                    self.errors.append(Error(msg, line))

    def check_for_broken_parentheses(self):
        """Record an error for entries that end with an unclosed '('."""
        for entry in self.changelog_entries:
            if entry.parentheses_stack:
                msg = 'bad parentheses wrapping'
                self.errors.append(Error(msg, entry.parentheses_stack[-1]))

    def get_file_changelog_location(self, changelog_file):
        """Return the directory prefix under which CHANGELOG_FILE (a name
        taken from a ChangeLog item) occurs among the modified files.

        '' means the name is a modified path as is; None means that no
        modified path matches.
        """
        for file in self.info.modified_files:
            if file[0] == changelog_file:
                # root ChangeLog file
                return ''
            index = file[0].find('/' + changelog_file)
            if index != -1:
                return file[0][:index]
        return None

    def deduce_changelog_locations(self):
        """Set the folder of entries without one from their file names."""
        for entry in self.changelog_entries:
            if not entry.folder:
                changelog = None
                for file in entry.files:
                    location = self.get_file_changelog_location(file)
                    if (location == ''
                       or (location and location in self.changelog_locations)):
                        if changelog and changelog != location:
                            msg = 'could not deduce ChangeLog file, ' \
                                  'not unique location'
                            self.errors.append(Error(msg))
                            return
                        changelog = location
                if changelog is not None:
                    entry.folder = changelog
                else:
                    msg = 'could not deduce ChangeLog file'
                    self.errors.append(Error(msg))

    @classmethod
    def in_ignored_location(cls, path):
        """Return True if PATH starts with one of ignored_prefixes."""
        return any(path.startswith(ignored) for ignored in ignored_prefixes)

    def get_changelog_by_path(self, path):
        """Return the longest leading part of PATH that is a ChangeLog
        location, or '' if there is none."""
        components = path.split('/')
        while components:
            if '/'.join(components) in self.changelog_locations:
                break
            components = components[:-1]
        return '/'.join(components)

    def check_mentioned_files(self):
        """Compare the files named in the ChangeLog with the changed ones.

        Unmentioned new files outside the top-level folder get a
        "New file." item added automatically; every other mismatch is
        recorded as an error.
        """
        # Only called when no error was recorded, hence all locations
        # are known ('' stands for the top-level ChangeLog).
        assert all(x.folder is not None for x in self.changelog_entries)

        mentioned_files = set()
        mentioned_patterns = []
        used_patterns = set()
        for entry in self.changelog_entries:
            if not entry.files and not entry.file_patterns:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    item = os.path.join(entry.folder, file)
                    if item in mentioned_files:
                        msg = 'same file specified multiple times'
                        self.errors.append(Error(msg, file))
                    else:
                        mentioned_files.add(item)
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        cand = [x[0] for x in self.info.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        # Sorted, so that the suggestion does not depend on set ordering.
        sorted_changed_files = sorted(changed_files)
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            candidates = difflib.get_close_matches(file, sorted_changed_files, 1)
            details = None
            if candidates:
                msg += f' (did you mean "{candidates[0]}"?)'
                details = '\n'.join(difflib.Differ().compare([file], [candidates[0]])).rstrip()
            self.errors.append(Error(msg, file, details))
        for file in sorted(changed_files - mentioned_files):
            if not self.in_ignored_location(file):
                if file in self.new_files:
                    changelog_location = self.get_changelog_by_path(file)
                    # Python2: we cannot use next(filter(...))
                    entries = filter(lambda x: x.folder == changelog_location,
                                     self.changelog_entries)
                    entries = list(entries)
                    entry = entries[0] if entries else None
                    if not entry:
                        prs = self.top_level_prs
                        if not prs and self.changelog_entries:
                            # if all ChangeLog entries have identical PRs
                            # then use them
                            prs = self.changelog_entries[0].prs
                            for entry in self.changelog_entries:
                                if entry.prs != prs:
                                    prs = []
                                    break
                        entry = ChangeLogEntry(changelog_location,
                                               self.top_level_authors,
                                               prs)
                        self.changelog_entries.append(entry)
                    # strip prefix of the file
                    assert file.startswith(entry.folder)
                    # do not allow auto-addition of New files
                    # for the top-level folder
                    if entry.folder:
                        file = file[len(entry.folder):].lstrip('/')
                        entry.lines.append('\t* %s: New file.' % file)
                        entry.files.append(file)
                    else:
                        msg = 'new file in the top-level folder not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))
                else:
                    used_pattern = [p for p in mentioned_patterns
                                    if file.startswith(p)]
                    used_pattern = used_pattern[0] if used_pattern else None
                    if used_pattern:
                        used_patterns.add(used_pattern)
                    else:
                        msg = 'changed file not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))

        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))

    def check_for_correct_changelog(self):
        """Record an error for every file that is listed under a ChangeLog
        other than the one its path belongs to."""
        for entry in self.changelog_entries:
            for file in entry.files:
                full_path = os.path.join(entry.folder, file)
                changelog_location = self.get_changelog_by_path(full_path)
                if changelog_location != entry.folder:
                    msg = 'wrong ChangeLog location "%s", should be "%s"'
                    err = Error(msg % (entry.folder, changelog_location), file)
                    self.errors.append(err)

    @classmethod
    def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
        """Return the author header of a ChangeLog entry.

        The first author follows TIMESTAMP after two spaces, which is what
        author_line_regex expects; each further author goes on a line of
        its own after one tab and four spaces, which is what
        parse_changelog requires of additional authors.  Every line starts
        with PREFIX and the header ends with an empty line.
        """
        output = ''
        for i, author in enumerate(authors):
            if i == 0:
                output += '%s%s  %s\n' % (prefix, timestamp, author)
            else:
                output += '%s\t    %s\n' % (prefix, author)
        output += '\n'
        return output

    def to_changelog_entries(self, use_commit_ts=False):
        """Yield a (folder, text) pair for every ChangeLog entry.

        With USE_COMMIT_TS the date of the commit is used even if the
        entry carries a date of its own.
        """
        # For a revert self.info describes the reverted commit while
        # self.original_info is the revert itself.  Both dates are computed
        # once, before the loop, so that every entry of a revert gets the
        # same pair of dates.
        info_timestamp = self.info.date.strftime(DATE_FORMAT)
        if self.revert_commit:
            header_timestamp = self.original_info.date.strftime(DATE_FORMAT)
        else:
            header_timestamp = info_timestamp
        for entry in self.changelog_entries:
            output = ''
            timestamp = entry.datetime
            if self.revert_commit:
                timestamp = info_timestamp
            elif self.cherry_pick_commit:
                info = self.commit_to_info_hook(self.cherry_pick_commit)
                # it can happen that it is a cherry-pick for a different
                # repository
                if info:
                    timestamp = info.date.strftime(DATE_FORMAT)
                else:
                    timestamp = info_timestamp
            elif not timestamp or use_commit_ts:
                timestamp = info_timestamp
            authors = entry.authors if entry.authors else [self.info.author]
            # add Co-Authored-By authors to all ChangeLog entries
            for author in self.co_authors:
                if author not in authors:
                    authors.append(author)

            if self.cherry_pick_commit or self.revert_commit:
                original_author = self.original_info.author
                output += self.format_authors_in_changelog([original_author],
                                                           header_timestamp)
                if self.revert_commit:
                    output += '\tRevert:\n'
                else:
                    output += '\tBackported from master:\n'
                output += self.format_authors_in_changelog(authors,
                                                           timestamp, '\t')
            else:
                output += self.format_authors_in_changelog(authors, timestamp)
            for pr in entry.prs:
                output += '\t%s\n' % pr
            for line in entry.lines:
                output += line + '\n'
            yield (entry.folder, output.rstrip())

    def print_output(self):
        """Print all generated ChangeLog entries."""
        for entry, output in self.to_changelog_entries():
            print('------ %s/ChangeLog ------ ' % entry)
            print(output)

    def print_errors(self):
        """Print all recorded errors."""
        print('Errors:')
        for error in self.errors:
            print(error)

    def check_commit_email(self):
        """Record an error if the e-mail address of the author contains
        non-ASCII characters."""
        # Parse 'Martin Liska <mliska@suse.cz>'
        email = self.info.author.split(' ')[-1].strip('<>')

        # Verify that all characters are ASCII
        # TODO: Python 3.7 provides a nicer function: isascii
        if len(email) != len(email.encode()):
            self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))