#!/usr/bin/env python3
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.  */

"""Parse and validate ChangeLog entries embedded in git commit messages."""

import difflib
import os
import re
import sys

# Directories that own a ChangeLog file.
default_changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'liboffloadmic',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib'}

# Component names accepted in "PR component/NNNN" references.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web'}

# Changed files below these prefixes need not be mentioned in a ChangeLog.
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libffi/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
    }

# Only paths below these prefixes may be referenced with a trailing '*'.
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/'
    }

# Files that are updated together with ChangeLog files, separately from
# normal commits.
misc_files = {
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE'
    }

author_line_regex = \
    re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.*  <.*>)')
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.*  <.*>)')
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
subject_pr_regex = re.compile(r'(^|\W)PR\s+(?P<component>[a-zA-Z+-]+)/(?P<pr>\d{4,7})')
subject_pr2_regex = re.compile(r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?(?P<pr>[0-9]+)$')
dr_regex = re.compile(r'\tDR ([0-9]+)$')
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
end_of_location_regex = re.compile(r'[\[<(:]')
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
revert_regex = re.compile(r'This reverts commit (?P<hash>\w+).$')
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')

LINE_LIMIT = 100
TAB_WIDTH = 8
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
                   'acked-by: ', 'tested-by: ', 'reported-by: ',
                   'suggested-by: ')
DATE_FORMAT = '%Y-%m-%d'


def decode_path(path):
    """Return PATH with git's quoted octal escapes decoded to text.

    Paths that are not wrapped in double quotes are returned unchanged.
    """
    # When core.quotepath is true (default value), utf8 chars are encoded like:
    # "b/ko\304\215ka.txt"
    #
    # The upstream bug is fixed:
    # https://github.com/gitpython-developers/GitPython/issues/1099
    #
    # but we still need a workaround for older versions of the library.
    # Please take a look at the explanation of the transformation:
    # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string

    if path.startswith('"') and path.endswith('"'):
        return (path.strip('"').encode('utf8').decode('unicode-escape')
                .encode('latin-1').decode('utf8'))
    else:
        return path


class Error:
    """A validation problem: a message plus optional line and details."""

    def __init__(self, message, line=None, details=None):
        self.message = message
        self.line = line
        self.details = details

    def __repr__(self):
        # Also used by print(), as no __str__ is defined.
        s = self.message
        if self.line:
            s += ': "%s"' % self.line
        return s


class ChangeLogEntry:
    """The lines of a commit message that belong to one ChangeLog file."""

    def __init__(self, folder, authors, prs):
        # Directory of the ChangeLog; None until it is deduced.
        self.folder = folder
        # The 'list.copy()' function is not available before Python 3.3
        # Items are (name, date-or-None) tuples.
        self.author_lines = list(authors)
        # Snapshot of the inherited PRs; is_empty compares against it.
        self.initial_prs = list(prs)
        self.prs = list(prs)
        self.lines = []
        self.files = []
        # Wildcard items with the trailing '*' removed.
        self.file_patterns = []
        # One item per currently unmatched '(' (the line containing it).
        self.parentheses_stack = []

    def parse_file_names(self):
        """Fill self.files and self.file_patterns from self.lines."""
        # Whether the content currently processed is between a star prefix the
        # end of the file list: a colon or an open paren.
        in_location = False

        for line in self.lines:
            # If this line matches the star prefix, start the location
            # processing on the information that follows the star.
            # Note that we need to skip macro names that can be in form of:
            #
            # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
            # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
            # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
            #
            m = star_prefix_regex.match(line)
            if m and len(m.group('spaces')) == 1:
                in_location = True
                line = m.group('content')

            if in_location:
                # Strip everything that is not a filename in "line":
                # entities "(NAME)", cases "<PATTERN>", conditions
                # "[COND]", entry text (the colon, if present, and
                # anything that follows it).
                m = end_of_location_regex.search(line)
                if m:
                    line = line[:m.start()]
                    in_location = False

                # At this point, all that's left is a list of filenames
                # separated by commas and whitespaces.
                for file in line.split(','):
                    file = file.strip()
                    if file:
                        if file.endswith('*'):
                            self.file_patterns.append(file[:-1])
                        else:
                            self.files.append(file)

    @property
    def datetime(self):
        """Date string of the first author line that has one, else None."""
        for author in self.author_lines:
            if author[1]:
                return author[1]
        return None

    @property
    def authors(self):
        """A fresh list with the names of all authors of the entry."""
        return [author_line[0] for author_line in self.author_lines]

    @property
    def is_empty(self):
        """True while the entry has no lines and no PRs of its own."""
        return not self.lines and self.prs == self.initial_prs

    def contains_author(self, author):
        """Return True if AUTHOR (a name string) is already recorded."""
        for author_lines in self.author_lines:
            if author_lines[0] == author:
                return True
        return False


class GitInfo:
    """Raw data of one commit.

    DATE must provide strftime(), LINES is the list of commit message
    lines and MODIFIED_FILES a list of (path, status) tuples as produced
    by GitCommit.parse_git_name_status.
    """

    def __init__(self, hexsha, date, author, lines, modified_files):
        self.hexsha = hexsha
        self.date = date
        self.author = author
        self.lines = lines
        self.modified_files = modified_files


class GitCommit:
    """Validate the ChangeLog part of a commit message.

    All parsing and checking happens in the constructor; problems are
    collected in self.errors and the parsed entries in
    self.changelog_entries.
    """

    def __init__(self, info, commit_to_info_hook=None, ref_name=None):
        """Parse and check INFO (a GitInfo).

        COMMIT_TO_INFO_HOOK maps a commit hash to a GitInfo (or a false
        value when unknown); it is called for reverts and cherry picks.
        REF_NAME, when given, selects the ChangeLog locations valid for
        that branch.
        """
        self.original_info = info
        self.info = info
        self.message = None
        self.changes = None
        self.changelog_entries = []
        self.errors = []
        self.top_level_authors = []
        self.co_authors = []
        self.top_level_prs = []
        self.subject_prs = set()
        self.cherry_pick_commit = None
        self.revert_commit = None
        self.commit_to_info_hook = commit_to_info_hook
        self.init_changelog_locations(ref_name)

        # Skip Update copyright years commits
        if self.info.lines and self.info.lines[0] == 'Update copyright years.':
            return

        if len(self.info.lines) > 1 and self.info.lines[1]:
            self.errors.append(Error('Expected empty second line in commit message',
                                     self.info.lines[0]))

        # Identify first if the commit is a Revert commit
        for line in self.info.lines:
            m = revert_regex.match(line)
            if m:
                self.revert_commit = m.group('hash')
                break
        if self.revert_commit:
            # From here on the reverted commit is the one being parsed.
            self.info = self.commit_to_info_hook(self.revert_commit)

        # The following happens for get_email.py:
        if not self.info:
            return

        self.check_commit_email()

        # Extract PR numbers form the subject line
        # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
        if self.info.lines and not self.revert_commit:
            subject = self.info.lines[0]
            self.subject_prs = {m.group('pr')
                                for m in subject_pr2_regex.finditer(subject)}
            for m in subject_pr_regex.finditer(subject):
                if m.group('component') not in bug_components:
                    self.errors.append(Error('invalid PR component in subject',
                                             subject))
                self.subject_prs.add(m.group('pr'))

        # Allow complete deletion of ChangeLog files in a commit
        project_files = [f for f in self.info.modified_files
                         if (self.is_changelog_filename(f[0], allow_suffix=True)
                             and f[1] != 'D')
                         or f[0] in misc_files]
        ignored_files = [f for f in self.info.modified_files
                         if self.in_ignored_location(f[0])]
        if len(project_files) == len(self.info.modified_files):
            # All modified files are only MISC files
            return
        elif project_files:
            err = 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
                  'should be done separately from normal commits\n' \
                  '(note: ChangeLog entries will be automatically ' \
                  'added by a cron job)'
            self.errors.append(Error(err))
            return

        all_are_ignored = (len(project_files) + len(ignored_files)
                           == len(self.info.modified_files))
        self.parse_lines(all_are_ignored)
        if self.changes:
            self.parse_changelog()
            self.parse_file_names()
            self.check_for_empty_description()
            self.check_for_broken_parentheses()
            self.deduce_changelog_locations()
            self.check_file_patterns()
            if not self.errors:
                self.check_mentioned_files()
                self.check_for_correct_changelog()
            if self.subject_prs:
                # Sorted so that the message does not depend on set order.
                self.errors.append(Error('PR %s in subject but not in changelog' %
                                         ', '.join(sorted(self.subject_prs)),
                                         self.info.lines[0]))

    @property
    def success(self):
        """True when no error was recorded."""
        return not self.errors

    @property
    def new_files(self):
        """Paths of the files added by the commit (status 'A')."""
        return [x[0] for x in self.info.modified_files if x[1] == 'A']

    @classmethod
    def is_changelog_filename(cls, path, allow_suffix=False):
        """Return True if the basename of PATH is 'ChangeLog'.

        With ALLOW_SUFFIX any basename starting with 'ChangeLog' counts.
        """
        basename = os.path.basename(path)
        if basename == 'ChangeLog':
            return True
        return allow_suffix and basename.startswith('ChangeLog')

    def find_changelog_location(self, name):
        """Return NAME as a known ChangeLog location, or None.

        One leading tab, one trailing ':' and one trailing '/' are
        removed before the lookup.
        """
        if name.startswith('\t'):
            name = name[1:]
        if name.endswith(':'):
            name = name[:-1]
        if name.endswith('/'):
            name = name[:-1]
        return name if name in self.changelog_locations else None

    @classmethod
    def format_git_author(cls, author):
        """Insert one extra space before each '<' of AUTHOR."""
        assert '<' in author
        return author.replace('<', ' <')

    @classmethod
    def parse_git_name_status(cls, string):
        """Convert name-status text into a list of (path, status) tuples.

        Every line of STRING is tab separated.  'A', 'D' and 'M' lines
        are kept; a line whose status starts with 'R' becomes a 'D' of
        the old path plus an 'A' of the new one; other lines are skipped.
        """
        modified_files = []
        for entry in string.split('\n'):
            parts = entry.split('\t')
            t = parts[0]
            if t == 'A' or t == 'D' or t == 'M':
                modified_files.append((parts[1], t))
            elif t.startswith('R'):
                modified_files.append((parts[1], 'D'))
                modified_files.append((parts[2], 'A'))
        return modified_files

    def init_changelog_locations(self, ref_name):
        """Set self.changelog_locations for the branch REF_NAME."""
        self.changelog_locations = list(default_changelog_locations)
        if ref_name:
            # Refs without 'releases/gcc-' are treated as newer than any
            # release.
            version = sys.maxsize
            if 'releases/gcc-' in ref_name:
                version = int(ref_name.split('-')[-1])
            if version >= 12:
                # HSA and BRIG were removed in GCC 12
                self.changelog_locations.remove('gcc/brig')
                self.changelog_locations.remove('libhsail-rt')

    def parse_lines(self, all_are_ignored):
        """Set self.changes to the message tail starting at the ChangeLog.

        The tail starts at the first line that looks like part of a
        ChangeLog.  If there is none, an error is recorded unless
        ALL_ARE_IGNORED is true.
        """
        body = self.info.lines

        for i, b in enumerate(body):
            if not b:
                continue
            if (changelog_regex.match(b) or self.find_changelog_location(b)
                    or star_prefix_regex.match(b) or pr_regex.match(b)
                    or dr_regex.match(b) or author_line_regex.match(b)
                    or b.lower().startswith(CO_AUTHORED_BY_PREFIX)):
                self.changes = body[i:]
                return
        if not all_are_ignored:
            self.errors.append(Error('cannot find a ChangeLog location in '
                                     'message'))

    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Authors, PR/DR lines, Co-Authored-By trailers and the cherry-pick
        marker are collected as well; format problems go to self.errors.
        """
        last_entry = None
        will_deduce = False
        for line in self.changes:
            if not line:
                # A blank line ends an entry whose location is deduced.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                # support long filenames
                if (not line.startswith('\t* ') or not line.endswith(':')
                        or ' ' in line[3:-1]):
                    self.errors.append(Error('line exceeds %d character limit'
                                             % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    m = pr_regex.match(line)
                    component = m.group('component')
                    pr = m.group('pr')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    elif component[:-1] not in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                    # The PR of the subject is covered by this line.
                    if pr in self.subject_prs:
                        self.subject_prs.remove(pr)
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        if (len(m.group('spaces')) != 1 and
                                not last_entry.parentheses_stack):
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)

    def process_parentheses(self, last_entry, line):
        """Track the '(' / ')' balance of LINE on LAST_ENTRY's stack."""
        for c in line:
            if c == '(':
                last_entry.parentheses_stack.append(line)
            elif c == ')':
                if not last_entry.parentheses_stack:
                    msg = 'bad wrapping of parenthesis'
                    self.errors.append(Error(msg, line))
                else:
                    del last_entry.parentheses_stack[-1]

    def parse_file_names(self):
        """Extract the mentioned file names of every entry."""
        for entry in self.changelog_entries:
            entry.parse_file_names()

    def check_file_patterns(self):
        """Report wildcard items outside of wildcard_prefixes."""
        for entry in self.changelog_entries:
            if entry.folder is None:
                # deduce_changelog_locations already reported this entry;
                # os.path.join would raise TypeError on None.
                continue
            for pattern in entry.file_patterns:
                name = os.path.join(entry.folder, pattern)
                # any() is required here: a non-empty list of booleans is
                # always true, so testing the list itself never fails.
                if not any(name.startswith(pr) for pr in wildcard_prefixes):
                    msg = 'unsupported wildcard prefix'
                    self.errors.append(Error(msg, name))

    def check_for_empty_description(self):
        """Report items like '(func):' that have no description text."""
        for entry in self.changelog_entries:
            for i, line in enumerate(entry.lines):
                if (item_empty_regex.match(line) and
                        (i == len(entry.lines) - 1
                         or not entry.lines[i+1].strip()
                         or item_parenthesis_regex.match(entry.lines[i+1]))):
                    msg = 'missing description of a change'
                    self.errors.append(Error(msg, line))

    def check_for_broken_parentheses(self):
        """Report entries that end with an unclosed '('."""
        for entry in self.changelog_entries:
            if entry.parentheses_stack:
                msg = 'bad parentheses wrapping'
                self.errors.append(Error(msg, entry.parentheses_stack[-1]))

    def get_file_changelog_location(self, changelog_file):
        """Return the directory prefix of CHANGELOG_FILE in the commit.

        The result is '' for an exact (top-level) match, the text before
        the first '/' + CHANGELOG_FILE occurrence in a modified path, or
        None when no modified path matches.
        """
        for file in self.info.modified_files:
            if file[0] == changelog_file:
                # root ChangeLog file
                return ''
            index = file[0].find('/' + changelog_file)
            if index != -1:
                return file[0][:index]
        return None

    def deduce_changelog_locations(self):
        """Set the folder of entries that did not name their ChangeLog."""
        for entry in self.changelog_entries:
            if not entry.folder:
                changelog = None
                for file in entry.files:
                    location = self.get_file_changelog_location(file)
                    if (location == ''
                            or (location
                                and location in self.changelog_locations)):
                        if changelog and changelog != location:
                            msg = 'could not deduce ChangeLog file, ' \
                                  'not unique location'
                            self.errors.append(Error(msg))
                            return
                        changelog = location
                if changelog is not None:
                    entry.folder = changelog
                else:
                    msg = 'could not deduce ChangeLog file'
                    self.errors.append(Error(msg))

    @classmethod
    def in_ignored_location(cls, path):
        """Return True if PATH starts with one of ignored_prefixes."""
        for ignored in ignored_prefixes:
            if path.startswith(ignored):
                return True
        return False

    def get_changelog_by_path(self, path):
        """Return the deepest ChangeLog location containing PATH.

        The result is '' when no known location is a prefix of PATH.
        """
        components = path.split('/')
        while components:
            if '/'.join(components) in self.changelog_locations:
                break
            components = components[:-1]
        return '/'.join(components)

    def check_mentioned_files(self):
        """Compare the mentioned files with the files the commit changed.

        New files that are not mentioned get a 'New file.' line added to
        the entry of their ChangeLog (created on demand), except in the
        top-level folder.
        """
        # Only called when no error was recorded, so every folder is set.
        assert all(x.folder is not None for x in self.changelog_entries)

        mentioned_files = set()
        mentioned_patterns = []
        used_patterns = set()
        for entry in self.changelog_entries:
            if not entry.files and not entry.file_patterns:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    item = os.path.join(entry.folder, file)
                    if item in mentioned_files:
                        msg = 'same file specified multiple times'
                        self.errors.append(Error(msg, file))
                    else:
                        mentioned_files.add(item)
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        cand = [x[0] for x in self.info.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        # Sorted so that the suggestion does not depend on set order.
        sorted_changed_files = sorted(changed_files)
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            candidates = difflib.get_close_matches(file, sorted_changed_files, 1)
            details = None
            if candidates:
                msg += f' (did you mean "{candidates[0]}"?)'
                details = '\n'.join(difflib.Differ().compare(
                    [file], [candidates[0]])).rstrip()
            self.errors.append(Error(msg, file, details))

        new_files = set(self.new_files)
        for file in sorted(changed_files - mentioned_files):
            if not self.in_ignored_location(file):
                if file in new_files:
                    changelog_location = self.get_changelog_by_path(file)
                    # Python2: we cannot use next(filter(...))
                    entries = filter(lambda x: x.folder == changelog_location,
                                     self.changelog_entries)
                    entries = list(entries)
                    entry = entries[0] if entries else None
                    if not entry:
                        prs = self.top_level_prs
                        # There may be no entry at all, e.g. when the
                        # message only has a Co-Authored-By trailer.
                        if not prs and self.changelog_entries:
                            # if all ChangeLog entries have identical PRs
                            # then use them
                            prs = self.changelog_entries[0].prs
                            for entry in self.changelog_entries:
                                if entry.prs != prs:
                                    prs = []
                                    break
                        entry = ChangeLogEntry(changelog_location,
                                               self.top_level_authors,
                                               prs)
                        self.changelog_entries.append(entry)
                    # strip prefix of the file
                    assert file.startswith(entry.folder)
                    # do not allow auto-addition of New files
                    # for the top-level folder
                    if entry.folder:
                        file = file[len(entry.folder):].lstrip('/')
                        entry.lines.append('\t* %s: New file.' % file)
                        entry.files.append(file)
                    else:
                        msg = 'new file in the top-level folder not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))
                else:
                    used_pattern = [p for p in mentioned_patterns
                                    if file.startswith(p)]
                    used_pattern = used_pattern[0] if used_pattern else None
                    if used_pattern:
                        used_patterns.add(used_pattern)
                    else:
                        msg = 'changed file not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))

        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))

    def check_for_correct_changelog(self):
        """Report files listed under a ChangeLog that does not own them."""
        for entry in self.changelog_entries:
            for file in entry.files:
                full_path = os.path.join(entry.folder, file)
                changelog_location = self.get_changelog_by_path(full_path)
                if changelog_location != entry.folder:
                    msg = 'wrong ChangeLog location "%s", should be "%s"'
                    err = Error(msg % (entry.folder, changelog_location), file)
                    self.errors.append(err)

    @classmethod
    def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
        """Return the author header of an entry, ending with a blank line.

        The first author follows TIMESTAMP after two spaces; every other
        author is indented with a tab and four spaces.  This is the
        layout author_line_regex and parse_changelog accept.  Each line
        starts with PREFIX.
        """
        output = ''
        for i, author in enumerate(authors):
            if i == 0:
                output += '%s%s  %s\n' % (prefix, timestamp, author)
            else:
                output += '%s\t    %s\n' % (prefix, author)
        output += '\n'
        return output

    def to_changelog_entries(self, use_commit_ts=False):
        """Yield a (folder, text) tuple for every ChangeLog entry.

        With USE_COMMIT_TS the commit date replaces the date written in
        the entry.  Reverts and cherry picks get an outer header with the
        author and date of this commit and an inner, tab-indented header
        for the original change.
        """
        # For a revert self.info is the reverted commit.
        commit_timestamp = self.info.date.strftime(DATE_FORMAT)
        if self.revert_commit:
            # Computed once: updating it inside the loop would give the
            # second and later entries the wrong inner date.
            header_timestamp = self.original_info.date.strftime(DATE_FORMAT)
        else:
            header_timestamp = commit_timestamp

        for entry in self.changelog_entries:
            output = ''
            timestamp = entry.datetime
            if self.revert_commit:
                timestamp = commit_timestamp
            elif self.cherry_pick_commit:
                info = self.commit_to_info_hook(self.cherry_pick_commit)
                # it can happen that it is a cherry-pick for a different
                # repository
                if info:
                    timestamp = info.date.strftime(DATE_FORMAT)
                else:
                    timestamp = commit_timestamp
            elif not timestamp or use_commit_ts:
                timestamp = commit_timestamp
            authors = entry.authors if entry.authors else [self.info.author]
            # add Co-Authored-By authors to all ChangeLog entries
            for author in self.co_authors:
                if author not in authors:
                    authors.append(author)

            if self.cherry_pick_commit or self.revert_commit:
                original_author = self.original_info.author
                output += self.format_authors_in_changelog([original_author],
                                                           header_timestamp)
                if self.revert_commit:
                    output += '\tRevert:\n'
                else:
                    output += '\tBackported from master:\n'
                output += self.format_authors_in_changelog(authors,
                                                           timestamp, '\t')
            else:
                output += self.format_authors_in_changelog(authors, timestamp)
            for pr in entry.prs:
                output += '\t%s\n' % pr
            for line in entry.lines:
                output += line + '\n'
            yield (entry.folder, output.rstrip())

    def print_output(self):
        """Print every generated ChangeLog entry."""
        for entry, output in self.to_changelog_entries():
            print('------ %s/ChangeLog ------ ' % entry)
            print(output)

    def print_errors(self):
        """Print all recorded errors."""
        print('Errors:')
        for error in self.errors:
            print(error)

    def check_commit_email(self):
        """Record an error if the author email has non-ASCII characters."""
        # Parse 'Martin Liska <mliska@suse.cz>'
        email = self.info.author.split(' ')[-1].strip('<>')

        # Verify that all characters are ASCII
        # TODO: Python 3.7 provides a nicer function: isascii
        if len(email) != len(email.encode()):
            self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))