#!/usr/bin/env python3
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.

"""Parse and validate the ChangeLog part of a git commit message.

A GitCommit is built from a GitInfo (commit metadata, message lines and
the list of modified files).  The message is split into ChangeLogEntry
objects, one per ChangeLog file, and a number of consistency checks are
run; the problems found are collected in GitCommit.errors.
"""

import difflib
import os
import re

# Directories (relative to the repository root) that contain a ChangeLog.
changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'liboffloadmic',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib'}

# Component names accepted in a "PR component/NNNN" line.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web'}

# Changed files below these prefixes need not be mentioned in a ChangeLog.
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
    }

# The only locations where a "dir/*" wildcard entry is accepted.
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/'
    }

# Files that, like ChangeLog files, must be committed on their own.
misc_files = {
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE'
    }

author_line_regex = \
    re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.* <.*>)')
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.* <.*>)')
# NOTE: inside the character class "+-/" is a range (it covers + , - . /).
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?([0-9]+)$')
dr_regex = re.compile(r'\tDR ([0-9]+)$')
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
end_of_location_regex = re.compile(r'[\[<(:]')
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
revert_regex = re.compile(r'This reverts commit (?P<hash>\w+).$')
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')

LINE_LIMIT = 100
TAB_WIDTH = 8
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
                   'acked-by: ', 'tested-by: ', 'reported-by: ',
                   'suggested-by: ')
DATE_FORMAT = '%Y-%m-%d'


def decode_path(path):
    """Return PATH with git's C-style quoting removed.

    A path that is not wrapped in double quotes is returned unchanged.
    """
    # When core.quotepath is true (default value), utf8 chars are encoded like:
    # "b/ko\304\215ka.txt"
    #
    # The upstream bug is fixed:
    # https://github.com/gitpython-developers/GitPython/issues/1099
    #
    # but we still need a workaround for older versions of the library.
    # Please take a look at the explanation of the transformation:
    # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string

    if path.startswith('"') and path.endswith('"'):
        # Drop exactly one quote on each side.  str.strip('"') would also
        # eat an escaped quote (\") at the end of the name and leave a
        # dangling backslash that 'unicode-escape' cannot decode.
        return (path[1:-1].encode('utf8').decode('unicode-escape')
                .encode('latin-1').decode('utf8'))
    else:
        return path


class Error:
    """A problem found in a commit message, optionally tied to a line."""

    def __init__(self, message, line=None):
        self.message = message
        self.line = line

    def __repr__(self):
        s = self.message
        if self.line:
            s += ':"%s"' % self.line
        return s


class ChangeLogEntry:
    """The part of a commit message that belongs to one ChangeLog file.

    FOLDER is the directory of the ChangeLog, or None when it still has
    to be deduced from the mentioned files.  AUTHORS is a list of
    (name, date-or-None) tuples and PRS a list of PR/DR lines; both are
    copied so that later changes do not affect the caller's lists.
    """

    def __init__(self, folder, authors, prs):
        self.folder = folder
        # The 'list.copy()' function is not available before Python 3.3
        self.author_lines = list(authors)
        self.initial_prs = list(prs)
        self.prs = list(prs)
        self.lines = []
        self.files = []
        self.file_patterns = []

    def parse_file_names(self):
        """Fill self.files and self.file_patterns from self.lines."""
        # Whether the content currently processed is between a star prefix
        # and the end of the file list: a colon or an open paren.
        in_location = False

        for line in self.lines:
            # If this line matches the star prefix, start the location
            # processing on the information that follows the star.
            m = star_prefix_regex.match(line)
            if m:
                in_location = True
                line = m.group('content')

            if in_location:
                # Strip everything that is not a filename in "line":
                # entities "(NAME)", cases "<PATTERN>", conditions
                # "[COND]", entry text (the colon, if present, and
                # anything that follows it).
                m = end_of_location_regex.search(line)
                if m:
                    line = line[:m.start()]
                    in_location = False

                # At this point, all that's left is a list of filenames
                # separated by commas and whitespaces.
                for file in line.split(','):
                    file = file.strip()
                    if file:
                        if file.endswith('*'):
                            # Wildcard entry: remember the prefix only.
                            self.file_patterns.append(file[:-1])
                        else:
                            self.files.append(file)

    @property
    def datetime(self):
        """The date of the first author line that carries one, or None."""
        for author in self.author_lines:
            if author[1]:
                return author[1]
        return None

    @property
    def authors(self):
        """A new list with the author names of this entry."""
        return [author_line[0] for author_line in self.author_lines]

    @property
    def is_empty(self):
        """True when no line and no PR of its own was added to the entry."""
        return not self.lines and self.prs == self.initial_prs

    def contains_author(self, author):
        """Return True if AUTHOR is already listed for this entry."""
        for author_lines in self.author_lines:
            if author_lines[0] == author:
                return True
        return False


class GitInfo:
    """Plain container for the commit data consumed by GitCommit.

    LINES is the commit message split into lines and MODIFIED_FILES a list
    of (path, status) tuples as produced by
    GitCommit.parse_git_name_status.  DATE must provide strftime.
    """

    def __init__(self, hexsha, date, author, lines, modified_files):
        self.hexsha = hexsha
        self.date = date
        self.author = author
        self.lines = lines
        self.modified_files = modified_files


class GitCommit:
    """Parse the ChangeLog entries of one commit and verify them.

    INFO is a GitInfo.  When STRICT is true, a commit that touches
    ChangeLog/DATESTAMP/BASE-VER/DEV-PHASE together with other files is
    rejected.  COMMIT_TO_INFO_HOOK, if given, maps a commit hash to a
    GitInfo (or a false value when the commit is unknown); it is used
    for reverts and cherry-picks.
    """

    def __init__(self, info, strict=True, commit_to_info_hook=None):
        self.original_info = info
        self.info = info
        self.message = None
        self.changes = None
        self.changelog_entries = []
        self.errors = []
        self.top_level_authors = []
        self.co_authors = []
        self.top_level_prs = []
        self.cherry_pick_commit = None
        self.revert_commit = None
        self.commit_to_info_hook = commit_to_info_hook

        # Skip Update copyright years commits
        if self.info.lines and self.info.lines[0] == 'Update copyright years.':
            return

        # Identify first if the commit is a Revert commit
        for line in self.info.lines:
            m = revert_regex.match(line)
            if m:
                self.revert_commit = m.group('hash')
                break
        if self.revert_commit:
            # The ChangeLog of a revert is taken from the reverted commit.
            reverted_info = None
            if self.commit_to_info_hook:
                reverted_info = self.commit_to_info_hook(self.revert_commit)
            if not reverted_info:
                # Report the problem instead of crashing on a missing hook
                # or an unknown commit.
                self.errors.append(Error('cannot find to-be-reverted commit',
                                         self.revert_commit))
                return
            self.info = reverted_info

        project_files = [f for f in self.info.modified_files
                         if self.is_changelog_filename(f[0])
                         or f[0] in misc_files]
        ignored_files = [f for f in self.info.modified_files
                         if self.in_ignored_location(f[0])]
        if len(project_files) == len(self.info.modified_files):
            # All modified files are only MISC files
            return
        elif project_files and strict:
            self.errors.append(Error('ChangeLog, DATESTAMP, BASE-VER and '
                                     'DEV-PHASE updates should be done '
                                     'separately from normal commits'))
            return

        all_are_ignored = (len(project_files) + len(ignored_files)
                           == len(self.info.modified_files))
        self.parse_lines(all_are_ignored)
        if self.changes:
            self.parse_changelog()
            self.parse_file_names()
            self.check_for_empty_description()
            self.deduce_changelog_locations()
            self.check_file_patterns()
            if not self.errors:
                # These checks need every entry to have a folder.
                self.check_mentioned_files()
                self.check_for_correct_changelog()

    @property
    def success(self):
        """True when no error was recorded."""
        return not self.errors

    @property
    def new_files(self):
        """Paths of the files added by the commit."""
        return [x[0] for x in self.info.modified_files if x[1] == 'A']

    @classmethod
    def is_changelog_filename(cls, path):
        """Return True if PATH names a ChangeLog file."""
        return path.endswith('/ChangeLog') or path == 'ChangeLog'

    @classmethod
    def find_changelog_location(cls, name):
        """Return NAME as a known ChangeLog directory, or None.

        One leading tab, one trailing colon and one trailing slash are
        ignored.
        """
        if name.startswith('\t'):
            name = name[1:]
        if name.endswith(':'):
            name = name[:-1]
        if name.endswith('/'):
            name = name[:-1]
        return name if name in changelog_locations else None

    @classmethod
    def format_git_author(cls, author):
        """Insert the extra space ChangeLog style wants before '<'."""
        assert '<' in author
        return author.replace('<', ' <')

    @classmethod
    def parse_git_name_status(cls, string):
        """Turn 'git --name-status' output into (path, status) tuples.

        A rename is reported as a deletion plus an addition.  Entries
        with any other status are skipped.
        """
        modified_files = []
        for entry in string.split('\n'):
            parts = entry.split('\t')
            t = parts[0]
            if t == 'A' or t == 'D' or t == 'M':
                modified_files.append((parts[1], t))
            elif t.startswith('R'):
                modified_files.append((parts[1], 'D'))
                modified_files.append((parts[2], 'A'))
        return modified_files

    def parse_lines(self, all_are_ignored):
        """Locate the first ChangeLog-looking line of the message.

        self.changes is set to the message from that line on.  If there
        is no such line, an error is recorded unless ALL_ARE_IGNORED.
        """
        body = self.info.lines

        for i, b in enumerate(body):
            if not b:
                continue
            if (changelog_regex.match(b) or self.find_changelog_location(b)
                    or star_prefix_regex.match(b) or pr_regex.match(b)
                    or dr_regex.match(b) or author_line_regex.match(b)):
                self.changes = body[i:]
                return
        if not all_are_ignored:
            self.errors.append(Error('cannot find a ChangeLog location in '
                                     'message'))

    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Also collects top-level authors and PRs, co-authors and the
        cherry-pick hash, and records formatting errors.
        """
        last_entry = None
        will_deduce = False
        for line in self.changes:
            if not line:
                # A blank line ends an entry whose location is deduced.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                self.errors.append(Error('line exceeds %d character limit'
                                         % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    component = pr_regex.match(line).group('component')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    elif not component[:-1] in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        if len(m.group('spaces')) != 1:
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)

    def parse_file_names(self):
        """Extract the mentioned files of every entry."""
        for entry in self.changelog_entries:
            entry.parse_file_names()

    def check_file_patterns(self):
        """Reject wildcard entries outside of wildcard_prefixes."""
        for entry in self.changelog_entries:
            if entry.folder is None:
                # The location could not be deduced; that has been
                # reported already and there is no path to check.
                continue
            for pattern in entry.file_patterns:
                name = os.path.join(entry.folder, pattern)
                # any() is required here: a bare list of booleans is
                # truthy whenever it is non-empty.
                if not any(name.startswith(pr) for pr in wildcard_prefixes):
                    msg = 'unsupported wildcard prefix'
                    self.errors.append(Error(msg, name))

    def check_for_empty_description(self):
        """Report '(name):' items that are not followed by a description."""
        for entry in self.changelog_entries:
            for i, line in enumerate(entry.lines):
                if (item_empty_regex.match(line) and
                    (i == len(entry.lines) - 1
                     or not entry.lines[i+1].strip()
                     or item_parenthesis_regex.match(entry.lines[i+1]))):
                    msg = 'missing description of a change'
                    self.errors.append(Error(msg, line))

    def get_file_changelog_location(self, changelog_file):
        """Return the directory prefix under which CHANGELOG_FILE changed.

        The result is '' for a file in the repository root and None when
        no modified file matches.
        """
        for file in self.info.modified_files:
            if file[0] == changelog_file:
                # root ChangeLog file
                return ''
            index = file[0].find('/' + changelog_file)
            if index != -1:
                return file[0][:index]
        return None

    def deduce_changelog_locations(self):
        """Deduce the folder of entries that were given without one."""
        for entry in self.changelog_entries:
            # Only None means "unknown": '' is the explicitly named root
            # ChangeLog and must not be deduced again.
            if entry.folder is None:
                changelog = None
                for file in entry.files:
                    location = self.get_file_changelog_location(file)
                    if (location == ''
                       or (location and location in changelog_locations)):
                        if changelog and changelog != location:
                            msg = 'could not deduce ChangeLog file, ' \
                                  'not unique location'
                            self.errors.append(Error(msg))
                            return
                        changelog = location
                if changelog is not None:
                    entry.folder = changelog
                else:
                    msg = 'could not deduce ChangeLog file'
                    self.errors.append(Error(msg))

    @classmethod
    def in_ignored_location(cls, path):
        """Return True if PATH lies below one of ignored_prefixes."""
        for ignored in ignored_prefixes:
            if path.startswith(ignored):
                return True
        return False

    @classmethod
    def get_changelog_by_path(cls, path):
        """Return the deepest ChangeLog directory containing PATH.

        The result is '' when no parent is in changelog_locations.
        """
        components = path.split('/')
        while components:
            if '/'.join(components) in changelog_locations:
                break
            components = components[:-1]
        return '/'.join(components)

    def check_mentioned_files(self):
        """Cross-check the mentioned files against the modified files.

        Added files that are not mentioned get a generated 'New file.'
        line.  Must only be called when every entry has a folder.
        """
        mentioned_files = set()
        mentioned_patterns = []
        used_patterns = set()
        for entry in self.changelog_entries:
            if not entry.files and not entry.file_patterns:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    mentioned_files.add(os.path.join(entry.folder, file))
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        cand = [x[0] for x in self.info.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            candidates = difflib.get_close_matches(file, changed_files, 1)
            if candidates:
                msg += f' (did you mean "{candidates[0]}"?)'
            self.errors.append(Error(msg, file))
        for file in sorted(changed_files - mentioned_files):
            if not self.in_ignored_location(file):
                if file in self.new_files:
                    changelog_location = self.get_changelog_by_path(file)
                    # Python2: we cannot use next(filter(...))
                    entries = filter(lambda x: x.folder == changelog_location,
                                     self.changelog_entries)
                    entries = list(entries)
                    entry = entries[0] if entries else None
                    if not entry:
                        prs = self.top_level_prs
                        # The message may have no entry at all (e.g. only
                        # an author line), hence the emptiness test.
                        if not prs and self.changelog_entries:
                            # if all ChangeLog entries have identical PRs
                            # then use them
                            prs = self.changelog_entries[0].prs
                            for entry in self.changelog_entries:
                                if entry.prs != prs:
                                    prs = []
                                    break
                        entry = ChangeLogEntry(changelog_location,
                                               self.top_level_authors,
                                               prs)
                        self.changelog_entries.append(entry)
                    # strip prefix of the file
                    assert file.startswith(entry.folder)
                    file = file[len(entry.folder):].lstrip('/')
                    entry.lines.append('\t* %s: New file.' % file)
                    entry.files.append(file)
                else:
                    used_pattern = [p for p in mentioned_patterns
                                    if file.startswith(p)]
                    used_pattern = used_pattern[0] if used_pattern else None
                    if used_pattern:
                        used_patterns.add(used_pattern)
                    else:
                        msg = 'changed file not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))

        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))

    def check_for_correct_changelog(self):
        """Verify each file is listed in its closest ChangeLog.

        Must only be called when every entry has a folder.
        """
        for entry in self.changelog_entries:
            for file in entry.files:
                full_path = os.path.join(entry.folder, file)
                changelog_location = self.get_changelog_by_path(full_path)
                if changelog_location != entry.folder:
                    msg = 'wrong ChangeLog location "%s", should be "%s"'
                    err = Error(msg % (entry.folder, changelog_location), file)
                    self.errors.append(err)

    @classmethod
    def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
        """Return the author header of a ChangeLog entry.

        The layout is the one accepted by author_line_regex and by the
        additional-author check in parse_changelog: two spaces after the
        date, and one tab plus four spaces before every further author.
        """
        output = ''
        for i, author in enumerate(authors):
            if i == 0:
                output += '%s%s  %s\n' % (prefix, timestamp, author)
            else:
                output += '%s\t    %s\n' % (prefix, author)
        output += '\n'
        return output

    def to_changelog_entries(self, use_commit_ts=False):
        """Yield (folder, text) for every ChangeLog entry of the commit.

        With USE_COMMIT_TS the commit date replaces a date written in the
        entry itself.
        """
        # Date of the commit being processed; for a revert this is the
        # reverting commit, self.info being the reverted one.  It is
        # computed once so that every entry gets the same header.
        commit_timestamp = self.original_info.date.strftime(DATE_FORMAT)
        for entry in self.changelog_entries:
            output = ''
            timestamp = entry.datetime
            if self.revert_commit:
                timestamp = self.info.date.strftime(DATE_FORMAT)
            elif self.cherry_pick_commit:
                info = None
                if self.commit_to_info_hook:
                    info = self.commit_to_info_hook(self.cherry_pick_commit)
                # it can happen that it is a cherry-pick for a different
                # repository
                if info:
                    timestamp = info.date.strftime(DATE_FORMAT)
                else:
                    timestamp = commit_timestamp
            elif not timestamp or use_commit_ts:
                timestamp = commit_timestamp
            authors = entry.authors if entry.authors else [self.info.author]
            # add Co-Authored-By authors to all ChangeLog entries
            for author in self.co_authors:
                if author not in authors:
                    authors.append(author)

            if self.cherry_pick_commit or self.revert_commit:
                original_author = self.original_info.author
                output += self.format_authors_in_changelog([original_author],
                                                           commit_timestamp)
                if self.revert_commit:
                    output += '\tRevert:\n'
                else:
                    output += '\tBackported from master:\n'
                output += self.format_authors_in_changelog(authors,
                                                           timestamp, '\t')
            else:
                output += self.format_authors_in_changelog(authors, timestamp)
            for pr in entry.prs:
                output += '\t%s\n' % pr
            for line in entry.lines:
                output += line + '\n'
            yield (entry.folder, output.rstrip())

    def print_output(self):
        """Print the generated ChangeLog entries."""
        for entry, output in self.to_changelog_entries():
            print('------ %s/ChangeLog ------ ' % entry)
            print(output)

    def print_errors(self):
        """Print the collected errors."""
        print('Errors:')
        for error in self.errors:
            print(error)