#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2013-2017 Free Software Foundation, Inc.
#
# This script is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

# This script adjusts the copyright notices at the top of source files
# so that they have the form:
#
#   Copyright XXXX-YYYY Free Software Foundation, Inc.
#
# It doesn't change code that is known to be maintained elsewhere or
# that carries a non-FSF copyright.
#
# The script also doesn't change testsuite files, except those in
# libstdc++-v3.  This is because libstdc++-v3 has a conformance testsuite,
# while most tests in other directories are just things that failed at some
# point in the past.
#
# Pass --this-year to the script if you want it to add the current year
# to all applicable notices.  Pass --quilt if you are using quilt and
# want files to be added to the quilt before being changed.
#
# By default the script will update all directories for which the
# output has been vetted.  You can instead pass the names of individual
# directories, including those that haven't been approved.  So:
#
#    update-copyright.py --this-year
#
# is the command that would be used at the beginning of a year to update
# all copyright notices (and possibly at other times to check whether
# new files have been added with old years).  On the other hand:
#
#    update-copyright.py --this-year libitm
#
# would run the script on just libitm/.
#
# Note that things like --version output strings must be updated before
# this script is run.  There's already a separate procedure for that.

import os
import re
import sys
import time
import subprocess

class Errors:
    """Counts the errors reported so far and prints them to stderr."""

    def __init__ (self):
        self.num_errors = 0

    def report (self, filename, string):
        """Write STRING to stderr, prefixed by FILENAME if one is given,
        and bump the error count."""
        if filename:
            string = filename + ': ' + string
        sys.stderr.write (string + '\n')
        self.num_errors += 1

    def ok (self):
        """Return true if no error has been reported."""
        return self.num_errors == 0

class GenericFilter:
    """Decides which files and directories of a tree are left alone.

    Subclasses extend the sets created here or override the query
    methods."""

    def __init__ (self):
        self.skip_files = set()
        self.skip_dirs = set()
        self.skip_extensions = set()
        self.fossilised_files = set()
        self.own_files = set()

        self.skip_files |= set ([
                # Skip licence files.
                'COPYING',
                'COPYING.LIB',
                'COPYING3',
                'COPYING3.LIB',
                'LICENSE',
                'fdl.texi',
                'gpl_v3.texi',
                'fdl-1.3.xml',
                'gpl-3.0.xml',

                # Skip auto- and libtool-related files
                'aclocal.m4',
                'compile',
                'config.guess',
                'config.sub',
                'depcomp',
                'install-sh',
                'libtool.m4',
                'ltmain.sh',
                'ltoptions.m4',
                'ltsugar.m4',
                'ltversion.m4',
                'lt~obsolete.m4',
                'missing',
                'mkdep',
                'mkinstalldirs',
                'move-if-change',
                'shlibpath.m4',
                'symlink-tree',
                'ylwrap',

                # Skip FSF mission statement, etc.
                'gnu.texi',
                'funding.texi',
                'appendix_free.xml',

                # Skip imported texinfo files.
                'texinfo.tex',
                ])


    def get_line_filter (self, dir, filename):
        """Return a compiled regexp for lines of FILENAME that must be
        left untouched, or None if every line should be examined."""
        if filename.startswith ('ChangeLog'):
            # Ignore references to copyright in changelog entries.
            return re.compile ('\t')

        return None

    def skip_file (self, dir, filename):
        """Return True if DIR/FILENAME should not be processed at all."""
        if filename in self.skip_files:
            return True

        (base, extension) = os.path.splitext (os.path.join (dir, filename))
        if extension in self.skip_extensions:
            return True

        if extension == '.in':
            # Skip .in files produced by automake.
            if os.path.exists (base + '.am'):
                return True

            # Skip files produced by autogen
            if (os.path.exists (base + '.def')
                and os.path.exists (base + '.tpl')):
                return True

        # Skip configure files produced by autoconf
        if filename == 'configure':
            if os.path.exists (base + '.ac'):
                return True
            if os.path.exists (base + '.in'):
                return True

        return False

    def skip_dir (self, dir, subdir):
        """Return True if SUBDIR of DIR should not be walked."""
        return subdir in self.skip_dirs

    def is_fossilised_file (self, dir, filename):
        """Return True if FILENAME's years must not be extended to the
        year passed to Copyright.include_year."""
        if filename in self.fossilised_files:
            return True
        # Only touch current ChangeLogs.
        if filename != 'ChangeLog' and filename.find ('ChangeLog') >= 0:
            return True
        return False

    def by_package_author (self, dir, filename):
        """Return True if FILENAME is listed in own_files, in which case
        its holder is not looked up in the table of known holders."""
        return filename in self.own_files

class Copyright:
    """Finds copyright notices in files and rewrites them in place."""

    def __init__ (self, errors):
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile ('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile (ranges + '|' + name)

        # Matches a full copyright notice:
        self.copyright_re = re.compile (
            # 1: 'Copyright (C)', etc.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            "|copyright = u'"
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            # we can remove any excess.
            r'(\s*(?:' + ranges + r',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            # spaces, so that right-margin gloss doesn't get caught
            # (e.g. gnat_ugn.texi).
            '(' + name + r'(?:\s?' + name + ')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile ('copyright.*[0-9][0-9]',
                                              re.IGNORECASE)
        self.comment_re = re.compile ('#+|[*]+|;+|%+|//+|@c |dnl ')
        self.holders = { '@copying': '@copying' }
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author (self, holder, canon_form = None):
        """Register HOLDER as a spelling of the package author, to be
        rewritten as CANON_FORM (HOLDER itself if CANON_FORM is not
        given).

        Every space-terminated prefix of HOLDER is remembered as well, so
        that a notice broken across lines can be recognised as
        incomplete."""
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form
        index = holder.find (' ')
        while index >= 0:
            self.holder_prefixes.add (holder[:index])
            index = holder.find (' ', index + 1)

    def add_external_author (self, holder):
        """Register HOLDER as a known holder whose notices are left
        unchanged."""
        self.holders[holder] = None

    # This must derive from Exception: Python 3 refuses to raise or catch
    # anything that doesn't derive from BaseException.
    class BadYear (Exception):
        """Raised by parse_year for a year that cannot be interpreted."""

        def __init__ (self, year):
            Exception.__init__ (self, year)
            self.year = year

        def __str__ (self):
            return 'unrecognised year: ' + self.year

    def parse_year (self, string):
        """Convert the digit string STRING to a four-digit year.

        Two-digit years above 70 are taken to be 19xx.  Anything else
        that isn't four digits long raises BadYear."""
        year = int (string)
        if len (string) == 2:
            if year > 70:
                return year + 1900
        elif len (string) == 4:
            return year
        raise self.BadYear (string)

    def year_range (self, years):
        """Return (earliest, latest) for the years mentioned in YEARS."""
        year_list = [self.parse_year (year)
                     for year in self.year_re.findall (years)]
        assert len (year_list) > 0
        return (min (year_list), max (year_list))

    def set_use_quilt (self, use_quilt):
        """Set whether files are 'quilt add'ed before being replaced."""
        self.use_quilt = use_quilt

    def include_year (self, year):
        """Make every non-fossilised notice extend to at least YEAR.
        May only be called once."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years (self, dir, filename, filter, years):
        """Return YEARS rewritten as 'XXXX' or 'XXXX-YYYY'.

        Raises BadYear if one of the years cannot be parsed."""
        # Leave texinfo variables alone.
        if years.startswith ('@value'):
            return years

        (min_year, max_year) = self.year_range (years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file (dir, filename):
            max_year = max (max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        else:
            return '%d-%d' % (min_year, max_year)

    def strip_continuation (self, line):
        """Return LINE without leading whitespace and without one leading
        comment marker, if it has one."""
        line = line.lstrip()
        match = self.comment_re.match (line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete (self, match):
        """Return a true value if MATCH has a holder that isn't merely a
        prefix of one of the package author's names."""
        holder = match.group (4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright (self, dir, filename, filter, file, line, match):
        """Rewrite the notice that MATCH found in LINE.

        FILE is the iterator LINE was read from; further lines are pulled
        from it while the notice looks incomplete.  Return a tuple
        (CHANGED, TEXT, NEXT_LINE): CHANGED says whether TEXT differs from
        the text that was consumed, TEXT is what should be written in its
        place, and NEXT_LINE is a line that was read ahead but turned out
        not to belong to the notice (None if there is no such line)."""
        orig_line = line
        next_line = None
        pathname = os.path.join (dir, filename)

        intro = match.group (1)
        if intro.startswith ('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end (2):].strip()
            if after_years != '':
                self.errors.report (pathname,
                                    'trailing characters in @set: '
                                    + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete (match):
                try:
                    # The next() builtin works with both Python 2 and 3
                    # iterators, unlike the Python 2-only file.next().
                    next_line = next (file)
                except StopIteration:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation (next_line)
                if not self.continuation_re.match (continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match (line, match.start())
                assert match

            holder = match.group (4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author (dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report (pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report (pathname,
                                    'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # The edits below go from right to left, so that the
                # offsets of the groups still to be handled stay valid.

                # Make sure the author is given in a consistent way.
                line = (line[:match.start (4)]
                        + canon_form
                        + line[match.end (4):])

                # Remove any 'by'
                if match.group (3):
                    line = line[:match.start (3)] + line[match.end (3):]

        # Update the copyright years.
        years = match.group (2).strip()
        try:
            canon_form = self.canonicalise_years (dir, filename, filter, years)
        except self.BadYear as e:
            self.errors.report (pathname, str (e))
            return (False, orig_line, next_line)

        line = (line[:match.start (2)]
                + ('' if intro.startswith ('copyright = ') else ' ')
                + canon_form + self.separator
                + line[match.end (2):])

        # Use the standard (C) form.
        if intro.endswith ('right'):
            intro += ' (C)'
        elif intro.endswith ('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start (1)] + intro + line[match.end (1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def process_file (self, dir, filename, filter):
        """Update the notices in DIR/FILENAME.

        The file is only replaced if something changed and no error has
        been reported so far.  Leftover '.tmp' files are deleted."""
        pathname = os.path.join (dir, filename)
        if filename.endswith ('.tmp'):
            # Looks like something we tried to create before.
            try:
                os.remove (pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter (dir, filename)
        mode = None
        with open (pathname, 'r') as file:
            mode = os.fstat (file.fileno()).st_mode
            for line in file:
                # update_copyright can hand back a line that it read ahead;
                # that line must be examined too before reading on.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match (line)):
                        match = self.copyright_re.search (line)
                        if match:
                            res = self.update_copyright (dir, filename, filter,
                                                         file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search (line):
                            self.errors.report (pathname,
                                                'unrecognised copyright: %s'
                                                % line.strip())
                    lines.append (line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open (tmp_pathname, 'w') as file:
                for line in lines:
                    file.write (line)
                os.fchmod (file.fileno(), mode)
            if self.use_quilt:
                subprocess.call (['quilt', 'add', pathname])
            os.rename (tmp_pathname, pathname)

    def process_tree (self, tree, filter):
        """Process every file under TREE that FILTER doesn't exclude."""
        for (dir, subdirs, filenames) in os.walk (tree):
            # Don't recurse through directories that should be skipped.
            # The list has to be changed in place for os.walk to notice.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir (dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file (dir, filename):
                    sys.stdout.write ('Skipping %s\n'
                                      % os.path.join (dir, filename))
                else:
                    self.process_file (dir, filename, filter)

class CmdLine:
    """Command-line driver: parses sys.argv and runs a Copyright object
    over the registered directories."""

    def __init__ (self, copyright = Copyright):
        self.errors = Errors()
        self.copyright = copyright (self.errors)
        self.dirs = []
        self.default_dirs = []
        self.chosen_dirs = []
        self.option_handlers = dict()
        self.option_help = []

        self.add_option ('--help', 'Print this help', self.o_help)
        self.add_option ('--quilt', '"quilt add" files before changing them',
                         self.o_quilt)
        self.add_option ('--this-year', 'Add the current year to every notice',
                         self.o_this_year)

    def add_option (self, name, help, handler):
        """Register option NAME with help text HELP.  HANDLER is called
        with the option string when the option is seen."""
        self.option_help.append ((name, help))
        self.option_handlers[name] = handler

    def add_dir (self, dir, filter = None):
        """Register DIR as a directory that can be processed, using FILTER
        (a fresh GenericFilter if none is given) to select its files."""
        # Don't use a GenericFilter() default argument: it would be
        # created once and shared by every directory.
        if filter is None:
            filter = GenericFilter()
        self.dirs.append ((dir, filter))

    def o_help (self, option = None):
        """Print the usage message and exit with status 0."""
        sys.stdout.write ('Usage: %s [options] dir1 dir2...\n\n'
                          'Options:\n' % sys.argv[0])
        format = '%-15s %s\n'
        for (what, help) in self.option_help:
            sys.stdout.write (format % (what, help))
        sys.stdout.write ('\nDirectories:\n')

        # List the directories three to a row.
        format = '%-25s'
        i = 0
        for (dir, filter) in self.dirs:
            i += 1
            if i % 3 == 0 or i == len (self.dirs):
                sys.stdout.write (dir + '\n')
            else:
                sys.stdout.write (format % dir)
        sys.exit (0)

    def o_quilt (self, option):
        """Handle --quilt."""
        self.copyright.set_use_quilt (True)

    def o_this_year (self, option):
        """Handle --this-year."""
        self.copyright.include_year (time.localtime().tm_year)

    def main (self):
        """Parse sys.argv, process the chosen (or default) directories and
        exit with status 0 if no error was reported, 1 otherwise."""
        for arg in sys.argv[1:]:
            if arg[:1] != '-':
                self.chosen_dirs.append (arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg] (arg)
            else:
                self.errors.report (None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if len (self.chosen_dirs) == 0:
                self.chosen_dirs = self.default_dirs
            if len (self.chosen_dirs) == 0:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # Process every registered directory that is CHOSEN_DIR
                    # or lies inside it.
                    canon_dir = os.path.join (chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith (canon_dir):
                            count += 1
                            self.copyright.process_tree (dir, filter)
                    if count == 0:
                        self.errors.report (None, 'unrecognised directory: '
                                            + chosen_dir)
        sys.exit (0 if self.errors.ok() else 1)

#----------------------------------------------------------------------------

class TopLevelFilter (GenericFilter):
    """Filter for the top-level directory: never descends into
    subdirectories."""

    def skip_dir (self, dir, subdir):
        return True

class ConfigFilter (GenericFilter):
    """Filter for config/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

    def skip_file (self, dir, filename):
        if filename.endswith ('.m4'):
            pathname = os.path.join (dir, filename)
            with open (pathname) as file:
                # Skip files imported from gettext.
                if file.readline().find ('gettext-') >= 0:
                    return True
        return GenericFilter.skip_file (self, dir, filename)

class GCCFilter (GenericFilter):
    """Filter for gcc/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_files |= set ([
                # Not part of GCC
                'math-68881.h',
                ])

        self.skip_dirs |= set ([
                # Better not create a merge nightmare for the GNAT folks.
                'ada',

                # Handled separately.
                'testsuite',
                ])

        self.skip_extensions |= set ([
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ])

        self.fossilised_files |= set ([
                # Old news won't be updated.
                'ONEWS',
                ])

class TestsuiteFilter (GenericFilter):
    """Filter for gcc/testsuite/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_extensions |= set ([
                # Don't change the tests, which could be owned by anyone.
                '.c',
                '.C',
                '.cc',
                '.h',
                '.hs',
                '.f',
                '.f90',
                '.go',
                '.inc',
                '.java',
                ])

    def skip_file (self, dir, filename):
        # g++.niklas/README contains historical copyright information
        # and isn't updated.
        if filename == 'README' and os.path.basename (dir) == 'g++.niklas':
            return True
        # Similarly params/README.
        if filename == 'README' and os.path.basename (dir) == 'params':
            return True
        return GenericFilter.skip_file (self, dir, filename)

class LibCppFilter (GenericFilter):
    """Filter for libcpp/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_extensions |= set ([
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ])

class LibGCCFilter (GenericFilter):
    """Filter for libgcc/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_dirs |= set ([
                # Imported from GLIBC.
                'soft-fp',
                ])

class LibStdCxxFilter (GenericFilter):
    """Filter for libstdc++-v3/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_files |= set ([
                # Contains no copyright of its own, but quotes the GPL.
                'intro.xml',
                ])

        self.skip_dirs |= set ([
                # Contains automatically-generated sources.
                'html',

                # The testsuite data files shouldn't be changed.
                'data',

                # Contains imported images
                'images',
                ])

        self.own_files |= set ([
                # Contains markup around the copyright owner.
                'spine.xml',
                ])

    def get_line_filter (self, dir, filename):
        if filename == 'boost_concept_check.h':
            return re.compile (r'// \(C\) Copyright Jeremy Siek')
        return GenericFilter.get_line_filter (self, dir, filename)

class GCCCopyright (Copyright):
    """Copyright updater that knows the holders found in the GCC tree."""

    def __init__ (self, errors):
        Copyright.__init__ (self, errors)

        canon_fsf = 'Free Software Foundation, Inc.'
        self.add_package_author ('Free Software Foundation', canon_fsf)
        self.add_package_author ('Free Software Foundation.', canon_fsf)
        self.add_package_author ('Free Software Foundation Inc.', canon_fsf)
        self.add_package_author ('Free Software Foundation, Inc', canon_fsf)
        self.add_package_author ('Free Software Foundation, Inc.', canon_fsf)
        self.add_package_author ('The Free Software Foundation', canon_fsf)
        self.add_package_author ('The Free Software Foundation, Inc.', canon_fsf)
        self.add_package_author ('Software Foundation, Inc.', canon_fsf)

        self.add_external_author ('ARM')
        self.add_external_author ('AdaCore')
        self.add_external_author ('Ami Tavory and Vladimir Dreizin, IBM-HRL.')
        self.add_external_author ('Cavium Networks.')
        self.add_external_author ('Faraday Technology Corp.')
        self.add_external_author ('Florida State University')
        self.add_external_author ('Greg Colvin and Beman Dawes.')
        self.add_external_author ('Hewlett-Packard Company')
        self.add_external_author ('Intel Corporation')
        self.add_external_author ('Information Technology Industry Council.')
        self.add_external_author ('James Theiler, Brian Gough')
        self.add_external_author ('Makoto Matsumoto and Takuji Nishimura,')
        self.add_external_author ('National Research Council of Canada.')
        self.add_external_author ('NVIDIA Corporation')
        self.add_external_author ('Peter Dimov and Multi Media Ltd.')
        self.add_external_author ('Peter Dimov')
        self.add_external_author ('Pipeline Associates, Inc.')
        self.add_external_author ('Regents of the University of California.')
        self.add_external_author ('Silicon Graphics Computer Systems, Inc.')
        self.add_external_author ('Silicon Graphics')
        self.add_external_author ('Stephen L. Moshier')
        self.add_external_author ('Sun Microsystems, Inc. All rights reserved.')
        self.add_external_author ('The Go Authors.  All rights reserved.')
        self.add_external_author ('The Go Authors. All rights reserved.')
        self.add_external_author ('The Go Authors.')
        self.add_external_author ('The Regents of the University of California.')
        self.add_external_author ('Unicode, Inc.')
        self.add_external_author ('University of Toronto.')

class GCCCmdLine (CmdLine):
    """Command line for the GCC tree: registers its directories and the
    subset that is processed by default."""

    def __init__ (self):
        CmdLine.__init__ (self, GCCCopyright)

        self.add_dir ('.', TopLevelFilter())
        # boehm-gc is imported from upstream.
        self.add_dir ('config', ConfigFilter())
        # contrib isn't really part of GCC.
        self.add_dir ('fixincludes')
        self.add_dir ('gcc', GCCFilter())
        self.add_dir (os.path.join ('gcc', 'testsuite'), TestsuiteFilter())
        self.add_dir ('gnattools')
        self.add_dir ('gotools')
        self.add_dir ('include')
        # intl is imported from upstream.
        self.add_dir ('libada')
        self.add_dir ('libatomic')
        self.add_dir ('libbacktrace')
        self.add_dir ('libcc1')
        # libcilkrts is imported from upstream.
        self.add_dir ('libcpp', LibCppFilter())
        self.add_dir ('libdecnumber')
        # libffi is imported from upstream.
        self.add_dir ('libgcc', LibGCCFilter())
        self.add_dir ('libgfortran')
        # libgo is imported from upstream.
        self.add_dir ('libgomp')
        self.add_dir ('libhsail-rt')
        self.add_dir ('libiberty')
        self.add_dir ('libitm')
        self.add_dir ('libobjc')
        # liboffloadmic is imported from upstream.
        self.add_dir ('libquadmath')
        # libsanitizer is imported from upstream.
        self.add_dir ('libssp')
        self.add_dir ('libstdc++-v3', LibStdCxxFilter())
        self.add_dir ('libvtv')
        self.add_dir ('lto-plugin')
        # maintainer-scripts maintainer-scripts
        # zlib is imported from upstream.

        self.default_dirs = [
            'gcc',
            'include',
            'libada',
            'libatomic',
            'libbacktrace',
            'libcc1',
            'libcpp',
            'libdecnumber',
            'libgcc',
            'libgfortran',
            'libgomp',
            'libhsail-rt',
            'libiberty',
            'libitm',
            'libobjc',
            'libssp',
            'libstdc++-v3',
            'libvtv',
            'lto-plugin',
            ]

# Only run when executed as a script, so that importing the module doesn't
# parse sys.argv and exit.
if __name__ == '__main__':
    GCCCmdLine().main()