xref: /netbsd-src/external/gpl3/gcc.old/dist/contrib/update-copyright.py (revision cef8759bd76c1b621f8eab8faa6f208faabc2e15)
1#!/usr/bin/python
2#
3# Copyright (C) 2013-2017 Free Software Foundation, Inc.
4#
5# This script is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 3, or (at your option)
8# any later version.
9
10# This script adjusts the copyright notices at the top of source files
11# so that they have the form:
12#
13#   Copyright XXXX-YYYY Free Software Foundation, Inc.
14#
15# It doesn't change code that is known to be maintained elsewhere or
16# that carries a non-FSF copyright.
17#
18# The script also doesn't change testsuite files, except those in
19# libstdc++-v3.  This is because libstdc++-v3 has a conformance testsuite,
20# while most tests in other directories are just things that failed at some
21# point in the past.
22#
23# Pass --this-year to the script if you want it to add the current year
24# to all applicable notices.  Pass --quilt if you are using quilt and
25# want files to be added to the quilt before being changed.
26#
27# By default the script will update all directories for which the
28# output has been vetted.  You can instead pass the names of individual
29# directories, including those that haven't been approved.  So:
30#
31#    update-copyright.py --this-year
32#
33# is the command that would be used at the beginning of a year to update
34# all copyright notices (and possibly at other times to check whether
35# new files have been added with old years).  On the other hand:
36#
37#    update-copyright.py --this-year libitm
38#
39# would run the script on just libitm/.
40#
41# Note that things like --version output strings must be updated before
42# this script is run.  There's already a separate procedure for that.
43
44import os
45import re
46import sys
47import time
48import subprocess
49
class Errors:
    """Write error messages to stderr and count how many were reported."""

    def __init__ (self):
        # Number of calls made to report() so far.
        self.num_errors = 0

    def report (self, filename, string):
        """Print STRING on stderr, prefixed with 'FILENAME: ' when FILENAME
        is non-empty, and bump the error count."""
        message = string
        if filename:
            message = filename + ': ' + message
        sys.stderr.write (message + '\n')
        self.num_errors += 1

    def ok (self):
        """Return True while no error has been reported."""
        return not self.num_errors
62
class GenericFilter:
    """Default policy deciding which files and directories are processed.

    The constructor creates five sets (skip_files, skip_dirs,
    skip_extensions, fossilised_files, own_files) that the query methods
    below consult; subclasses add to them or override the methods.
    """

    def __init__ (self):
        self.skip_files = set()
        self.skip_dirs = set()
        self.skip_extensions = set()
        self.fossilised_files = set()
        self.own_files = set()

        # Licence files.
        self.skip_files.update ([
                'COPYING',
                'COPYING.LIB',
                'COPYING3',
                'COPYING3.LIB',
                'LICENSE',
                'fdl.texi',
                'gpl_v3.texi',
                'fdl-1.3.xml',
                'gpl-3.0.xml',
                ])

        # Auto- and libtool-related files.
        self.skip_files.update ([
                'aclocal.m4',
                'compile',
                'config.guess',
                'config.sub',
                'depcomp',
                'install-sh',
                'libtool.m4',
                'ltmain.sh',
                'ltoptions.m4',
                'ltsugar.m4',
                'ltversion.m4',
                'lt~obsolete.m4',
                'missing',
                'mkdep',
                'mkinstalldirs',
                'move-if-change',
                'shlibpath.m4',
                'symlink-tree',
                'ylwrap',
                ])

        # FSF mission statement, etc.
        self.skip_files.update ([
                'gnu.texi',
                'funding.texi',
                'appendix_free.xml',
                ])

        # Imported texinfo files.
        self.skip_files.add ('texinfo.tex')

    def get_line_filter (self, dir, filename):
        """Return a compiled regexp for lines to leave alone, or None.

        For files whose name starts with 'ChangeLog' the regexp is a
        single tab, so that references to copyright inside changelog
        entries are ignored.
        """
        if not filename.startswith ('ChangeLog'):
            return None
        return re.compile ('\t')

    def skip_file (self, dir, filename):
        """Return True if FILENAME in directory DIR should not be processed."""
        if filename in self.skip_files:
            return True

        stem, suffix = os.path.splitext (os.path.join (dir, filename))
        if suffix in self.skip_extensions:
            return True

        if suffix == '.in':
            # A sibling .am file means automake produced this .in file.
            if os.path.exists (stem + '.am'):
                return True

            # Sibling .def and .tpl files mean autogen produced it.
            if (os.path.exists (stem + '.def')
                and os.path.exists (stem + '.tpl')):
                return True

        # A configure script with a sibling configure.ac or configure.in
        # was produced by autoconf.
        if filename == 'configure':
            return (os.path.exists (stem + '.ac')
                    or os.path.exists (stem + '.in'))

        return False

    def skip_dir (self, dir, subdir):
        """Return True if subdirectory SUBDIR of DIR should not be entered."""
        return subdir in self.skip_dirs

    def is_fossilised_file (self, dir, filename):
        """Return True if FILENAME should not have new years added to it."""
        if filename in self.fossilised_files:
            return True
        # Only the file called exactly 'ChangeLog' is current; any other
        # name containing 'ChangeLog' is treated as fossilised.
        return filename != 'ChangeLog' and 'ChangeLog' in filename

    def by_package_author (self, dir, filename):
        """Return True if FILENAME is listed in own_files."""
        return filename in self.own_files
161
class Copyright:
    """Find copyright notices in files and rewrite them in a canonical form.

    Notices are located with copyright_re.  Holders registered with
    add_package_author have their notices normalised, holders registered
    with add_external_author are recognised but left untouched, and any
    other holder is reported through the errors object passed to the
    constructor.
    """

    def __init__ (self, errors):
        """Compile the regexps and set the default options.

        ERRORS must provide report (filename, string) and ok ().
        """
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile ('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile (ranges + '|' + name)

        # Matches a full copyright notice.  Raw strings are used so that
        # the regexp escapes are not interpreted as string escapes.
        self.copyright_re = re.compile (
            # 1: 'Copyright (C)', etc.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            "|copyright = u'"
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            # we can remove any excess.
            r'(\s*(?:' + ranges + r',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            # spaces, so that right-margin gloss doesn't get caught
            # (e.g. gnat_ugn.texi).
            r'(' + name + r'(?:\s?' + name + r')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile ('copyright.*[0-9][0-9]',
                                              re.IGNORECASE)

        # Comment leaders that may start a continuation line.
        self.comment_re = re.compile('#+|[*]+|;+|%+|//+|@c |dnl ')

        # Maps each recognised holder spelling to its canonical form, or
        # to None for holders whose notices must be left alone.
        self.holders = { '@copying': '@copying' }

        # Space-delimited proper prefixes of the package author spellings.
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author (self, holder, canon_form = None):
        """Register HOLDER as a spelling of the package author.

        Notices naming HOLDER are rewritten to use CANON_FORM, which
        defaults to HOLDER itself.  Every space-delimited proper prefix
        of HOLDER is recorded so that a notice that has only got that far
        is treated as continuing on the following line.
        """
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form
        index = holder.find (' ')
        while index >= 0:
            self.holder_prefixes.add (holder[:index])
            index = holder.find (' ', index + 1)

    def add_external_author (self, holder):
        """Register HOLDER as a known holder whose notices are not changed."""
        self.holders[holder] = None

    class BadYear (Exception):
        """Raised by parse_year for a year string it cannot interpret.

        This derives from Exception because only exception instances can
        be raised and caught on Python 3.
        """

        def __init__ (self, year):
            Exception.__init__ (self, year)
            self.year = year

        def __str__ (self):
            return 'unrecognised year: ' + self.year

    def parse_year (self, string):
        """Convert the digit string STRING to a four-digit year.

        Two-digit years greater than 70 are taken to be 19xx and
        four-digit years are returned unchanged.  Anything else raises
        BadYear.
        """
        year = int (string)
        if len (string) == 2:
            if year > 70:
                return year + 1900
        elif len (string) == 4:
            return year
        raise self.BadYear (string)

    def year_range (self, years):
        """Return (earliest, latest) over all the years found in YEARS."""
        year_list = [self.parse_year (year)
                     for year in self.year_re.findall (years)]
        assert len (year_list) > 0
        return (min (year_list), max (year_list))

    def set_use_quilt (self, use_quilt):
        """Choose whether 'quilt add' is run on files before changing them."""
        self.use_quilt = use_quilt

    def include_year (self, year):
        """Force YEAR into every notice; may only be set once."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years (self, dir, filename, filter, years):
        """Return YEARS as 'XXXX' or 'XXXX-YYYY'.

        The upper bound is raised to max_year if that is set and FILTER
        does not consider the file fossilised.  Raises BadYear if one of
        the years cannot be parsed.
        """
        # Leave texinfo variables alone.
        if years.startswith ('@value'):
            return years

        (min_year, max_year) = self.year_range (years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file (dir, filename):
            max_year = max (max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        else:
            return '%d-%d' % (min_year, max_year)

    def strip_continuation (self, line):
        """Return LINE without leading whitespace and comment leader."""
        line = line.lstrip()
        match = self.comment_re.match (line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete (self, match):
        """Return a true value if MATCH already has its whole holder.

        A holder that is only a prefix of a package author spelling, and
        is not itself a registered holder, counts as incomplete.
        """
        holder = match.group (4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright (self, dir, filename, filter, file, line, match):
        """Canonicalise the notice that MATCH found in LINE.

        FILE is the iterator LINE was read from; further lines are pulled
        from it while the notice looks incomplete.  Returns a tuple
        (changed, new_line, next_line).  next_line is a line that was read
        from FILE but turned out not to continue the notice, or None.
        On error, or for an external holder, the original text is
        returned with changed set to False.
        """
        orig_line = line
        next_line = None
        pathname = os.path.join (dir, filename)

        intro = match.group (1)
        if intro.startswith ('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end (2):].strip()
            if after_years != '':
                self.errors.report (pathname,
                                    'trailing characters in @set: '
                                    + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete (match):
                try:
                    # The next() builtin works on both Python 2.6+ and 3,
                    # unlike the Python 2-only file.next() method.
                    next_line = next (file)
                except StopIteration:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation (next_line)
                if not self.continuation_re.match (continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match (line, match.start())
                assert match

            holder = match.group (4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author (dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report (pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report (pathname,
                                    'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # The replacements below work from right to left, so the
                # match offsets of the earlier groups stay valid.

                # Make sure the author is given in a consistent way.
                line = (line[:match.start (4)]
                        + canon_form
                        + line[match.end (4):])

                # Remove any 'by'.  If group 3 did not take part in the
                # match both offsets are -1 and the line is unchanged.
                line = line[:match.start (3)] + line[match.end (3):]

        # Update the copyright years.
        years = match.group (2).strip()
        try:
            canon_form = self.canonicalise_years (dir, filename, filter, years)
        except self.BadYear as e:
            self.errors.report (pathname, str (e))
            return (False, orig_line, next_line)

        line = (line[:match.start (2)]
                + ('' if intro.startswith ('copyright = ') else ' ')
                + canon_form + self.separator
                + line[match.end (2):])

        # Use the standard (C) form.
        if intro.endswith ('right'):
            intro += ' (C)'
        elif intro.endswith ('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start (1)] + intro + line[match.end (1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def process_file (self, dir, filename, filter):
        """Update the notices in FILENAME, which lives in directory DIR.

        Files ending in '.tmp' are treated as leftovers from an earlier
        run and removed.  Otherwise the file is rewritten, via a '.tmp'
        file that is renamed over it, only if a notice changed and no
        error has been reported so far.
        """
        pathname = os.path.join (dir, filename)
        if filename.endswith ('.tmp'):
            # Looks like something we tried to create before.
            try:
                os.remove (pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter (dir, filename)
        mode = None
        with open (pathname, 'r') as file:
            mode = os.fstat (file.fileno()).st_mode
            for line in file:
                # update_copyright may read ahead and hand back a line it
                # did not consume; the inner loop processes that line too.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match (line)):
                        match = self.copyright_re.search (line)
                        if match:
                            res = self.update_copyright (dir, filename, filter,
                                                         file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search (line):
                            self.errors.report (pathname,
                                                'unrecognised copyright: %s'
                                                % line.strip())
                    lines.append (line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open (tmp_pathname, 'w') as file:
                for line in lines:
                    file.write (line)
                # Give the replacement the same mode as the original.
                os.fchmod (file.fileno(), mode)
            if self.use_quilt:
                subprocess.call (['quilt', 'add', pathname])
            os.rename (tmp_pathname, pathname)

    def process_tree (self, tree, filter):
        """Process every file under TREE that FILTER does not exclude."""
        for (dir, subdirs, filenames) in os.walk (tree):
            # Don't recurse through directories that should be skipped.
            # The list is pruned in place because os.walk only honours
            # changes made to the list object it yielded.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir (dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file (dir, filename):
                    sys.stdout.write ('Skipping %s\n'
                                      % os.path.join (dir, filename))
                else:
                    self.process_file (dir, filename, filter)
445
class CmdLine:
    """Command-line driver: parses sys.argv and runs a Copyright object
    over the chosen directories."""

    def __init__ (self, copyright = Copyright):
        """Create the error collector, the COPYRIGHT instance and the
        standard options."""
        self.errors = Errors()
        self.copyright = copyright (self.errors)

        # (directory, filter) pairs registered with add_dir.
        self.dirs = []

        # Directories used when none are named on the command line.
        self.default_dirs = []

        # Directories named on the command line.
        self.chosen_dirs = []

        # Maps an option name to its handler; option_help keeps the
        # (name, help) pairs in registration order for o_help.
        self.option_handlers = dict()
        self.option_help = []

        self.add_option ('--help', 'Print this help', self.o_help)
        self.add_option ('--quilt', '"quilt add" files before changing them',
                         self.o_quilt)
        self.add_option ('--this-year', 'Add the current year to every notice',
                         self.o_this_year)

    def add_option (self, name, help, handler):
        """Register option NAME with help text HELP; HANDLER is called
        with the option string when the option is seen."""
        self.option_help.append ((name, help))
        self.option_handlers[name] = handler

    def add_dir (self, dir, filter = None):
        """Register directory DIR, to be processed using FILTER.

        A new GenericFilter is created when FILTER is omitted, so that
        directories registered without a filter do not share one instance
        built when the method was defined.
        """
        if filter is None:
            filter = GenericFilter()
        self.dirs.append ((dir, filter))

    def o_help (self, option = None):
        """Print the usage message and the registered directories, then
        exit with status 0."""
        sys.stdout.write ('Usage: %s [options] dir1 dir2...\n\n'
                          'Options:\n' % sys.argv[0])
        option_format = '%-15s %s\n'
        for (what, text) in self.option_help:
            sys.stdout.write (option_format % (what, text))
        sys.stdout.write ('\nDirectories:\n')

        # Print the directories three to a line.
        dir_format = '%-25s'
        i = 0
        for (dir, filter) in self.dirs:
            i += 1
            if i % 3 == 0 or i == len (self.dirs):
                sys.stdout.write (dir + '\n')
            else:
                sys.stdout.write (dir_format % dir)
        sys.exit (0)

    def o_quilt (self, option):
        """Handle --quilt."""
        self.copyright.set_use_quilt (True)

    def o_this_year (self, option):
        """Handle --this-year by adding the current local year."""
        self.copyright.include_year (time.localtime().tm_year)

    def main (self):
        """Parse sys.argv, process the chosen directories and exit.

        The exit status is 0 if no error was reported and 1 otherwise.
        """
        for arg in sys.argv[1:]:
            if arg[:1] != '-':
                self.chosen_dirs.append (arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg] (arg)
            else:
                self.errors.report (None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if len (self.chosen_dirs) == 0:
                self.chosen_dirs = self.default_dirs
            if len (self.chosen_dirs) == 0:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # Add a trailing separator so that a chosen directory
                    # selects itself and the registered directories below
                    # it, but not siblings that share a name prefix.
                    canon_dir = os.path.join (chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith (canon_dir):
                            count += 1
                            self.copyright.process_tree (dir, filter)
                    if count == 0:
                        self.errors.report (None, 'unrecognised directory: '
                                            + chosen_dir)
        sys.exit (0 if self.errors.ok() else 1)
518
519#----------------------------------------------------------------------------
520
class TopLevelFilter (GenericFilter):
    """Filter that never descends into a subdirectory."""

    def skip_dir (self, dir, subdir):
        # Every subdirectory is skipped, so only the files directly in
        # the walked directory are considered.
        return True
524
class ConfigFilter (GenericFilter):
    """Generic filter that also skips .m4 files imported from gettext."""

    def skip_file (self, dir, filename):
        """Return True for an .m4 file whose first line mentions
        'gettext-'; otherwise defer to GenericFilter."""
        if filename.endswith ('.m4'):
            with open (os.path.join (dir, filename)) as file:
                first_line = file.readline()
            # Skip files imported from gettext.
            if 'gettext-' in first_line:
                return True
        return GenericFilter.skip_file (self, dir, filename)
537
class GCCFilter (GenericFilter):
    """Generic filter with additional exclusions."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Not part of GCC
        self.skip_files.add ('math-68881.h')

        self.skip_dirs.update ([
                # Better not create a merge nightmare for the GNAT folks.
                'ada',

                # Handled separately.
                'testsuite',
                ])

        self.skip_extensions.update ([
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ])

        # Old news won't be updated.
        self.fossilised_files.add ('ONEWS')
567
class TestsuiteFilter (GenericFilter):
    """Generic filter that also skips test sources and two README files."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Don't change the tests themselves, which could be owned by
        # anyone.
        self.skip_extensions.update ([
                '.c',
                '.C',
                '.cc',
                '.h',
                '.hs',
                '.f',
                '.f90',
                '.go',
                '.inc',
                '.java',
                ])

    def skip_file (self, dir, filename):
        """Return True for the two historical README files and for
        anything GenericFilter would skip."""
        # g++.niklas/README contains historical copyright information
        # and isn't updated.  Similarly params/README.
        if (filename == 'README'
            and os.path.basename (dir) in ('g++.niklas', 'params')):
            return True
        return GenericFilter.skip_file (self, dir, filename)
595
class LibCppFilter (GenericFilter):
    """Generic filter that also skips translation files."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # .po files are maintained by the translation project and .pot
        # files are automatically generated.
        self.skip_extensions.update (['.po', '.pot'])
607
class LibGCCFilter (GenericFilter):
    """Generic filter that also skips the soft-fp directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Imported from GLIBC.
        self.skip_dirs.add ('soft-fp')
616
class LibStdCxxFilter (GenericFilter):
    """Generic filter with additional exclusions and one line filter."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_files |= set ([
                # Contains no copyright of its own, but quotes the GPL.
                'intro.xml',
                ])

        self.skip_dirs |= set ([
                # Contains automatically-generated sources.
                'html',

                # The testsuite data files shouldn't be changed.
                'data',

                # Contains imported images
                'images',
                ])

        self.own_files |= set ([
                # Contains markup around the copyright owner.
                'spine.xml',
                ])

    def get_line_filter (self, dir, filename):
        """Return a regexp for the lines of FILENAME to leave alone, or
        None.

        In boost_concept_check.h the lines starting with the
        '// (C) Copyright Jeremy Siek' notice are left alone; every other
        file gets the GenericFilter behaviour.
        """
        if filename == 'boost_concept_check.h':
            # A raw string keeps '\(' as a regexp escape rather than an
            # invalid string escape.
            return re.compile (r'// \(C\) Copyright Jeremy Siek')
        return GenericFilter.get_line_filter (self, dir, filename)
646
class GCCCopyright (Copyright):
    """Copyright with the FSF spellings and known external holders
    registered."""

    def __init__ (self, errors):
        Copyright.__init__ (self, errors)

        # Every spelling below is rewritten to this canonical form.
        canon_fsf = 'Free Software Foundation, Inc.'
        fsf_spellings = [
            'Free Software Foundation',
            'Free Software Foundation.',
            'Free Software Foundation Inc.',
            'Free Software Foundation, Inc',
            'Free Software Foundation, Inc.',
            'The Free Software Foundation',
            'The Free Software Foundation, Inc.',
            'Software Foundation, Inc.',
            ]
        for spelling in fsf_spellings:
            self.add_package_author (spelling, canon_fsf)

        # Holders that are recognised but whose notices are not changed.
        external_holders = [
            'ARM',
            'AdaCore',
            'Ami Tavory and Vladimir Dreizin, IBM-HRL.',
            'Cavium Networks.',
            'Faraday Technology Corp.',
            'Florida State University',
            'Greg Colvin and Beman Dawes.',
            'Hewlett-Packard Company',
            'Intel Corporation',
            'Information Technology Industry Council.',
            'James Theiler, Brian Gough',
            'Makoto Matsumoto and Takuji Nishimura,',
            'National Research Council of Canada.',
            'NVIDIA Corporation',
            'Peter Dimov and Multi Media Ltd.',
            'Peter Dimov',
            'Pipeline Associates, Inc.',
            'Regents of the University of California.',
            'Silicon Graphics Computer Systems, Inc.',
            'Silicon Graphics',
            'Stephen L. Moshier',
            'Sun Microsystems, Inc. All rights reserved.',
            'The Go Authors.  All rights reserved.',
            'The Go Authors. All rights reserved.',
            'The Go Authors.',
            'The Regents of the University of California.',
            'Unicode, Inc.',
            'University of Toronto.',
            ]
        for holder in external_holders:
            self.add_external_author (holder)
689
class GCCCmdLine (CmdLine):
    """CmdLine configured with the GCC directories and their filters."""

    def __init__ (self):
        CmdLine.__init__ (self, GCCCopyright)

        # Argument tuples for add_dir, in registration order.  A
        # one-element tuple leaves the filter at add_dir's default.
        registered = [
            ('.', TopLevelFilter()),
            # boehm-gc is imported from upstream.
            ('config', ConfigFilter()),
            # contrib isn't really part of GCC.
            ('fixincludes',),
            ('gcc', GCCFilter()),
            (os.path.join ('gcc', 'testsuite'), TestsuiteFilter()),
            ('gnattools',),
            ('gotools',),
            ('include',),
            # intl is imported from upstream.
            ('libada',),
            ('libatomic',),
            ('libbacktrace',),
            ('libcc1',),
            # libcilkrts is imported from upstream.
            ('libcpp', LibCppFilter()),
            ('libdecnumber',),
            # libffi is imported from upstream.
            ('libgcc', LibGCCFilter()),
            ('libgfortran',),
            # libgo is imported from upstream.
            ('libgomp',),
            ('libhsail-rt',),
            ('libiberty',),
            ('libitm',),
            ('libobjc',),
            # liboffloadmic is imported from upstream.
            ('libquadmath',),
            # libsanitizer is imported from upstream.
            ('libssp',),
            ('libstdc++-v3', LibStdCxxFilter()),
            ('libvtv',),
            ('lto-plugin',),
            # maintainer-scripts is not registered here.
            # zlib is imported from upstream.
            ]
        for entry in registered:
            self.add_dir (*entry)

        # Directories processed when none are named on the command line.
        self.default_dirs = [
            'gcc',
            'include',
            'libada',
            'libatomic',
            'libbacktrace',
            'libcc1',
            'libcpp',
            'libdecnumber',
            'libgcc',
            'libgfortran',
            'libgomp',
            'libhsail-rt',
            'libiberty',
            'libitm',
            'libobjc',
            'libssp',
            'libstdc++-v3',
            'libvtv',
            'lto-plugin',
            ]
752
# Only run the command-line driver when the file is executed as a script,
# so that importing the module has no side effects.
if __name__ == '__main__':
    GCCCmdLine().main()
754