xref: /netbsd-src/distrib/notes/common/extract-contrib-string.pl (revision 7e907697d9a3b0b20aaa121e3b957d5776c2ddfd)
1#!/usr/bin/env perl
2#
3# Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
4# All rights reserved.
5#
6# This code is derived from software contributed to The NetBSD Foundation
7# by Hubert Feyrer <hubert@feyrer.de>.
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions
11# are met:
12# 1. Redistributions of source code must retain the above copyright
13#    notice, this list of conditions and the following disclaimer.
14# 2. Redistributions in binary form must reproduce the above copyright
15#    notice, this list of conditions and the following disclaimer in the
16#    documentation and/or other materials provided with the distribution.
17#
18# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28# POSSIBILITY OF SUCH DAMAGE.
29
30#
31# Extract BSD-mandated copyright messages for NetBSD documentation
32#
33# Usage:
34# 1) find src xsrc -type f -print \
35#    | perl extract-contrib-string.pl
36#    >x
37#
38# 2) merge text after "--------" in "x" into
39#    src/distrib/notes/common/legal.common
40#
41# Options:
42#
43#     perl extract-contrib-string.pl [-d] [-h] [-x] [-?]
44#
45# where
46#     -d  debug output
47#     -h  html output
48#     -x  xml/docbook output
49#     -?  display help/usage message
50
51
52$ack_line1='([aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?'
53    .      '|\d\. Redistributions of any form whatsoever)';
54$ack_line2='(display the( following)?( acknowledge?ment)?|acknowledge?ment:$)';
55$ack_endline=
56      '(\d\.\s*(Neither the name'
57    .         '|The name of the company nor the name'	# Wasn't my idea
58    .         '|The name of the author may not'
59    .         '|The name of .* must not be used to endorse'
60    .         '|The names? (of )?.* nor the names? of'
61    .         '|The names? (of )?.* or any of it\'?s members'
62    .         '|Redistributions of any form whatsoever'
63    .         '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used'
64    .         "|Urbana-Champaign Independent Media Center's name"
65    . '))'
66    .'|(^Neither the name)'
67    .'|(THIS SOFTWARE IS PROVIDED)'
68    .'|(ALL WARRANTIES WITH REGARD)'
69    .'|(The word \'cryptographic\' can be left out if)'
70    .'|(may be used to endorse)'
71    .'|(@end cartouche)'
72    .'|(</para>)'
73    .'|(Redistribution and use in source and binary forms)'
74    .'|(may not be used to endorse)'
75    .'|(\.IP 4)'
76    .'|(ALLOW FREE USE OF)'
77    .'|(materials provided with the distribution)'
78    .'|(@InsertRedistribution@)';
79
80$known_bad_clause_3_wording=
81      'usr.bin/lex/.*'				# UCB
82    .'|dist/bind/contrib/nslint-2.1a3/lbl/.*'	#
83    .'|usr.sbin/traceroute/ifaddrlist.h'	#
84    .'|usr.sbin/traceroute/traceroute.c'	#
85    .'|usr.sbin/hilinfo/hilinfo.c'	   	# CSS @ Utah
86    ;
87
88sub warning {
89    local($fn,$msg) = @_;
90    print "XXX $fn line $.: $msg\n"
91}
92
93while ($#ARGV >= 0) {
94    $debug=1 if ($ARGV[0] =~ /-d/i);
95    $html=1  if ($ARGV[0] =~ /-h/i);
96    $xml=1  if ($ARGV[0] =~ /-x/i);
97    $usage=1  if ($ARGV[0] =~ /-\?/);
98    shift(@ARGV);
99}
100
101if ($usage) {
102    print "usage: find /usr/src -type f -print |\n" .
103	" perl extract-contrib-string.pl [-h] [-x] [-?] [-d]\n" .
104	"   where\n" .
105	"    -h   output html\n" .
106	"    -x   output xml/docbook\n" .
107	"    -d   debug\n" .
108	"    -?   display this help message\n";
109    exit(0);
110}
111
112$comments = !$html && !$xml;
113
114file:
115while(<>) {
116    chomp();
117    $fn=$_;
118
119    open(F, "$fn") || die "cannot read $fn: $!\n";
120
121  line:
122    while(<F>) {
123	if (0 and /$ack_line2/in){
124	    print "?> $_" if $debug;
125
126	    if ($fn !~ m,$known_bad_clause_3_wording,) {
127		warning($fn, "clause 3 start not caught");
128	    }
129	    last line;
130	}
131
132	print "0> $_" if $debug;
133
134	# special case perl script generating a license (openssl's
135	# mkerr.pl) - ignore the quoted license, there is another one
136	# inside:
137	if (/^\"\s\*.*$ack_line1.*\\n\"\,/n) {
138		while(!/$ack_endline/in) {
139		    print "S> $_" if $debug;
140		    $_ = <F>;
141		}
142	}
143
144	if (/$ack_line1/in
145	    or (/$ack_line2/n and $fn =~ m,$known_bad_clause_3_wording,)) {
146
147	    print "1> $_" if $debug;
148
149	    $_=<F>
150		unless $fn =~ m,$known_bad_clause_3_wording,;
151	    if (/$ack_line2/in or $fn =~ m,$known_bad_clause_3_wording,){
152
153		print "2> $_" if $debug;
154
155		$msg="";
156
157		if ($fn =~ m,$known_bad_clause_3_wording, and /``/) {
158		    $msg = $_;
159		}
160		elsif (/:\s+This product/) {
161		    # src/sys/lib/libkern/rngtest.c - bad clause 3 wording
162		    # that is not like others, so special case it here
163		    $msg = $_;
164		    $msg =~ s/^.*:\s+(This product.*)$/$1/;
165		}
166
167		$cnt=0;
168		$_=<F>;
169		while(!/$ack_endline/in) {
170
171		    print "C> $_" if $debug;
172
173		    $msg .= $_;
174		    $cnt++;
175		    $_ = <F>;
176		    if ($cnt > 10) {
177			warning($fn,"loooong copyright?");
178			last line;
179		    }
180		}
181
182		print "E> $_" if $debug;
183
184		# post-process
185
186		if ($fn =~ m,$known_bad_clause_3_wording,) {
187			while ($msg !~ /^.*``.*\n/) {
188				last if (!$msg);
189				$msg =~ s/^.*\n//o;
190			}
191			$msg =~ s/^.*``//o;
192			$msg =~ s/\n.*``//o;
193			$msg =~ s/''.*$//o;
194		}
195
196		# XXX: pcap &c - add to known_bad_clause_3_wording but
197		# that code seems to have problems.  Easier to add a
198		# hack here, shouldn't affect good clause 3.
199		$msg =~ s/''\s+Neither the name.*$//;
200
201		# *roff
202		while ($msg =~ /^\.\\"\s*/) {
203			$msg =~ s/^\.\\"\s*//o;
204		}
205		while ($msg =~ /\n\.\\"\s*/) {
206			$msg =~ s/\n\.\\"\s*/\n/o;
207		}
208		$msg =~ s/\n\.\\"\s*$/\n/g;
209
210		# C++/C99
211		while ($msg =~ /^\s*\/\/\s*/) {
212			$msg =~ s/^\s*\/\/\s*//o;
213		}
214		while ($msg =~ /\n\s*\/\/\s*$/) {
215			$msg =~ s/\n\s*\/\/\s*$//o;
216		}
217		$msg =~ s/\n\s*\/\/\s*/\n/g;
218
219		# C
220		while ($msg =~ /^\s*\*\s*/) {
221			$msg =~ s/^\s*\*\s*//o;
222		}
223		while ($msg =~ /\n\s*\*\s*$/) {
224			$msg =~ s/\n\s*\*\s*$//o;
225		}
226		$msg =~ s/\n\s*\*\s*/\n/g;
227
228		# texinfo @c
229		while ($msg =~ /^\s*\@c\s+/) {
230			$msg =~ s/^\s*\@c\s+//o;
231		}
232		while ($msg =~ /\n\s*\@c\s+$/) {
233			$msg =~ s/\n\s*\@c\s+$//o;
234		}
235		$msg =~ s/\n\s*\@c\s+/\n/g;
236
237		$msg =~ s/^REM\s*//g;			# BASIC?!?
238		$msg =~ s/\nREM\s*/\n/g;		# BASIC?!?
239		$msg =~ s/^dnl\s*//g;			# m4
240		$msg =~ s/\ndnl\s*/\n/g;		# m4
241		$msg =~ s/^\s+-\s+//g;			# seen in docbook files
242		$msg =~ s/\n\s+-\s+/ /g;		#
243		$msg =~ s/^[#\\\|";]+\s*//g;		# sh etc.
244		$msg =~ s/\n[#\\\|";]+\s*/\n/g;		# sh etc.
245		$msg =~ s/^[ 	*]*//g;      		# C
246		$msg =~ s/\n[ 	*]*/\n/g;    		# C
247
248		$msg =~ s/\@cartouche\n//;              # texinfo
249
250		$msg =~ s/
251//g;
252		$msg =~ s/\s*\n/\n/g;
253		$msg =~ s/^\s*//;
254		$msg =~ s/\\\@/\@/g;
255		$msg =~ s/\n\n/\n/g;
256	        $msg =~ s/^\s*``//;
257	        $msg =~ s/''\s*$//;
258		$msg =~ s/^\"//o;
259		$msg =~ s/\"$//o;
260		$msg =~ s/\"\.$/./o;
261
262		# Fix ISO-646-SE spelling of Lule\(oa
263		$msg =~ s/Lule\}/Lule\\(oa/g;
264
265		# Collapse multiple spaces between words.  There are a
266		# few entries with "by__Name" that affects sorting.
267		$msg =~ s/(\w)  +(\w)/$1 $2/g;
268
269		# Split up into separate paragraphs
270		#
271		$msgs=$msg;
272		$msgs=~s/(This (software|product))/|$1/g;
273		$msgs=~s,^\|,,;
274	      msg:
275		foreach $msg (split(/\|/, $msgs)) {
276		    while ($msg =~ /[\n\s]+$/) {
277			$msg =~ s/[\n\s]+$//o;
278		    }
279		    next if ($msg eq "");
280		    if ($comments) {
281			print ".\\\" File $fn:\n";
282			print "$msg";
283			print "\n\n";
284		    }
285
286		    my $key = lc($msg);	# ignore difference in case
287		    $key =~ s/\n/ /g;	# ignore difference in line breaks
288		    $key =~ s/\.$//g;	# drop the final dot
289
290		    # push organizations ("by the") to the end of the
291		    # sorting order
292		    $key =~ s/(developed by) the/$1 ~the/;
293
294		    if (defined $copyrights{$key}) {
295			if ($copyrights{$key} !~ /\.$/ && $msg =~ /\.$/) {
296			    print "already there, without dot - overriding!\n"
297				if 1 || $debug;
298			}
299			else {
300			    next msg;
301			}
302		    }
303
304		    $copyrights{$key} = $msg;
305		}
306
307	    } else {
308		print "?> $_" if $debug;
309
310                if ($fn !~ m,$known_bad_clause_3_wording,) {
311		    warning($fn, "bad clause 3?");
312                }
313		last line;
314	    }
315	}
316    }
317    close(F);
318}
319
320
321if ($html) {
322    print "<ul>\n";
323    foreach $key (sort keys %copyrights) {
324	my $msg = $copyrights{$key};
325	print "<li>$msg</li>\n";
326    }
327    print "</ul>\n";
328} elsif ($xml) {
329    foreach $key (sort keys %copyrights) {
330	my $msg = $copyrights{$key};
331	print "<listitem>$msg</listitem>\n";
332    }
333} else {
334    print "------------------------------------------------------------\n";
335
336    $firsttime=1;
337    foreach $key (sort keys %copyrights) {
338	my $msg = $copyrights{$key};
339	if ($firsttime) {
340	    $firsttime=0;
341	} else {
342	    print ".It\n";
343	}
344	print "$msg\n";
345    }
346}
347