xref: /openbsd-src/gnu/usr.bin/perl/Porting/core-cpan-diff (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
6
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
13use File::Temp ();
14use File::Path ();
15use File::Spec;
16use Archive::Extract;
17use IO::Uncompress::Gunzip ();
18use File::Compare ();
19use ExtUtils::Manifest;
20
21BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
22use lib 'Porting';
23use Maintainers ();
24
25# if running from blead, we may be doing -Ilib, which means when we
26# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
27# So preload the things we need, and tell it to check %INC first:
28
29use Archive::Tar;
30use IPC::Open3;
31use IO::Select;
32$Module::Load::Conditional::CHECK_INC_HASH = 1;
33# stop Archive::Extract whinging about lack of Archive::Zip
34$Archive::Extract::WARN = 0;
35
36
37# Files, which if they exist in CPAN but not in perl, will not generate
38# an 'Only in CPAN' listing
39#
40our %IGNORABLE = map { ($_ => 1) }
41	qw(.cvsignore .dualLivedDiffConfig .gitignore
42	      ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL
43	      CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS
44	      GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL
45	      MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README
46	      SIGNATURE THANKS TODO Todo VERSION WHATSNEW);
47
48# where, under the cache dir, to untar stuff to
49
50use constant UNTAR_DIR => 'untarred';
51
52use constant DIFF_CMD  => 'diff';
53use constant WGET_CMD  => 'wget';
54
55sub usage {
56    print STDERR "\n@_\n\n" if @_;
57    print STDERR <<HERE;
58Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
59
60-a/--all      Scan all dual-life modules.
61
62-c/--cachedir Where to save downloaded CPAN tarball files
63              (defaults to /tmp/something/ with deletion after each run).
64
65-d/--diff     Display file differences using diff(1), rather than just
66              listing which files have changed.
67              The diff(1) command is assumed to be in your PATH.
68
69--diffopts    Options to pass to the diff command. Defaults to '-u'.
70
71-f|force      Force download from CPAN of new 02packages.details.txt file
72              (with --crosscheck only).
73
74-o/--output   File name to write output to (defaults to STDOUT).
75
76-r/--reverse  Reverses the diff (perl to CPAN).
77
78-u/--upstream only print modules with the given upstream (defaults to all)
79
80-v/--verbose  List the fate of *all* files in the tarball, not just those
81              that differ or are missing.
82
83-x|crosscheck List the distributions whose current CPAN version differs from
84              that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
85
86By default (i.e. without the --crosscheck option),  for each listed module
87(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
88from CPAN associated with that module, and compare the files in it with
89those in the perl source tree.
90
91Must be run from the root of the perl source tree.
92Module names must match the keys of %Modules in Maintainers.pl.
93HERE
94    exit(1);
95}
96
97
98sub run {
99    my $scan_all;
100    my $diff_opts;
101    my $reverse    = 0;
102    my @wanted_upstreams;
103    my $cache_dir;
104    my $use_diff;
105    my $output_file;
106    my $verbose;
107    my $force;
108    my $do_crosscheck;
109
110    GetOptions(
111	'a|all'        => \$scan_all,
112	'c|cachedir=s' => \$cache_dir,
113	'd|diff'       => \$use_diff,
114	'diffopts:s'   => \$diff_opts,
115	'f|force'      => \$force,
116	'h|help'       => \&usage,
117	'o|output=s'   => \$output_file,
118	'r|reverse'    => \$reverse,
119	'u|upstream=s@'=> \@wanted_upstreams,
120	'v|verbose'    => \$verbose,
121	'x|crosscheck' => \$do_crosscheck,
122    ) or usage;
123
124
125    my @modules;
126
127    usage("Cannot mix -a with module list") if $scan_all && @ARGV;
128
129    if ($do_crosscheck) {
130	usage("can't use -r, -d, --diffopts, -v with --crosscheck")
131	    if ($reverse || $use_diff || $diff_opts || $verbose);
132    }
133    else {
134	$diff_opts = '-u' unless defined $diff_opts;
135	usage("can't use -f without --crosscheck") if $force;
136    }
137
138    @modules = $scan_all
139	        ? grep $Maintainers::Modules{$_}{CPAN},
140		    (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
141	        : @ARGV;
142    usage("No modules specified") unless @modules;
143
144
145    my $outfh;
146    if (defined $output_file) {
147	open $outfh, '>', $output_file
148	    or die "ERROR: could not open file '$output_file' for writing: $!\n";
149    }
150    else {
151	open $outfh, ">&STDOUT"
152			    or die "ERROR: can't dup STDOUT: $!\n";
153    }
154
155    if (defined $cache_dir) {
156	die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
157    }
158
159    if ($do_crosscheck) {
160	do_crosscheck($outfh, $cache_dir, $force, \@modules);
161    }
162    else {
163	do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff,
164	    $reverse, $diff_opts, \@wanted_upstreams);
165    }
166}
167
168
169
170# compare a list of modules against their CPAN equivalents
171
172sub do_compare {
173    my ($modules, $outfh, $output_file, $cache_dir, $verbose,
174		$use_diff, $reverse, $diff_opts, $wanted_upstreams) = @_;
175
176
177    # first, make sure we have a directory where they can all be untarred,
178    # and if its a permanent directory, clear any previous content
179    my $untar_dir;
180    if ($cache_dir) {
181	$untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR);
182	if (-d $untar_dir) {
183	    File::Path::rmtree($untar_dir)
184		    or die "failed to remove $untar_dir\n";
185	}
186	mkdir $untar_dir
187	    or die "mkdir $untar_dir: $!\n";
188    }
189    else {
190	$untar_dir = File::Temp::tempdir( CLEANUP => 1 );
191    }
192
193    my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
194
195    my %seen_dist;
196    for my $module (@$modules) {
197	warn "Processing $module ...\n" if defined $output_file;
198
199	my $m = $Maintainers::Modules{$module}
200	    or die "ERROR: No such module in Maintainers.pl: '$module'\n";
201
202	unless ($m->{CPAN}) {
203	    print $outfh "WARNING: $module is not dual-life; skipping\n";
204	    next;
205	}
206
207	my $dist = $m->{DISTRIBUTION};
208	die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
209
210	if ($seen_dist{$dist}) {
211	    warn "WARNING: duplicate entry for $dist in $module\n"
212	}
213
214	my $upstream = $m->{UPSTREAM} || 'UNKNOWN';
215	next if @$wanted_upstreams and ! ($upstream ~~ $wanted_upstreams);
216	print $outfh "\n$module - ".$Maintainers::Modules{$module}->{DISTRIBUTION}."\n" unless $use_diff;
217	print $outfh "  upstream is: ".($m->{UPSTREAM} || 'UNKNOWN!')."\n";
218
219	$seen_dist{$dist}++;
220
221	my $cpan_dir;
222	eval {
223	    $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist)
224	};
225	if ($@) {
226	    print $outfh "  ", $@;
227	    print $outfh "  (skipping)\n";
228	    next;
229	}
230
231	my @perl_files = Maintainers::get_module_files($module);
232
233	my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST');
234	die "ERROR: no such file: $manifest\n" unless  -f $manifest;
235
236	my $cpan_files = ExtUtils::Manifest::maniread($manifest);
237	my @cpan_files = sort keys %$cpan_files;
238
239	my ($excluded, $map) =  get_map($m, $module, \@perl_files);
240
241	my %perl_unseen;
242	@perl_unseen{@perl_files} = ();
243	my %perl_files = %perl_unseen;
244
245	foreach my $cpan_file (@cpan_files) {
246	    my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
247	    unless (defined $mapped_file) {
248	        print $outfh "  Excluded:  $cpan_file\n" if $verbose;
249	        next;
250	    }
251
252	    if (exists $perl_files{$mapped_file}) {
253	        delete $perl_unseen{$mapped_file};
254	    }
255	    else {
256	        # some CPAN files foo are stored in core as foo.packed,
257	        # which are then unpacked by 'make test_prep'
258	        my $packed_file = "$mapped_file.packed";
259	        if (exists $perl_files{$packed_file} ) {
260	            if (! -f $mapped_file and -f $packed_file) {
261	                print $outfh <<EOF;
262WARNING: $mapped_file not found, but .packed variant exists.
263Perhaps you need to run 'make test_prep'?
264EOF
265	                next;
266	            }
267	            delete $perl_unseen{$packed_file};
268	        }
269	        else {
270	            if ($ignorable{$cpan_file}) {
271	                print $outfh "  Ignored:   $cpan_file\n" if $verbose;
272	                next;
273	            }
274
275	            unless ($use_diff) {
276	                print $outfh "  CPAN only: $cpan_file",
277	                    ($cpan_file eq $mapped_file) ? "\n"
278				: " (expected $mapped_file)\n";
279	            }
280	            next;
281	        }
282	    }
283
284
285	    my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file);
286
287	    # should never happen
288	    die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
289
290	    # might happen if the FILES entry in Maintainers.pl is wrong
291	    unless (-f $mapped_file) {
292	        print $outfh "WARNING: perl file not found: $mapped_file\n";
293	        next;
294	    }
295
296			my $relative_mapped_file = $mapped_file;
297			$relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///;
298
299	    if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
300
301
302	         if ($use_diff) {
303	            file_diff($outfh, $abs_cpan_file, $mapped_file,
304	                                $reverse, $diff_opts);
305	        }
306	        else {
307	            if ($cpan_file eq $relative_mapped_file) {
308	                print $outfh "  Modified:  $relative_mapped_file\n";
309	            }
310	            else {
311	                print $outfh "  Modified:  $cpan_file $relative_mapped_file\n";
312	            }
313	        }
314	    }
315	    elsif ($verbose) {
316	            if ($cpan_file eq $relative_mapped_file) {
317	                print $outfh "  Unchanged: $cpan_file\n";
318	            }
319	            else {
320	                print $outfh "  Unchanged: $cpan_file $relative_mapped_file\n";
321	            }
322	    }
323	}
324	for (sort keys %perl_unseen) {
325	    print $outfh "  Perl only: $_\n" unless $use_diff;
326	}
327    }
328}
329
330# given FooBar-1.23_45.tar.gz, return FooBar
331
332sub distro_base {
333    my $d = shift;
334    $d =~ s/\.tar\.gz$//;
335    $d =~ s/\.gip$//;
336    $d =~ s/[\d\-_\.]+$//;
337    return $d;
338}
339
340# process --crosscheck action:
341# ie list all distributions whose CPAN versions differ from that listed in
342# Maintainers.pl
343
344sub do_crosscheck {
345    my ($outfh, $cache_dir, $force, $modules) = @_;
346
347    my $file = '02packages.details.txt';
348    my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
349    my $path = File::Spec->catfile($download_dir, $file);
350    my $gzfile = "$path.gz";
351
352    # grab 02packages.details.txt
353
354    my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz';
355
356    if (! -f $gzfile or $force) {
357	unlink $gzfile;
358	my_getstore($url, $gzfile);
359    }
360    unlink $path;
361    IO::Uncompress::Gunzip::gunzip($gzfile, $path)
362	or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
363
364    # suck in the data from it
365
366    open my $fh, '<', $path
367	or die "ERROR: open: $file: $!\n";
368
369    my %distros;
370    my %modules;
371
372    while (<$fh>) {
373	next if 1../^$/;
374	chomp;
375	my @f = split ' ', $_;
376	if (@f != 3) {
377	    warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
378	    next;
379	}
380	my $distro = $f[2];
381	$distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
382	$modules{$f[0]} = $distro;
383
384	(my $short_distro = $distro) =~ s{^.*/}{};
385
386	$distros{distro_base($short_distro)}{$distro} = 1;
387    }
388
389    for my $module (@$modules) {
390	my $m = $Maintainers::Modules{$module}
391	    or die "ERROR: No such module in Maintainers.pl: '$module'\n";
392
393	unless ($m->{CPAN}) {
394	    print $outfh "\nWARNING: $module is not dual-life; skipping\n";
395	    next;
396	}
397
398	# given an entry like
399	#   Foo::Bar 1.23 foo-bar-1.23.tar.gz,
400	# first compare the module name against Foo::Bar, and failing that,
401	# against foo-bar
402
403	my $pdist = $m->{DISTRIBUTION};
404	die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
405
406	my $cdist = $modules{$module};
407	(my $short_pdist = $pdist) =~ s{^.*/}{};
408
409	unless (defined $cdist) {
410	    my $d = $distros{distro_base($short_pdist)};
411	    unless (defined $d) {
412	        print $outfh "\n$module: Can't determine current CPAN entry\n";
413	        next;
414	    }
415	    if (keys %$d > 1) {
416	        print $outfh "\n$module: (found more than one CPAN candidate):\n";
417	        print $outfh "    perl: $pdist\n";
418	        print $outfh "    CPAN: $_\n" for sort keys %$d;
419	        next;
420	    }
421	    $cdist = (keys %$d)[0];
422	}
423
424	if ($cdist ne $pdist) {
425	    print $outfh "\n$module:\n    Perl: $pdist\n    CPAN: $cdist\n";
426	}
427    }
428}
429
430
431
432# get the EXCLUDED and MAP entries for this module, or
433# make up defauts if they don't exist
434
435sub get_map {
436    my ($m, $module_name, $perl_files) = @_;
437
438    my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
439
440    $excluded ||= [];
441
442    return $excluded, $map if $map;
443
444    # all files under ext/foo-bar (plus maybe some under t/lib)???
445
446    my $ext;
447    for (@$perl_files) {
448	if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
449	    if (defined $ext and $ext ne $1) {
450	        # more than one ext/$ext/
451	        undef $ext;
452	        last;
453	    }
454	    $ext = $1;
455	}
456	elsif (m{^t/lib/}) {
457	    next;
458	}
459	else {
460	    undef $ext;
461	    last;
462	}
463    }
464
465    if (defined $ext) {
466	    $map = { '' => $ext },
467    }
468    else {
469	(my $base = $module_name) =~ s{::}{/}g;
470	$base ="lib/$base";
471	$map = {
472	    'lib/'	=> 'lib/',
473	    ''	=> "$base/",
474	};
475    }
476    return $excluded, $map;
477}
478
479
480# Given an exclude list and a mapping hash, convert a CPAN filename
481# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
482# Returns an empty list for an excluded file
483
484sub cpan_to_perl {
485    my ($excluded, $map, $cpan_file) = @_;
486
487    for my $exclude (@$excluded) {
488	# may be a simple string to match exactly, or a pattern
489	if (ref $exclude) {
490	    return if $cpan_file =~ $exclude;
491	}
492	else {
493	    return if $cpan_file eq $exclude;
494	}
495    }
496
497    my $perl_file = $cpan_file;
498
499    # try longest prefix first, then alphabetically on tie-break
500    for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
501    {
502	last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
503    }
504    return $perl_file;
505}
506
507
508
509# do LWP::Simple::getstore, possibly without LWP::Simple being available
510
511my $lwp_simple_available;
512
513sub my_getstore {
514    my ($url, $file) = @_;
515    unless (defined $lwp_simple_available) {
516	eval { require LWP::Simple };
517	$lwp_simple_available = $@ eq '';
518    }
519    if ($lwp_simple_available) {
520	return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
521    }
522    else {
523	return system(WGET_CMD, "-O", $file, $url) == 0;
524    }
525}
526
527
528# download and unpack a distribution
529# Returns the full pathname of the extracted directory
530# (eg '/tmp/XYZ/Foo_bar-1.23')
531
532# cache_dir: where to dowenload the .tar.gz file to
533# untar_dir: where to untar or unzup the file
534# module:    name of module
535# dist:      name of the distribution
536
537sub get_distribution {
538    my ($cache_dir, $untar_dir, $module, $dist) = @_;
539
540    $dist =~ m{.+/([^/]+)$}
541	or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
542    my $filename = $1;
543
544    my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
545    my $download_file = File::Spec->catfile($download_dir, $filename);
546
547    # download distribution
548
549    if (-f $download_file and ! -s $download_file ) {
550	# wget can leave a zero-length file on failed download
551	unlink $download_file;
552    }
553
554    unless (-f $download_file) {
555	# not cached
556	$dist =~ /^([A-Z])([A-Z])/
557	    or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
558
559	my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist";
560	my_getstore($url, $download_file)
561	    or die "ERROR: Could not fetch '$url'\n";
562    }
563
564    # extract distribution
565
566    my $ae = Archive::Extract->new( archive => $download_file);
567    $ae->extract( to => $untar_dir )
568	or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n";
569
570    # get the name of the extracted distribution dir
571
572    my $path = File::Spec->catfile($untar_dir, $filename);
573
574    $path =~ s/\.tar\.gz$// or
575    $path =~ s/\.zip$// or
576      die "ERROR: downloaded file does not have a recognised suffix: $path\n";
577
578    die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
579
580    return $path;
581}
582
583
584# produce the diff of a single file
585sub file_diff {
586    my $outfh     = shift;
587    my $cpan_file = shift;
588    my $perl_file = shift;
589    my $reverse   = shift;
590    my $diff_opts = shift;
591
592
593    my @cmd = (DIFF_CMD, split ' ', $diff_opts);
594    if ($reverse) {
595	push @cmd, $perl_file, $cpan_file;
596    }
597    else {
598	push @cmd, $cpan_file, $perl_file;
599    }
600    my $result = `@cmd`;
601
602    $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
603
604    print $outfh $result;
605}
606
607
608run();
609
610