xref: /openbsd-src/gnu/usr.bin/perl/Porting/core-cpan-diff (revision 5054e3e78af0749a9bb00ba9a024b3ee2d90290f)
1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
6
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
13use File::Temp ();
14use File::Path ();
15use File::Spec;
16use Archive::Extract;
17use IO::Uncompress::Gunzip ();
18use File::Compare ();
19use ExtUtils::Manifest;
20
21BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
22use lib 'Porting';
23use Maintainers ();
24
25# if running from blead, we may be doing -Ilib, which means when we
26# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
27# So preload the things we need, and tell it to check %INC first:
28
29use Archive::Tar;
30use IPC::Open3;
31use IO::Select;
32$Module::Load::Conditional::CHECK_INC_HASH = 1;
33# stop Archive::Extract whinging about lack of Archive::Zip
34$Archive::Extract::WARN = 0;
35
36
37# Files, which if they exist in CPAN but not in perl, will not generate
38# an 'Only in CPAN' listing
39#
40our %IGNORABLE = map { ($_ => 1) }
41	qw(.cvsignore .dualLivedDiffConfig .gitignore
42	      ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL
43	      CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS
44	      GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL
45	      MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README
46	      SIGNATURE THANKS TODO Todo VERSION WHATSNEW);
47
48# where, under the cache dir, to untar stuff to
49
50use constant UNTAR_DIR => 'untarred';
51
52use constant DIFF_CMD  => 'diff';
53use constant WGET_CMD  => 'wget';
54
55sub usage {
56    print STDERR "\n@_\n\n" if @_;
57    print STDERR <<HERE;
58Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
59
60-a/--all      Scan all dual-life modules.
61
62-c/--cachedir Where to save downloaded CPAN tarball files
63              (defaults to /tmp/something/ with deletion after each run).
64
65-d/--diff     Display file differences using diff(1), rather than just
66              listing which files have changed.
67              The diff(1) command is assumed to be in your PATH.
68
69--diffopts    Options to pass to the diff command. Defaults to '-u'.
70
71-f|force      Force download from CPAN of new 02packages.details.txt file
72              (with --crosscheck only).
73
74-o/--output   File name to write output to (defaults to STDOUT).
75
76-r/--reverse  Reverses the diff (perl to CPAN).
77
78-v/--verbose  List the fate of *all* files in the tarball, not just those
79              that differ or are missing.
80
81-x|crosscheck List the distributions whose current CPAN version differs from
82              that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
83
84By default (i.e. without the --crosscheck option),  for each listed module
85(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
86from CPAN associated with that module, and compare the files in it with
87those in the perl source tree.
88
89Must be run from the root of the perl source tree.
90Module names must match the keys of %Modules in Maintainers.pl.
91HERE
92    exit(1);
93}
94
95
96sub run {
97    my $scan_all;
98    my $diff_opts;
99    my $reverse    = 0;
100    my $cache_dir;
101    my $use_diff;
102    my $output_file;
103    my $verbose;
104    my $force;
105    my $do_crosscheck;
106
107    GetOptions(
108	'a|all'        => \$scan_all,
109	'c|cachedir=s' => \$cache_dir,
110	'd|diff'       => \$use_diff,
111	'diffopts:s'   => \$diff_opts,
112	'f|force'      => \$force,
113	'h|help'       => \&usage,
114	'o|output=s'   => \$output_file,
115	'r|reverse'    => \$reverse,
116	'v|verbose'    => \$verbose,
117	'x|crosscheck' => \$do_crosscheck,
118    ) or usage;
119
120
121    my @modules;
122
123    usage("Cannot mix -a with module list") if $scan_all && @ARGV;
124
125    if ($do_crosscheck) {
126	usage("can't use -r, -d, --diffopts, -v with --crosscheck")
127	    if ($reverse || $use_diff || $diff_opts || $verbose);
128    }
129    else {
130	$diff_opts = '-u' unless defined $diff_opts;
131	usage("can't use -f without --crosscheck") if $force;
132    }
133
134    @modules = $scan_all
135	        ? grep $Maintainers::Modules{$_}{CPAN},
136		    (sort {lc $a cmp lc $b } keys %Maintainers::Modules)
137	        : @ARGV;
138    usage("No modules specified") unless @modules;
139
140
141    my $outfh;
142    if (defined $output_file) {
143	open $outfh, '>', $output_file
144	    or die "ERROR: could not open file '$output_file' for writing: $!\n";
145    }
146    else {
147	open $outfh, ">&STDOUT"
148			    or die "ERROR: can't dup STDOUT: $!\n";
149    }
150
151    if (defined $cache_dir) {
152	die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
153    }
154
155    if ($do_crosscheck) {
156	do_crosscheck($outfh, $cache_dir, $force, \@modules);
157    }
158    else {
159	do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff,
160	    $reverse, $diff_opts);
161    }
162}
163
164
165
166# compare a list of modules against their CPAN equivalents
167
168sub do_compare {
169    my ($modules, $outfh, $output_file, $cache_dir, $verbose,
170		$use_diff, $reverse, $diff_opts) = @_;
171
172
173    # first, make sure we have a directory where they can all be untarred,
174    # and if its a permanent directory, clear any previous content
175    my $untar_dir;
176    if ($cache_dir) {
177	$untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR);
178	if (-d $untar_dir) {
179	    File::Path::rmtree($untar_dir)
180		    or die "failed to remove $untar_dir\n";
181	}
182	mkdir $untar_dir
183	    or die "mkdir $untar_dir: $!\n";
184    }
185    else {
186	$untar_dir = File::Temp::tempdir( CLEANUP => 1 );
187    }
188
189    my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE;
190
191    my %seen_dist;
192    for my $module (@$modules) {
193	warn "Processing $module ...\n" if defined $output_file;
194	print $outfh "\n$module\n" unless $use_diff;
195
196	my $m = $Maintainers::Modules{$module}
197	    or die "ERROR: No such module in Maintainers.pl: '$module'\n";
198
199	unless ($m->{CPAN}) {
200	    print $outfh "WARNING: $module is not dual-life; skipping\n";
201	    next;
202	}
203
204	my $dist = $m->{DISTRIBUTION};
205	die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
206
207	if ($seen_dist{$dist}) {
208	    warn "WARNING: duplicate entry for $dist in $module\n"
209	}
210	$seen_dist{$dist}++;
211
212	my $cpan_dir;
213	eval {
214	    $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist)
215	};
216	if ($@) {
217	    print $outfh "  ", $@;
218	    print $outfh "  (skipping)\n";
219	    next;
220	}
221
222	my @perl_files = Maintainers::get_module_files($module);
223
224	my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST');
225	die "ERROR: no such file: $manifest\n" unless  -f $manifest;
226
227	my $cpan_files = ExtUtils::Manifest::maniread($manifest);
228	my @cpan_files = sort keys %$cpan_files;
229
230	my ($excluded, $map) =  get_map($m, $module, \@perl_files);
231
232	my %perl_unseen;
233	@perl_unseen{@perl_files} = ();
234	my %perl_files = %perl_unseen;
235
236	foreach my $cpan_file (@cpan_files) {
237	    my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file);
238	    unless (defined $mapped_file) {
239	        print $outfh "  Excluded:  $cpan_file\n" if $verbose;
240	        next;
241	    }
242
243	    if (exists $perl_files{$mapped_file}) {
244	        delete $perl_unseen{$mapped_file};
245	    }
246	    else {
247	        # some CPAN files foo are stored in core as foo.packed,
248	        # which are then unpacked by 'make test_prep'
249	        my $packed_file = "$mapped_file.packed";
250	        if (exists $perl_files{$packed_file} ) {
251	            if (! -f $mapped_file and -f $packed_file) {
252	                print $outfh <<EOF;
253WARNING: $mapped_file not found, but .packed variant exists.
254Perhaps you need to run 'make test_prep'?
255EOF
256	                next;
257	            }
258	            delete $perl_unseen{$packed_file};
259	        }
260	        else {
261	            if ($ignorable{$cpan_file}) {
262	                print $outfh "  Ignored:   $cpan_file\n" if $verbose;
263	                next;
264	            }
265
266	            unless ($use_diff) {
267	                print $outfh "  CPAN only: $cpan_file",
268	                    ($cpan_file eq $mapped_file) ? "\n"
269				: " (expected $mapped_file)\n";
270	            }
271	            next;
272	        }
273	    }
274
275
276	    my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file);
277
278	    # should never happen
279	    die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file;
280
281	    # might happen if the FILES entry in Maintainers.pl is wrong
282	    unless (-f $mapped_file) {
283	        print $outfh "WARNING: perl file not found: $mapped_file\n";
284	        next;
285	    }
286
287
288	    if (File::Compare::compare($abs_cpan_file, $mapped_file)) {
289	        if ($use_diff) {
290	            file_diff($outfh, $abs_cpan_file, $mapped_file,
291	                                $reverse, $diff_opts);
292	        }
293	        else {
294	            if ($cpan_file eq $mapped_file) {
295	                print $outfh "  Modified:  $cpan_file\n";
296	            }
297	            else {
298	                print $outfh "  Modified:  $cpan_file $mapped_file\n";
299	            }
300	        }
301	    }
302	    elsif ($verbose) {
303	            if ($cpan_file eq $mapped_file) {
304	                print $outfh "  Unchanged: $cpan_file\n";
305	            }
306	            else {
307	                print $outfh "  Unchanged: $cpan_file $mapped_file\n";
308	            }
309	    }
310	}
311	for (sort keys %perl_unseen) {
312	    print $outfh "  Perl only: $_\n" unless $use_diff;
313	}
314    }
315}
316
317# given FooBar-1.23_45.tar.gz, return FooBar
318
319sub distro_base {
320    my $d = shift;
321    $d =~ s/\.tar\.gz$//;
322    $d =~ s/\.gip$//;
323    $d =~ s/[\d\-_\.]+$//;
324    return $d;
325}
326
327# process --crosscheck action:
328# ie list all distributions whose CPAN versions differ from that listed in
329# Maintainers.pl
330
331sub do_crosscheck {
332    my ($outfh, $cache_dir, $force, $modules) = @_;
333
334    my $file = '02packages.details.txt';
335    my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
336    my $path = File::Spec->catfile($download_dir, $file);
337    my $gzfile = "$path.gz";
338
339    # grab 02packages.details.txt
340
341    my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz';
342
343    if (! -f $gzfile or $force) {
344	unlink $gzfile;
345	my_getstore($url, $gzfile);
346    }
347    unlink $path;
348    IO::Uncompress::Gunzip::gunzip($gzfile, $path)
349	or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
350
351    # suck in the data from it
352
353    open my $fh, '<', $path
354	or die "ERROR: open: $file: $!\n";
355
356    my %distros;
357    my %modules;
358
359    while (<$fh>) {
360	next if 1../^$/;
361	chomp;
362	my @f = split ' ', $_;
363	if (@f != 3) {
364	    warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
365	    next;
366	}
367	my $distro = $f[2];
368	$distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/
369	$modules{$f[0]} = $distro;
370
371	(my $short_distro = $distro) =~ s{^.*/}{};
372
373	$distros{distro_base($short_distro)}{$distro} = 1;
374    }
375
376    for my $module (@$modules) {
377	my $m = $Maintainers::Modules{$module}
378	    or die "ERROR: No such module in Maintainers.pl: '$module'\n";
379
380	unless ($m->{CPAN}) {
381	    print $outfh "\nWARNING: $module is not dual-life; skipping\n";
382	    next;
383	}
384
385	# given an entry like
386	#   Foo::Bar 1.23 foo-bar-1.23.tar.gz,
387	# first compare the module name against Foo::Bar, and failing that,
388	# against foo-bar
389
390	my $pdist = $m->{DISTRIBUTION};
391	die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
392
393	my $cdist = $modules{$module};
394	(my $short_pdist = $pdist) =~ s{^.*/}{};
395
396	unless (defined $cdist) {
397	    my $d = $distros{distro_base($short_pdist)};
398	    unless (defined $d) {
399	        print $outfh "\n$module: Can't determine current CPAN entry\n";
400	        next;
401	    }
402	    if (keys %$d > 1) {
403	        print $outfh "\n$module: (found more than one CPAN candidate):\n";
404	        print $outfh "    perl: $pdist\n";
405	        print $outfh "    CPAN: $_\n" for sort keys %$d;
406	        next;
407	    }
408	    $cdist = (keys %$d)[0];
409	}
410
411	if ($cdist ne $pdist) {
412	    print $outfh "\n$module:\n    Perl: $pdist\n    CPAN: $cdist\n";
413	}
414    }
415}
416
417
418
419# get the EXCLUDED and MAP entries for this module, or
420# make up defauts if they don't exist
421
422sub get_map {
423    my ($m, $module_name, $perl_files) = @_;
424
425    my ($excluded, $map) = @$m{qw(EXCLUDED MAP)};
426
427    $excluded ||= [];
428
429    return $excluded, $map if $map;
430
431    # all files under ext/foo-bar (plus maybe some under t/lib)???
432
433    my $ext;
434    for (@$perl_files) {
435	if (m{^(ext/[^/]+/)}) {
436	    if (defined $ext and $ext ne $1) {
437	        # more than one ext/$ext/
438	        undef $ext;
439	        last;
440	    }
441	    $ext = $1;
442	}
443	elsif (m{^t/lib/}) {
444	    next;
445	}
446	else {
447	    undef $ext;
448	    last;
449	}
450    }
451
452    if (defined $ext) {
453	    $map = { '' => $ext },
454    }
455    else {
456	(my $base = $module_name) =~ s{::}{/}g;
457	$base ="lib/$base";
458	$map = {
459	    'lib/'	=> 'lib/',
460	    ''	=> "$base/",
461	};
462    }
463    return $excluded, $map;
464}
465
466
467# Given an exclude list and a mapping hash, convert a CPAN filename
468# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
469# Returns an empty list for an excluded file
470
471sub cpan_to_perl {
472    my ($excluded, $map, $cpan_file) = @_;
473
474    for my $exclude (@$excluded) {
475	# may be a simple string to match exactly, or a pattern
476	if (ref $exclude) {
477	    return if $cpan_file =~ $exclude;
478	}
479	else {
480	    return if $cpan_file eq $exclude;
481	}
482    }
483
484    my $perl_file = $cpan_file;
485
486    # try longest prefix first, then alphabetically on tie-break
487    for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map)
488    {
489	last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
490    }
491    return $perl_file;
492}
493
494
495
496# do LWP::Simple::getstore, possibly without LWP::Simple being available
497
498my $lwp_simple_available;
499
500sub my_getstore {
501    my ($url, $file) = @_;
502    unless (defined $lwp_simple_available) {
503	eval { require LWP::Simple };
504	$lwp_simple_available = $@ eq '';
505    }
506    if ($lwp_simple_available) {
507	return LWP::Simple::is_success(LWP::Simple::getstore($url, $file));
508    }
509    else {
510	return system(WGET_CMD, "-O", $file, $url) == 0;
511    }
512}
513
514
515# download and unpack a distribution
516# Returns the full pathname of the extracted directory
517# (eg '/tmp/XYZ/Foo_bar-1.23')
518
519# cache_dir: where to dowenload the .tar.gz file to
520# untar_dir: where to untar or unzup the file
521# module:    name of module
522# dist:      name of the distribution
523
524sub get_distribution {
525    my ($cache_dir, $untar_dir, $module, $dist) = @_;
526
527    $dist =~ m{.+/([^/]+)$}
528	or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
529    my $filename = $1;
530
531    my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
532    my $download_file = File::Spec->catfile($download_dir, $filename);
533
534    # download distribution
535
536    if (-f $download_file and ! -s $download_file ) {
537	# wget can leave a zero-length file on failed download
538	unlink $download_file;
539    }
540
541    unless (-f $download_file) {
542	# not cached
543	$dist =~ /^([A-Z])([A-Z])/
544	    or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n";
545
546	my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist";
547	my_getstore($url, $download_file)
548	    or die "ERROR: Could not fetch '$url'\n";
549    }
550
551    # extract distribution
552
553    my $ae = Archive::Extract->new( archive => $download_file);
554    $ae->extract( to => $untar_dir )
555	or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n";
556
557    # get the name of the extracted distribution dir
558
559    my $path = File::Spec->catfile($untar_dir, $filename);
560
561    $path =~ s/\.tar\.gz$// or
562    $path =~ s/\.zip$// or
563      die "ERROR: downloaded file does not have a recognised suffix: $path\n";
564
565    die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
566
567    return $path;
568}
569
570
571# produce the diff of a single file
572sub file_diff {
573    my $outfh     = shift;
574    my $cpan_file = shift;
575    my $perl_file = shift;
576    my $reverse   = shift;
577    my $diff_opts = shift;
578
579
580    my @cmd = (DIFF_CMD, split ' ', $diff_opts);
581    if ($reverse) {
582	push @cmd, $perl_file, $cpan_file;
583    }
584    else {
585	push @cmd, $cpan_file, $perl_file;
586    }
587    my $result = `@cmd`;
588
589    $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
590
591    print $outfh $result;
592}
593
594
595run();
596
597