xref: /openbsd-src/gnu/usr.bin/perl/Porting/core-cpan-diff (revision 898184e3e61f9129feb5978fad5a8c6865f00b92)
1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
6
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
13use File::Basename ();
14use File::Copy     ();
15use File::Temp     ();
16use File::Path     ();
17use File::Spec::Functions;
18use Archive::Extract;
19use IO::Uncompress::Gunzip ();
20use File::Compare          ();
21use ExtUtils::Manifest;
22use ExtUtils::MakeMaker ();
23use HTTP::Tiny;
24
25BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
26use lib 'Porting';
27use Maintainers ();
28
29# if running from blead, we may be doing -Ilib, which means when we
30# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
31# So preload the things we need, and tell it to check %INC first:
32
33use Archive::Tar;
34use IPC::Open3;
35use IO::Select;
36$Module::Load::Conditional::CHECK_INC_HASH = 1;
37
38# stop Archive::Extract whinging about lack of Archive::Zip
39$Archive::Extract::WARN = 0;
40
41# where, under the cache dir, to download tarballs to
42use constant SRC_DIR => 'tarballs';
43
44# where, under the cache dir, to untar stuff to
45use constant UNTAR_DIR => 'untarred';
46
47use constant DIFF_CMD => 'diff';
48use constant WGET_CMD => 'wget';
49
50sub usage {
51    print STDERR "\n@_\n\n" if @_;
52    print STDERR <<HERE;
53Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
54
55-a/--all      Scan all dual-life modules.
56
57-c/--cachedir Where to save downloaded CPAN tarball files
58              (defaults to /tmp/something/ with deletion after each run).
59
60-d/--diff     Display file differences using diff(1), rather than just
61              listing which files have changed.
62              The diff(1) command is assumed to be in your PATH.
63
64--diffopts    Options to pass to the diff command. Defaults to '-u'.
65
66-f|force      Force download from CPAN of new 02packages.details.txt file
67              (with --crosscheck only).
68
69-m|mirror     Preferred CPAN mirror URI (http:// or file:///)
70              (Local mirror must be a complete mirror, not minicpan)
71
72-o/--output   File name to write output to (defaults to STDOUT).
73
74-r/--reverse  Reverses the diff (perl to CPAN).
75
76-u/--upstream only print modules with the given upstream (defaults to all)
77
78-v/--verbose  List the fate of *all* files in the tarball, not just those
79              that differ or are missing.
80
81-x|crosscheck List the distributions whose current CPAN version differs from
82              that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
83
84By default (i.e. without the --crosscheck option),  for each listed module
85(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
86from CPAN associated with that module, and compare the files in it with
87those in the perl source tree.
88
89Must be run from the root of the perl source tree.
90Module names must match the keys of %Modules in Maintainers.pl.
91HERE
92    exit(1);
93}
94
95sub run {
96    my $scan_all;
97    my $diff_opts;
98    my $reverse = 0;
99    my @wanted_upstreams;
100    my $cache_dir;
101    my $mirror_url = "http://www.cpan.org/";
102    my $use_diff;
103    my $output_file;
104    my $verbose;
105    my $force;
106    my $do_crosscheck;
107
108    GetOptions(
109        'a|all'         => \$scan_all,
110        'c|cachedir=s'  => \$cache_dir,
111        'd|diff'        => \$use_diff,
112        'diffopts:s'    => \$diff_opts,
113        'f|force'       => \$force,
114        'h|help'        => \&usage,
115        'm|mirror=s'    => \$mirror_url,
116        'o|output=s'    => \$output_file,
117        'r|reverse'     => \$reverse,
118        'u|upstream=s@' => \@wanted_upstreams,
119        'v|verbose'     => \$verbose,
120        'x|crosscheck'  => \$do_crosscheck,
121    ) or usage;
122
123    my @modules;
124
125    usage("Cannot mix -a with module list") if $scan_all && @ARGV;
126
127    if ($do_crosscheck) {
128        usage("can't use -r, -d, --diffopts, -v with --crosscheck")
129          if ( $reverse || $use_diff || $diff_opts || $verbose );
130    }
131    else {
132        $diff_opts = '-u -b' unless defined $diff_opts;
133        usage("can't use -f without --crosscheck") if $force;
134    }
135
136    @modules =
137      $scan_all
138      ? grep $Maintainers::Modules{$_}{CPAN},
139      ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
140      : @ARGV;
141    usage("No modules specified") unless @modules;
142
143    my $outfh;
144    if ( defined $output_file ) {
145        open $outfh, '>', $output_file
146          or die "ERROR: could not open file '$output_file' for writing: $!\n";
147    }
148    else {
149        open $outfh, ">&STDOUT"
150          or die "ERROR: can't dup STDOUT: $!\n";
151    }
152
153    if ( defined $cache_dir ) {
154        die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir;
155    }
156    else {
157        $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
158    }
159
160    $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
161    my $test_file = "modules/03modlist.data.gz";
162    my_getstore(
163        cpan_url( $mirror_url, $test_file ),
164        catfile( $cache_dir, $test_file )
165    ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
166
167    if ($do_crosscheck) {
168        do_crosscheck( $outfh, $cache_dir, $mirror_url, $force, \@modules );
169    }
170    else {
171        do_compare(
172            \@modules,  $outfh,      $output_file,
173            $cache_dir, $mirror_url, $verbose,
174            $use_diff,  $reverse,    $diff_opts,
175            \@wanted_upstreams
176        );
177    }
178}
179
180# construct a CPAN url
181
182sub cpan_url {
183    my ( $mirror_url, @path ) = @_;
184    return $mirror_url unless @path;
185    my $cpan_path = join( "/", map { split "/", $_ } @path );
186    $cpan_path =~ s{\A/}{};    # remove leading slash since url has one trailing
187    return $mirror_url . $cpan_path;
188}
189
190# construct a CPAN URL for a author/distribution string like:
191# BINGOS/Archive-Extract-0.52.tar.gz
192
193sub cpan_url_distribution {
194    my ( $mirror_url, $distribution ) = @_;
195    $distribution =~ /^([A-Z])([A-Z])/
196        or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n";
197    my $path = "modules/by-authors/id/$1/$1$2/$distribution";
198    return cpan_url( $mirror_url, $path );
199}
200
201# compare a list of modules against their CPAN equivalents
202
203sub do_compare {
204    my (
205        $modules,    $outfh,   $output_file, $cache_dir,
206        $mirror_url, $verbose, $use_diff,    $reverse,
207        $diff_opts,  $wanted_upstreams
208    ) = @_;
209
210    # first, make sure we have a directory where they can all be untarred,
211    # and if its a permanent directory, clear any previous content
212    my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
213    my $src_dir   = catdir( $cache_dir, SRC_DIR );
214    for my $d ( $src_dir, $untar_dir ) {
215        next if -d $d;
216        mkdir $d or die "mkdir $d: $!\n";
217    }
218
219    my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
220
221    my %seen_dist;
222    for my $module (@$modules) {
223        warn "Processing $module ...\n" if defined $output_file;
224
225        my $m = $Maintainers::Modules{$module}
226          or die "ERROR: No such module in Maintainers.pl: '$module'\n";
227
228        unless ( $m->{CPAN} ) {
229            print $outfh "WARNING: $module is not dual-life; skipping\n";
230            next;
231        }
232
233        my $dist = $m->{DISTRIBUTION};
234        die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
235
236        if ( $seen_dist{$dist}++ ) {
237            warn "WARNING: duplicate entry for $dist in $module\n";
238        }
239
240        my $upstream = $m->{UPSTREAM} || 'UNKNOWN';
241        next if @$wanted_upstreams and !( $upstream ~~ $wanted_upstreams );
242
243        print $outfh "\n$module - "
244          . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
245        print $outfh "  upstream is: "
246          . ( $m->{UPSTREAM} || 'UNKNOWN!' ) . "\n";
247
248        my $cpan_dir;
249        eval {
250            $cpan_dir =
251              get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
252                $dist );
253        };
254        if ($@) {
255            print $outfh "  ", $@;
256            print $outfh "  (skipping)\n";
257            next;
258        }
259
260        my @perl_files = Maintainers::get_module_files($module);
261
262        my $manifest = catfile( $cpan_dir, 'MANIFEST' );
263        die "ERROR: no such file: $manifest\n" unless -f $manifest;
264
265        my $cpan_files = ExtUtils::Manifest::maniread($manifest);
266        my @cpan_files = sort keys %$cpan_files;
267
268        ( my $main_pm = $module ) =~ s{::}{/}g;
269        $main_pm .= ".pm";
270
271        my ( $excluded, $map, $customized ) =
272          get_map( $m, $module, \@perl_files );
273
274        my %perl_unseen;
275        @perl_unseen{@perl_files} = ();
276        my %perl_files = %perl_unseen;
277
278        foreach my $cpan_file (@cpan_files) {
279            my $mapped_file =
280              cpan_to_perl( $excluded, $map, $customized, $cpan_file );
281            unless ( defined $mapped_file ) {
282                print $outfh "  Excluded:  $cpan_file\n" if $verbose;
283                next;
284            }
285
286            if ( exists $perl_files{$mapped_file} ) {
287                delete $perl_unseen{$mapped_file};
288            }
289            else {
290
291                # some CPAN files foo are stored in core as foo.packed,
292                # which are then unpacked by 'make test_prep'
293                my $packed_file = "$mapped_file.packed";
294                if ( exists $perl_files{$packed_file} ) {
295                    if ( !-f $mapped_file and -f $packed_file ) {
296                        print $outfh <<EOF;
297WARNING: $mapped_file not found, but .packed variant exists.
298Perhaps you need to run 'make test_prep'?
299EOF
300                        next;
301                    }
302                    delete $perl_unseen{$packed_file};
303                }
304                else {
305                    if ( $ignorable{$cpan_file} ) {
306                        print $outfh "  Ignored:   $cpan_file\n" if $verbose;
307                        next;
308                    }
309
310                    unless ($use_diff) {
311                        print $outfh "  CPAN only: $cpan_file",
312                          ( $cpan_file eq $mapped_file )
313                          ? "\n"
314                          : " (missing $mapped_file)\n";
315                    }
316                    next;
317                }
318            }
319
320            my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
321
322            # should never happen
323            die "ERROR: can't find file $abs_cpan_file\n"
324              unless -f $abs_cpan_file;
325
326            # might happen if the FILES entry in Maintainers.pl is wrong
327            unless ( -f $mapped_file ) {
328                print $outfh "WARNING: perl file not found: $mapped_file\n";
329                next;
330            }
331
332            my $relative_mapped_file = relatively_mapped($mapped_file);
333
334            my $different =
335              file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
336                $diff_opts );
337            if ( $different && customized( $m, $relative_mapped_file ) ) {
338                if (! $use_diff ) {
339                    print $outfh "  Customized for blead: $relative_mapped_file\n";
340                }
341            }
342            elsif ($different) {
343                if ($use_diff) {
344                    $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
345                    print $outfh $different;
346                }
347                else {
348                    if ( $cpan_file eq $relative_mapped_file ) {
349                        print $outfh "  Modified:  $relative_mapped_file\n";
350                    }
351                    else {
352                        print $outfh
353                          "  Modified:  $cpan_file $relative_mapped_file\n";
354                    }
355
356                    if ( $cpan_file =~ m{\.pm\z} ) {
357                        my $pv = MM->parse_version($mapped_file)   || 'unknown';
358                        my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
359                        if ( $pv ne $cv ) {
360                            print $outfh
361"  Version mismatch in '$cpan_file':\n    $cv (cpan) vs $pv (perl)\n";
362                        }
363                    }
364
365                }
366            }
367            elsif ( customized( $m, $relative_mapped_file ) ) {
368                # Maintainers.pl says we customized it, but it looks the
369                # same as CPAN so maybe we lost the customization, which
370                # could be bad
371                if ( $cpan_file eq $relative_mapped_file ) {
372                    print $outfh "  Blead customization missing: $cpan_file\n";
373                }
374                else {
375                    print $outfh
376                      "  Blead customization missing: $cpan_file $relative_mapped_file\n";
377                }
378            }
379            elsif ($verbose) {
380                if ( $cpan_file eq $relative_mapped_file ) {
381                    print $outfh "  Unchanged: $cpan_file\n";
382                }
383                else {
384                    print $outfh
385                      "  Unchanged: $cpan_file $relative_mapped_file\n";
386                }
387            }
388        }
389        for ( sort keys %perl_unseen ) {
390            my $relative_mapped_file = relatively_mapped($_);
391            if ( customized( $m, $relative_mapped_file ) ) {
392                print $outfh "  Customized for blead: $_\n";
393            }
394            else {
395                print $outfh "  Perl only: $_\n" unless $use_diff;
396            }
397        }
398    }
399}
400
401sub relatively_mapped {
402    my $relative = shift;
403    $relative =~ s/^(cpan|dist|ext)\/.*?\///;
404    return $relative;
405}
406
407# given FooBar-1.23_45.tar.gz, return FooBar
408
409sub distro_base {
410    my $d = shift;
411    $d =~ s/\.tar\.gz$//;
412    $d =~ s/\.gip$//;
413    $d =~ s/[\d\-_\.]+$//;
414    return $d;
415}
416
417# process --crosscheck action:
418# ie list all distributions whose CPAN versions differ from that listed in
419# Maintainers.pl
420
421sub do_crosscheck {
422    my ( $outfh, $cache_dir, $mirror_url, $force, $modules ) = @_;
423
424    my $file         = '02packages.details.txt';
425    my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
426    my $path         = catfile( $download_dir, $file );
427    my $gzfile       = "$path.gz";
428
429    # grab 02packages.details.txt
430
431    my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
432
433    if ( !-f $gzfile or $force ) {
434        unlink $gzfile;
435        my_getstore( $url, $gzfile );
436    }
437    unlink $path;
438    IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
439      or die
440      "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
441
442    # suck in the data from it
443
444    open my $fh, '<', $path
445      or die "ERROR: open: $file: $!\n";
446
447    my %distros;
448    my %modules;
449
450    while (<$fh>) {
451        next if 1 .. /^$/;
452        chomp;
453        my @f = split ' ', $_;
454        if ( @f != 3 ) {
455            warn
456              "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
457            next;
458        }
459        my $distro = $f[2];
460        $distro =~ s{^[A-Z]/[A-Z]{2}/}{};    # strip leading A/AB/
461        $modules{ $f[0] } = $distro;
462
463        ( my $short_distro = $distro ) =~ s{^.*/}{};
464
465        $distros{ distro_base($short_distro) }{$distro} = 1;
466    }
467
468    for my $module (@$modules) {
469        my $m = $Maintainers::Modules{$module}
470          or die "ERROR: No such module in Maintainers.pl: '$module'\n";
471
472        unless ( $m->{CPAN} ) {
473            print $outfh "\nWARNING: $module is not dual-life; skipping\n";
474            next;
475        }
476
477        # given an entry like
478        #   Foo::Bar 1.23 foo-bar-1.23.tar.gz,
479        # first compare the module name against Foo::Bar, and failing that,
480        # against foo-bar
481
482        my $pdist = $m->{DISTRIBUTION};
483        die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
484
485        my $cdist = $modules{$module};
486        ( my $short_pdist = $pdist ) =~ s{^.*/}{};
487
488        unless ( defined $cdist ) {
489            my $d = $distros{ distro_base($short_pdist) };
490            unless ( defined $d ) {
491                print $outfh "\n$module: Can't determine current CPAN entry\n";
492                next;
493            }
494            if ( keys %$d > 1 ) {
495                print $outfh
496                  "\n$module: (found more than one CPAN candidate):\n";
497                print $outfh "    perl: $pdist\n";
498                print $outfh "    CPAN: $_\n" for sort keys %$d;
499                next;
500            }
501            $cdist = ( keys %$d )[0];
502        }
503
504        if ( $cdist ne $pdist ) {
505            print $outfh "\n$module:\n    Perl: $pdist\n    CPAN: $cdist\n";
506        }
507    }
508}
509
510# get the EXCLUDED and MAP entries for this module, or
511# make up defauts if they don't exist
512
513sub get_map {
514    my ( $m, $module_name, $perl_files ) = @_;
515
516    my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
517
518    $excluded   ||= [];
519    $customized ||= [];
520
521    return $excluded, $map, $customized if $map;
522
523    # all files under ext/foo-bar (plus maybe some under t/lib)???
524
525    my $ext;
526    for (@$perl_files) {
527        if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
528            if ( defined $ext and $ext ne $1 ) {
529
530                # more than one ext/$ext/
531                undef $ext;
532                last;
533            }
534            $ext = $1;
535        }
536        elsif (m{^t/lib/}) {
537            next;
538        }
539        else {
540            undef $ext;
541            last;
542        }
543    }
544
545    if ( defined $ext ) {
546        $map = { '' => $ext },;
547    }
548    else {
549        ( my $base = $module_name ) =~ s{::}{/}g;
550        $base = "lib/$base";
551        $map  = {
552            'lib/' => 'lib/',
553            ''     => "$base/",
554        };
555    }
556    return $excluded, $map, $customized;
557}
558
559# Given an exclude list and a mapping hash, convert a CPAN filename
560# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
561# Returns an empty list for an excluded file
562
563sub cpan_to_perl {
564    my ( $excluded, $map, $customized, $cpan_file ) = @_;
565
566    for my $exclude (@$excluded) {
567        next if $exclude ~~ $customized;
568
569        # may be a simple string to match exactly, or a pattern
570        if ( ref $exclude ) {
571            return if $cpan_file =~ $exclude;
572        }
573        else {
574            return if $cpan_file eq $exclude;
575        }
576    }
577
578    my $perl_file = $cpan_file;
579
580    # try longest prefix first, then alphabetically on tie-break
581    for
582      my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
583    {
584        last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
585    }
586    return $perl_file;
587}
588
589# fetch a file from a URL and store it in a file given by a filename
590
591sub my_getstore {
592    my ( $url, $file ) = @_;
593    File::Path::mkpath( File::Basename::dirname($file) );
594    if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
595        ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
596        File::Copy::copy( $local_path, $file );
597    } else {
598        my $http = HTTP::Tiny->new;
599        my $response = $http->mirror($url, $file);
600        return $response->{success};
601    }
602}
603
604# download and unpack a distribution
605# Returns the full pathname of the extracted directory
606# (eg '/tmp/XYZ/Foo_bar-1.23')
607
608# cache_dir:  where to download the .tar.gz file to
609# mirror_url: CPAN mirror to download from
610# untar_dir:  where to untar or unzup the file
611# module:     name of module
612# dist:       name of the distribution
613
614sub get_distribution {
615    my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
616
617    $dist =~ m{.+/([^/]+)$}
618      or die
619      "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
620    my $filename = $1;
621
622    my $download_file = catfile( $src_dir, $filename );
623
624    # download distribution
625
626    if ( -f $download_file and !-s $download_file ) {
627
628        # wget can leave a zero-length file on failed download
629        unlink $download_file;
630    }
631
632    unless ( -f $download_file ) {
633
634        # not cached
635        my $url = cpan_url_distribution( $mirror_url, $dist );
636        my_getstore( $url, $download_file )
637          or die "ERROR: Could not fetch '$url'\n";
638    }
639
640    # get the expected name of the extracted distribution dir
641
642    my $path = catfile( $untar_dir, $filename );
643
644    $path =~ s/\.tar\.gz$//
645      or $path =~ s/\.zip$//
646      or die
647      "ERROR: downloaded file does not have a recognised suffix: $path\n";
648
649    # extract it unless we already have it cached or tarball is newer
650    if ( !-d $path || ( -M $download_file < -M $path ) ) {
651        my $ae = Archive::Extract->new( archive => $download_file );
652        $ae->extract( to => $untar_dir )
653          or die
654          "ERROR: failed to extract distribution '$download_file to temp. dir: "
655          . $ae->error() . "\n";
656
657        $path = $ae->extract_path;
658    }
659
660    die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
661
662    return $path;
663}
664
665# produce the diff of a single file
666sub file_diff {
667    my $outfh     = shift;
668    my $cpan_file = shift;
669    my $perl_file = shift;
670    my $reverse   = shift;
671    my $diff_opts = shift;
672
673    my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
674    if ($reverse) {
675        push @cmd, $perl_file, $cpan_file;
676    }
677    else {
678        push @cmd, $cpan_file, $perl_file;
679    }
680    return `@cmd`;
681
682}
683
684sub customized {
685    my ( $module_data, $file ) = @_;
686    return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };
687}
688
689run();
690
691