xref: /openbsd-src/gnu/usr.bin/perl/Porting/core-cpan-diff (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)
1#!/usr/bin/env perl
2
3# core-cpan-diff: Compare CPAN modules with their equivalent in core
4
5# Originally based on App::DualLivedDiff by Steffen Mueller.
6
7use strict;
8use warnings;
9
10use 5.010;
11
12use Getopt::Long;
13use File::Basename ();
14use File::Copy     ();
15use File::Temp     ();
16use File::Path     ();
17use File::Spec::Functions;
18use Archive::Extract;
19use IO::Uncompress::Gunzip ();
20use File::Compare          ();
21use ExtUtils::Manifest;
22use ExtUtils::MakeMaker ();
23use HTTP::Tiny;
24
25BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' }
26use lib 'Porting';
27use Maintainers ();
28
29# if running from blead, we may be doing -Ilib, which means when we
30# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc.
31# So preload the things we need, and tell it to check %INC first:
32
33use Archive::Tar;
34use IPC::Open3;
35use IO::Select;
36$Module::Load::Conditional::CHECK_INC_HASH = 1;
37
38# stop Archive::Extract whinging about lack of Archive::Zip
39$Archive::Extract::WARN = 0;
40
41# where, under the cache dir, to download tarballs to
42use constant SRC_DIR => 'tarballs';
43
44# where, under the cache dir, to untar stuff to
45use constant UNTAR_DIR => 'untarred';
46
47use constant DIFF_CMD => 'diff';
48
49sub usage {
50    print STDERR "\n@_\n\n" if @_;
51    print STDERR <<HERE;
52Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ]
53
54-a/--all      Scan all dual-life modules.
55
56-c/--cachedir Where to save downloaded CPAN tarball files
57              (defaults to /tmp/something/ with deletion after each run).
58
59-d/--diff     Display file differences using diff(1), rather than just
60              listing which files have changed.
61
62--diffopts    Options to pass to the diff command. Defaults to '-u'.
63
64-f|force      Force download from CPAN of new 02packages.details.txt file
65              (with --crosscheck only).
66
67-m|mirror     Preferred CPAN mirror URI (http:// or file:///)
68              (Local mirror must be a complete mirror, not minicpan)
69
70-o/--output   File name to write output to (defaults to STDOUT).
71
72-r/--reverse  Reverses the diff (perl to CPAN).
73
74-u/--upstream only print modules with the given upstream (defaults to all)
75
76-v/--verbose  List the fate of *all* files in the tarball, not just those
77              that differ or are missing.
78
79-x|crosscheck List the distributions whose current CPAN version differs from
80              that in blead (i.e. the DISTRIBUTION field in Maintainers.pl).
81
82By default (i.e. without the --crosscheck option),  for each listed module
83(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball
84from CPAN associated with that module, and compare the files in it with
85those in the perl source tree.
86
87Must be run from the root of the perl source tree.
88Module names must match the keys of %Modules in Maintainers.pl.
89
90The diff(1) command is assumed to be in your PATH and is used to diff files
91regardless of whether the --diff option has been chosen to display any file
92differences.
93HERE
94    exit(1);
95}
96
97sub run {
98    my $scan_all;
99    my $diff_opts;
100    my $reverse = 0;
101    my @wanted_upstreams;
102    my $cache_dir;
103    my $mirror_url = "http://www.cpan.org/";
104    my $use_diff;
105    my $output_file;
106    my $verbose;
107    my $force;
108    my $do_crosscheck;
109
110    GetOptions(
111        'a|all'         => \$scan_all,
112        'c|cachedir=s'  => \$cache_dir,
113        'd|diff'        => \$use_diff,
114        'diffopts:s'    => \$diff_opts,
115        'f|force'       => \$force,
116        'h|help'        => \&usage,
117        'm|mirror=s'    => \$mirror_url,
118        'o|output=s'    => \$output_file,
119        'r|reverse'     => \$reverse,
120        'u|upstream=s@' => \@wanted_upstreams,
121        'v|verbose'     => \$verbose,
122        'x|crosscheck'  => \$do_crosscheck,
123    ) or usage;
124
125    @wanted_upstreams = map { $_ eq 'undef' ? undef : $_ } @wanted_upstreams;
126
127    my @modules;
128
129    usage("Cannot mix -a with module list") if $scan_all && @ARGV;
130
131    if ($do_crosscheck) {
132        usage("can't use -r, -d, --diffopts, -v with --crosscheck")
133          if ( $reverse || $use_diff || $diff_opts || $verbose );
134    }
135    else {
136        $diff_opts = '-u -b' unless defined $diff_opts;
137        usage("can't use -f without --crosscheck") if $force;
138    }
139
140    @modules =
141      $scan_all
142      ? grep $Maintainers::Modules{$_}{CPAN},
143      ( sort { lc $a cmp lc $b } keys %Maintainers::Modules )
144      : @ARGV;
145    usage("No modules specified") unless @modules;
146
147    my $outfh;
148    if ( defined $output_file ) {
149        open $outfh, '>', $output_file
150          or die "ERROR: could not open file '$output_file' for writing: $!\n";
151    }
152    else {
153        open $outfh, ">&STDOUT"
154          or die "ERROR: can't dup STDOUT: $!\n";
155    }
156
157    if ( defined $cache_dir ) {
158        die "ERROR: not a directory: '$cache_dir'\n"
159            if !-d $cache_dir && -e $cache_dir;
160        File::Path::mkpath($cache_dir);
161    }
162    else {
163        $cache_dir = File::Temp::tempdir( CLEANUP => 1 );
164    }
165
166    $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/";
167    my $test_file = "modules/03modlist.data.gz";
168    my_getstore(
169        cpan_url( $mirror_url, $test_file ),
170        catfile( $cache_dir, $test_file )
171    ) or die "ERROR: not a CPAN mirror '$mirror_url'\n";
172
173    if ($do_crosscheck) {
174        do_crosscheck(
175            $outfh, $cache_dir, $mirror_url,
176            $force, \@modules,  \@wanted_upstreams
177        );
178    }
179    else {
180        do_compare(
181            \@modules,  $outfh,      $output_file,
182            $cache_dir, $mirror_url, $verbose,
183            $use_diff,  $reverse,    $diff_opts,
184            \@wanted_upstreams
185        );
186    }
187}
188
189# construct a CPAN url
190
191sub cpan_url {
192    my ( $mirror_url, @path ) = @_;
193    return $mirror_url unless @path;
194    my $cpan_path = join( "/", map { split "/", $_ } @path );
195    $cpan_path =~ s{\A/}{};    # remove leading slash since url has one trailing
196    return $mirror_url . $cpan_path;
197}
198
199# construct a CPAN URL for a author/distribution string like:
200# BINGOS/Archive-Extract-0.52.tar.gz
201
202sub cpan_url_distribution {
203    my ( $mirror_url, $distribution ) = @_;
204    $distribution =~ /^([A-Z])([A-Z])/
205        or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n";
206    my $path = "authors/id/$1/$1$2/$distribution";
207    return cpan_url( $mirror_url, $path );
208}
209
210# compare a list of modules against their CPAN equivalents
211
212sub do_compare {
213    my (
214        $modules,    $outfh,   $output_file, $cache_dir,
215        $mirror_url, $verbose, $use_diff,    $reverse,
216        $diff_opts,  $wanted_upstreams
217    ) = @_;
218
219    # first, make sure we have a directory where they can all be untarred,
220    # and if its a permanent directory, clear any previous content
221    my $untar_dir = catdir( $cache_dir, UNTAR_DIR );
222    my $src_dir   = catdir( $cache_dir, SRC_DIR );
223    for my $d ( $src_dir, $untar_dir ) {
224        next if -d $d;
225        mkdir $d or die "mkdir $d: $!\n";
226    }
227
228    my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE;
229    my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
230
231    my %seen_dist;
232    for my $module (@$modules) {
233        warn "Processing $module ...\n" if defined $output_file;
234
235        my $m = $Maintainers::Modules{$module}
236          or die "ERROR: No such module in Maintainers.pl: '$module'\n";
237
238        unless ( $m->{CPAN} ) {
239            print $outfh "WARNING: $module is not dual-life; skipping\n";
240            next;
241        }
242
243        my $dist = $m->{DISTRIBUTION};
244        die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist;
245
246        if ( $seen_dist{$dist}++ ) {
247            warn "WARNING: duplicate entry for $dist in $module\n";
248        }
249
250        my $upstream = $m->{UPSTREAM};
251        next if @$wanted_upstreams and !$wanted_upstream{$upstream};
252
253        print $outfh "\n$module - "
254          . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n";
255        print $outfh "  upstream is: "
256          . ( $m->{UPSTREAM} // 'UNKNOWN!' ) . "\n";
257
258        my $cpan_dir;
259        eval {
260            $cpan_dir =
261              get_distribution( $src_dir, $mirror_url, $untar_dir, $module,
262                $dist );
263        };
264        if ($@) {
265            print $outfh "  ", $@;
266            print $outfh "  (skipping)\n";
267            next;
268        }
269
270        my @perl_files = Maintainers::get_module_files($module);
271
272        my $manifest = catfile( $cpan_dir, 'MANIFEST' );
273        die "ERROR: no such file: $manifest\n" unless -f $manifest;
274
275        my $cpan_files = ExtUtils::Manifest::maniread($manifest);
276        my @cpan_files = sort keys %$cpan_files;
277
278        ( my $main_pm = $module ) =~ s{::}{/}g;
279        $main_pm .= ".pm";
280
281        my ( $excluded, $map, $customized ) =
282          get_map( $m, $module, \@perl_files );
283
284        my %perl_unseen;
285        @perl_unseen{@perl_files} = ();
286        my %perl_files = %perl_unseen;
287
288        foreach my $cpan_file (@cpan_files) {
289            my $mapped_file =
290              cpan_to_perl( $excluded, $map, $customized, $cpan_file );
291            unless ( defined $mapped_file ) {
292                print $outfh "  Excluded:  $cpan_file\n" if $verbose;
293                next;
294            }
295
296            if ( exists $perl_files{$mapped_file} ) {
297                delete $perl_unseen{$mapped_file};
298            }
299            else {
300
301                # some CPAN files foo are stored in core as foo.packed,
302                # which are then unpacked by 'make test_prep'
303                my $packed_file = "$mapped_file.packed";
304                if ( exists $perl_files{$packed_file} ) {
305                    if ( !-f $mapped_file and -f $packed_file ) {
306                        print $outfh <<EOF;
307WARNING: $mapped_file not found, but .packed variant exists.
308Perhaps you need to run 'make test_prep'?
309EOF
310                        next;
311                    }
312                    delete $perl_unseen{$packed_file};
313                }
314                else {
315                    if ( $ignorable{$cpan_file} ) {
316                        print $outfh "  Ignored:   $cpan_file\n" if $verbose;
317                        next;
318                    }
319
320                    unless ($use_diff) {
321                        print $outfh "  CPAN only: $cpan_file",
322                          ( $cpan_file eq $mapped_file )
323                          ? "\n"
324                          : " (missing $mapped_file)\n";
325                    }
326                    next;
327                }
328            }
329
330            my $abs_cpan_file = catfile( $cpan_dir, $cpan_file );
331
332            # should never happen
333            die "ERROR: can't find file $abs_cpan_file\n"
334              unless -f $abs_cpan_file;
335
336            # might happen if the FILES entry in Maintainers.pl is wrong
337            unless ( -f $mapped_file ) {
338                print $outfh "WARNING: perl file not found: $mapped_file\n";
339                next;
340            }
341
342            my $relative_mapped_file = relatively_mapped($mapped_file);
343
344            my $different =
345              file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse,
346                $diff_opts );
347            if ( $different && customized( $m, $relative_mapped_file ) ) {
348                if (! $use_diff ) {
349                    print $outfh "  Customized for blead: $relative_mapped_file\n";
350                }
351            }
352            elsif ($different) {
353                if ($use_diff) {
354                    $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm;
355                    print $outfh $different;
356                }
357                else {
358                    if ( $cpan_file eq $relative_mapped_file ) {
359                        print $outfh "  Modified:  $relative_mapped_file\n";
360                    }
361                    else {
362                        print $outfh
363                          "  Modified:  $cpan_file $relative_mapped_file\n";
364                    }
365
366                    if ( $cpan_file =~ m{\.pm\z} ) {
367                        my $pv = MM->parse_version($mapped_file)   || 'unknown';
368                        my $cv = MM->parse_version($abs_cpan_file) || 'unknown';
369                        if ( $pv ne $cv ) {
370                            print $outfh
371"  Version mismatch in '$cpan_file':\n    $cv (cpan) vs $pv (perl)\n";
372                        }
373                    }
374
375                }
376            }
377            elsif ( customized( $m, $relative_mapped_file ) ) {
378                # Maintainers.pl says we customized it, but it looks the
379                # same as CPAN so maybe we lost the customization, which
380                # could be bad
381                if ( $cpan_file eq $relative_mapped_file ) {
382                    print $outfh "  Blead customization missing: $cpan_file\n";
383                }
384                else {
385                    print $outfh
386                      "  Blead customization missing: $cpan_file $relative_mapped_file\n";
387                }
388            }
389            elsif ($verbose) {
390                if ( $cpan_file eq $relative_mapped_file ) {
391                    print $outfh "  Unchanged: $cpan_file\n";
392                }
393                else {
394                    print $outfh
395                      "  Unchanged: $cpan_file $relative_mapped_file\n";
396                }
397            }
398        }
399        for ( sort keys %perl_unseen ) {
400            my $relative_mapped_file = relatively_mapped($_);
401            if ( customized( $m, $relative_mapped_file ) ) {
402                print $outfh "  Customized for blead: $_\n";
403            }
404            else {
405                print $outfh "  Perl only: $_\n" unless $use_diff;
406            }
407        }
408    }
409}
410
411sub relatively_mapped {
412    my $relative = shift;
413    $relative =~ s/^(cpan|dist|ext)\/.*?\///;
414    return $relative;
415}
416
417# given FooBar-1.23_45.tar.gz, return FooBar
418
419sub distro_base {
420    my $d = shift;
421    $d =~ s/\.tar\.gz$//;
422    $d =~ s/\.gip$//;
423    $d =~ s/[\d\-_\.]+$//;
424    return $d;
425}
426
427# process --crosscheck action:
428# ie list all distributions whose CPAN versions differ from that listed in
429# Maintainers.pl
430
431sub do_crosscheck {
432    my (
433        $outfh, $cache_dir, $mirror_url,
434        $force, $modules,   $wanted_upstreams,
435    ) = @_;
436
437    my $file         = '02packages.details.txt';
438    my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 );
439    my $path         = catfile( $download_dir, $file );
440    my $gzfile       = "$path.gz";
441
442    # grab 02packages.details.txt
443
444    my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" );
445
446    if ( !-f $gzfile or $force ) {
447        unlink $gzfile;
448        my_getstore( $url, $gzfile );
449    }
450    unlink $path;
451    IO::Uncompress::Gunzip::gunzip( $gzfile, $path )
452      or die
453      "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n";
454
455    # suck in the data from it
456
457    open my $fh, '<', $path
458      or die "ERROR: open: $file: $!\n";
459
460    my %distros;
461    my %modules;
462
463    while (<$fh>) {
464        next if 1 .. /^$/;
465        chomp;
466        my @f = split ' ', $_;
467        if ( @f != 3 ) {
468            warn
469              "WARNING: $file:$.: line doesn't have three fields (skipping)\n";
470            next;
471        }
472        my $distro = $f[2];
473        $distro =~ s{^[A-Z]/[A-Z]{2}/}{};    # strip leading A/AB/
474        $modules{ $f[0] } = $distro;
475
476        ( my $short_distro = $distro ) =~ s{^.*/}{};
477
478        $distros{ distro_base($short_distro) }{$distro} = 1;
479    }
480
481    my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams;
482    for my $module (@$modules) {
483        my $m = $Maintainers::Modules{$module}
484          or die "ERROR: No such module in Maintainers.pl: '$module'\n";
485
486        unless ( $m->{CPAN} ) {
487            print $outfh "\nWARNING: $module is not dual-life; skipping\n";
488            next;
489        }
490
491        # given an entry like
492        #   Foo::Bar 1.23 foo-bar-1.23.tar.gz,
493        # first compare the module name against Foo::Bar, and failing that,
494        # against foo-bar
495
496        my $pdist = $m->{DISTRIBUTION};
497        die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist;
498
499        my $upstream = $m->{UPSTREAM};
500        next if @$wanted_upstreams and !$wanted_upstream{$upstream};
501
502        my $cdist = $modules{$module};
503        ( my $short_pdist = $pdist ) =~ s{^.*/}{};
504
505        unless ( defined $cdist ) {
506            my $d = $distros{ distro_base($short_pdist) };
507            unless ( defined $d ) {
508                print $outfh "\n$module: Can't determine current CPAN entry\n";
509                next;
510            }
511            if ( keys %$d > 1 ) {
512                print $outfh
513                  "\n$module: (found more than one CPAN candidate):\n";
514                print $outfh "    perl: $pdist\n";
515                print $outfh "    CPAN: $_\n" for sort keys %$d;
516                next;
517            }
518            $cdist = ( keys %$d )[0];
519        }
520
521        if ( $cdist ne $pdist ) {
522            print $outfh "\n$module:\n    Perl: $pdist\n    CPAN: $cdist\n";
523        }
524    }
525}
526
527# get the EXCLUDED and MAP entries for this module, or
528# make up defauts if they don't exist
529
530sub get_map {
531    my ( $m, $module_name, $perl_files ) = @_;
532
533    my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)};
534
535    $excluded   ||= [];
536    $customized ||= [];
537
538    return $excluded, $map, $customized if $map;
539
540    # all files under ext/foo-bar (plus maybe some under t/lib)???
541
542    my $ext;
543    for (@$perl_files) {
544        if (m{^((?:ext|dist|cpan)/[^/]+/)}) {
545            if ( defined $ext and $ext ne $1 ) {
546
547                # more than one ext/$ext/
548                undef $ext;
549                last;
550            }
551            $ext = $1;
552        }
553        elsif (m{^t/lib/}) {
554            next;
555        }
556        else {
557            undef $ext;
558            last;
559        }
560    }
561
562    if ( defined $ext ) {
563        $map = { '' => $ext },;
564    }
565    else {
566        ( my $base = $module_name ) =~ s{::}{/}g;
567        $base = "lib/$base";
568        $map  = {
569            'lib/' => 'lib/',
570            ''     => "$base/",
571        };
572    }
573    return $excluded, $map, $customized;
574}
575
576# Given an exclude list and a mapping hash, convert a CPAN filename
577# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t').
578# Returns an empty list for an excluded file
579
580sub cpan_to_perl {
581    my ( $excluded, $map, $customized, $cpan_file ) = @_;
582
583    my %customized = map { ( $_ => 1 ) } @$customized;
584    for my $exclude (@$excluded) {
585        next if $customized{$exclude};
586
587        # may be a simple string to match exactly, or a pattern
588        if ( ref $exclude ) {
589            return if $cpan_file =~ $exclude;
590        }
591        else {
592            return if $cpan_file eq $exclude;
593        }
594    }
595
596    my $perl_file = $cpan_file;
597
598    # try longest prefix first, then alphabetically on tie-break
599    for
600      my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map )
601    {
602        last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/;
603    }
604    return $perl_file;
605}
606
607# fetch a file from a URL and store it in a file given by a filename
608
609sub my_getstore {
610    my ( $url, $file ) = @_;
611    File::Path::mkpath( File::Basename::dirname($file) );
612    if ( $url =~ qr{\Afile://(?:localhost)?/} ) {
613        ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{};
614        File::Copy::copy( $local_path, $file );
615    } else {
616        my $http = HTTP::Tiny->new;
617        my $response = $http->mirror($url, $file);
618        return $response->{success};
619    }
620}
621
622# download and unpack a distribution
623# Returns the full pathname of the extracted directory
624# (eg '/tmp/XYZ/Foo_bar-1.23')
625
626# cache_dir:  where to download the .tar.gz file to
627# mirror_url: CPAN mirror to download from
628# untar_dir:  where to untar or unzup the file
629# module:     name of module
630# dist:       name of the distribution
631
632sub get_distribution {
633    my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_;
634
635    $dist =~ m{.+/([^/]+)$}
636      or die
637      "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n";
638    my $filename = $1;
639
640    my $download_file = catfile( $src_dir, $filename );
641
642    # download distribution
643
644    if ( -f $download_file and !-s $download_file ) {
645
646        # failed download might leave a zero-length file
647        unlink $download_file;
648    }
649
650    unless ( -f $download_file ) {
651
652        # not cached
653        my $url = cpan_url_distribution( $mirror_url, $dist );
654        my_getstore( $url, $download_file )
655          or die "ERROR: Could not fetch '$url'\n";
656    }
657
658    # get the expected name of the extracted distribution dir
659
660    my $path = catfile( $untar_dir, $filename );
661
662    $path =~ s/\.tar\.gz$//
663      or $path =~ s/\.tgz$//
664      or $path =~ s/\.zip$//
665      or die
666      "ERROR: downloaded file does not have a recognised suffix: $path\n";
667
668    # extract it unless we already have it cached or tarball is newer
669    if ( !-d $path || ( -M $download_file < -M $path ) ) {
670        my $ae = Archive::Extract->new( archive => $download_file );
671        $ae->extract( to => $untar_dir )
672          or die
673          "ERROR: failed to extract distribution '$download_file to temp. dir: "
674          . $ae->error() . "\n";
675
676        $path = $ae->extract_path;
677    }
678
679    die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path;
680
681    return $path;
682}
683
684# produce the diff of a single file
685sub file_diff {
686    my $outfh     = shift;
687    my $cpan_file = shift;
688    my $perl_file = shift;
689    my $reverse   = shift;
690    my $diff_opts = shift;
691
692    my @cmd = ( DIFF_CMD, split ' ', $diff_opts );
693    if ($reverse) {
694        push @cmd, $perl_file, $cpan_file;
695    }
696    else {
697        push @cmd, $cpan_file, $perl_file;
698    }
699    return `@cmd`;
700
701}
702
703sub customized {
704    my ( $module_data, $file ) = @_;
705    return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} };
706}
707
708run();
709
710