1#!/usr/bin/env perl 2 3# core-cpan-diff: Compare CPAN modules with their equivalent in core 4 5# Originally based on App::DualLivedDiff by Steffen Mueller. 6 7use strict; 8use warnings; 9 10use 5.010; 11 12use Getopt::Long; 13use File::Basename (); 14use File::Copy (); 15use File::Temp (); 16use File::Path (); 17use File::Spec::Functions; 18use Archive::Extract; 19use IO::Uncompress::Gunzip (); 20use File::Compare (); 21use ExtUtils::Manifest; 22use ExtUtils::MakeMaker (); 23use HTTP::Tiny; 24 25BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' } 26use lib 'Porting'; 27use Maintainers (); 28 29# if running from blead, we may be doing -Ilib, which means when we 30# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc. 31# So preload the things we need, and tell it to check %INC first: 32 33use Archive::Tar; 34use IPC::Open3; 35use IO::Select; 36$Module::Load::Conditional::CHECK_INC_HASH = 1; 37 38# stop Archive::Extract whinging about lack of Archive::Zip 39$Archive::Extract::WARN = 0; 40 41# where, under the cache dir, to download tarballs to 42use constant SRC_DIR => 'tarballs'; 43 44# where, under the cache dir, to untar stuff to 45use constant UNTAR_DIR => 'untarred'; 46 47use constant DIFF_CMD => 'diff'; 48 49sub usage { 50 print STDERR "\n@_\n\n" if @_; 51 print STDERR <<HERE; 52Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ] 53 54-a/--all Scan all dual-life modules. 55 56-c/--cachedir Where to save downloaded CPAN tarball files 57 (defaults to /tmp/something/ with deletion after each run). 58 59-d/--diff Display file differences using diff(1), rather than just 60 listing which files have changed. 61 62--diffopts Options to pass to the diff command. Defaults to '-u'. 63 64-f|force Force download from CPAN of new 02packages.details.txt file 65 (with --crosscheck only). 66 67-m|mirror Preferred CPAN mirror URI (http:// or file:///) 68 (Local mirror must be a complete mirror, not minicpan) 69 70-o/--output File name to write output to (defaults to STDOUT). 71 72-r/--reverse Reverses the diff (perl to CPAN). 73 74-u/--upstream only print modules with the given upstream (defaults to all) 75 76-v/--verbose List the fate of *all* files in the tarball, not just those 77 that differ or are missing. 78 79-x|crosscheck List the distributions whose current CPAN version differs from 80 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl). 81 82By default (i.e. without the --crosscheck option), for each listed module 83(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball 84from CPAN associated with that module, and compare the files in it with 85those in the perl source tree. 86 87Must be run from the root of the perl source tree. 88Module names must match the keys of %Modules in Maintainers.pl. 89 90The diff(1) command is assumed to be in your PATH and is used to diff files 91regardless of whether the --diff option has been chosen to display any file 92differences. 93HERE 94 exit(1); 95} 96 97sub run { 98 my $scan_all; 99 my $diff_opts; 100 my $reverse = 0; 101 my @wanted_upstreams; 102 my $cache_dir; 103 my $mirror_url = "http://www.cpan.org/"; 104 my $use_diff; 105 my $output_file; 106 my $verbose; 107 my $force; 108 my $do_crosscheck; 109 110 GetOptions( 111 'a|all' => \$scan_all, 112 'c|cachedir=s' => \$cache_dir, 113 'd|diff' => \$use_diff, 114 'diffopts:s' => \$diff_opts, 115 'f|force' => \$force, 116 'h|help' => \&usage, 117 'm|mirror=s' => \$mirror_url, 118 'o|output=s' => \$output_file, 119 'r|reverse' => \$reverse, 120 'u|upstream=s@' => \@wanted_upstreams, 121 'v|verbose' => \$verbose, 122 'x|crosscheck' => \$do_crosscheck, 123 ) or usage; 124 125 @wanted_upstreams = map { $_ eq 'undef' ? undef : $_ } @wanted_upstreams; 126 127 my @modules; 128 129 usage("Cannot mix -a with module list") if $scan_all && @ARGV; 130 131 if ($do_crosscheck) { 132 usage("can't use -r, -d, --diffopts, -v with --crosscheck") 133 if ( $reverse || $use_diff || $diff_opts || $verbose ); 134 } 135 else { 136 $diff_opts = '-u -b' unless defined $diff_opts; 137 usage("can't use -f without --crosscheck") if $force; 138 } 139 140 @modules = 141 $scan_all 142 ? grep $Maintainers::Modules{$_}{CPAN}, 143 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules ) 144 : @ARGV; 145 usage("No modules specified") unless @modules; 146 147 my $outfh; 148 if ( defined $output_file ) { 149 open $outfh, '>', $output_file 150 or die "ERROR: could not open file '$output_file' for writing: $!\n"; 151 } 152 else { 153 open $outfh, ">&STDOUT" 154 or die "ERROR: can't dup STDOUT: $!\n"; 155 } 156 157 if ( defined $cache_dir ) { 158 die "ERROR: not a directory: '$cache_dir'\n" 159 if !-d $cache_dir && -e $cache_dir; 160 File::Path::mkpath($cache_dir); 161 } 162 else { 163 $cache_dir = File::Temp::tempdir( CLEANUP => 1 ); 164 } 165 166 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/"; 167 my $test_file = "modules/03modlist.data.gz"; 168 my_getstore( 169 cpan_url( $mirror_url, $test_file ), 170 catfile( $cache_dir, $test_file ) 171 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n"; 172 173 if ($do_crosscheck) { 174 do_crosscheck( 175 $outfh, $cache_dir, $mirror_url, 176 $force, \@modules, \@wanted_upstreams 177 ); 178 } 179 else { 180 do_compare( 181 \@modules, $outfh, $output_file, 182 $cache_dir, $mirror_url, $verbose, 183 $use_diff, $reverse, $diff_opts, 184 \@wanted_upstreams 185 ); 186 } 187} 188 189# construct a CPAN url 190 191sub cpan_url { 192 my ( $mirror_url, @path ) = @_; 193 return $mirror_url unless @path; 194 my $cpan_path = join( "/", map { split "/", $_ } @path ); 195 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing 196 return $mirror_url . $cpan_path; 197} 198 199# construct a CPAN URL for a author/distribution string like: 200# BINGOS/Archive-Extract-0.52.tar.gz 201 202sub cpan_url_distribution { 203 my ( $mirror_url, $distribution ) = @_; 204 $distribution =~ /^([A-Z])([A-Z])/ 205 or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n"; 206 my $path = "authors/id/$1/$1$2/$distribution"; 207 return cpan_url( $mirror_url, $path ); 208} 209 210# compare a list of modules against their CPAN equivalents 211 212sub do_compare { 213 my ( 214 $modules, $outfh, $output_file, $cache_dir, 215 $mirror_url, $verbose, $use_diff, $reverse, 216 $diff_opts, $wanted_upstreams 217 ) = @_; 218 219 # first, make sure we have a directory where they can all be untarred, 220 # and if its a permanent directory, clear any previous content 221 my $untar_dir = catdir( $cache_dir, UNTAR_DIR ); 222 my $src_dir = catdir( $cache_dir, SRC_DIR ); 223 for my $d ( $src_dir, $untar_dir ) { 224 next if -d $d; 225 mkdir $d or die "mkdir $d: $!\n"; 226 } 227 228 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE; 229 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams; 230 231 my %seen_dist; 232 for my $module (@$modules) { 233 warn "Processing $module ...\n" if defined $output_file; 234 235 my $m = $Maintainers::Modules{$module} 236 or die "ERROR: No such module in Maintainers.pl: '$module'\n"; 237 238 unless ( $m->{CPAN} ) { 239 print $outfh "WARNING: $module is not dual-life; skipping\n"; 240 next; 241 } 242 243 my $dist = $m->{DISTRIBUTION}; 244 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist; 245 246 if ( $seen_dist{$dist}++ ) { 247 warn "WARNING: duplicate entry for $dist in $module\n"; 248 } 249 250 my $upstream = $m->{UPSTREAM}; 251 next if @$wanted_upstreams and !$wanted_upstream{$upstream}; 252 253 print $outfh "\n$module - " 254 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n"; 255 print $outfh " upstream is: " 256 . ( $m->{UPSTREAM} // 'UNKNOWN!' ) . "\n"; 257 258 my $cpan_dir; 259 eval { 260 $cpan_dir = 261 get_distribution( $src_dir, $mirror_url, $untar_dir, $module, 262 $dist ); 263 }; 264 if ($@) { 265 print $outfh " ", $@; 266 print $outfh " (skipping)\n"; 267 next; 268 } 269 270 my @perl_files = Maintainers::get_module_files($module); 271 272 my $manifest = catfile( $cpan_dir, 'MANIFEST' ); 273 die "ERROR: no such file: $manifest\n" unless -f $manifest; 274 275 my $cpan_files = ExtUtils::Manifest::maniread($manifest); 276 my @cpan_files = sort keys %$cpan_files; 277 278 ( my $main_pm = $module ) =~ s{::}{/}g; 279 $main_pm .= ".pm"; 280 281 my ( $excluded, $map, $customized ) = 282 get_map( $m, $module, \@perl_files ); 283 284 my %perl_unseen; 285 @perl_unseen{@perl_files} = (); 286 my %perl_files = %perl_unseen; 287 288 foreach my $cpan_file (@cpan_files) { 289 my $mapped_file = 290 cpan_to_perl( $excluded, $map, $customized, $cpan_file ); 291 unless ( defined $mapped_file ) { 292 print $outfh " Excluded: $cpan_file\n" if $verbose; 293 next; 294 } 295 296 if ( exists $perl_files{$mapped_file} ) { 297 delete $perl_unseen{$mapped_file}; 298 } 299 else { 300 301 # some CPAN files foo are stored in core as foo.packed, 302 # which are then unpacked by 'make test_prep' 303 my $packed_file = "$mapped_file.packed"; 304 if ( exists $perl_files{$packed_file} ) { 305 if ( !-f $mapped_file and -f $packed_file ) { 306 print $outfh <<EOF; 307WARNING: $mapped_file not found, but .packed variant exists. 308Perhaps you need to run 'make test_prep'? 309EOF 310 next; 311 } 312 delete $perl_unseen{$packed_file}; 313 } 314 else { 315 if ( $ignorable{$cpan_file} ) { 316 print $outfh " Ignored: $cpan_file\n" if $verbose; 317 next; 318 } 319 320 unless ($use_diff) { 321 print $outfh " CPAN only: $cpan_file", 322 ( $cpan_file eq $mapped_file ) 323 ? "\n" 324 : " (missing $mapped_file)\n"; 325 } 326 next; 327 } 328 } 329 330 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file ); 331 332 # should never happen 333 die "ERROR: can't find file $abs_cpan_file\n" 334 unless -f $abs_cpan_file; 335 336 # might happen if the FILES entry in Maintainers.pl is wrong 337 unless ( -f $mapped_file ) { 338 print $outfh "WARNING: perl file not found: $mapped_file\n"; 339 next; 340 } 341 342 my $relative_mapped_file = relatively_mapped($mapped_file); 343 344 my $different = 345 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse, 346 $diff_opts ); 347 if ( $different && customized( $m, $relative_mapped_file ) ) { 348 if (! $use_diff ) { 349 print $outfh " Customized for blead: $relative_mapped_file\n"; 350 } 351 } 352 elsif ($different) { 353 if ($use_diff) { 354 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm; 355 print $outfh $different; 356 } 357 else { 358 if ( $cpan_file eq $relative_mapped_file ) { 359 print $outfh " Modified: $relative_mapped_file\n"; 360 } 361 else { 362 print $outfh 363 " Modified: $cpan_file $relative_mapped_file\n"; 364 } 365 366 if ( $cpan_file =~ m{\.pm\z} ) { 367 my $pv = MM->parse_version($mapped_file) || 'unknown'; 368 my $cv = MM->parse_version($abs_cpan_file) || 'unknown'; 369 if ( $pv ne $cv ) { 370 print $outfh 371" Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n"; 372 } 373 } 374 375 } 376 } 377 elsif ( customized( $m, $relative_mapped_file ) ) { 378 # Maintainers.pl says we customized it, but it looks the 379 # same as CPAN so maybe we lost the customization, which 380 # could be bad 381 if ( $cpan_file eq $relative_mapped_file ) { 382 print $outfh " Blead customization missing: $cpan_file\n"; 383 } 384 else { 385 print $outfh 386 " Blead customization missing: $cpan_file $relative_mapped_file\n"; 387 } 388 } 389 elsif ($verbose) { 390 if ( $cpan_file eq $relative_mapped_file ) { 391 print $outfh " Unchanged: $cpan_file\n"; 392 } 393 else { 394 print $outfh 395 " Unchanged: $cpan_file $relative_mapped_file\n"; 396 } 397 } 398 } 399 for ( sort keys %perl_unseen ) { 400 my $relative_mapped_file = relatively_mapped($_); 401 if ( customized( $m, $relative_mapped_file ) ) { 402 print $outfh " Customized for blead: $_\n"; 403 } 404 else { 405 print $outfh " Perl only: $_\n" unless $use_diff; 406 } 407 } 408 } 409} 410 411sub relatively_mapped { 412 my $relative = shift; 413 $relative =~ s/^(cpan|dist|ext)\/.*?\///; 414 return $relative; 415} 416 417# given FooBar-1.23_45.tar.gz, return FooBar 418 419sub distro_base { 420 my $d = shift; 421 $d =~ s/\.tar\.gz$//; 422 $d =~ s/\.gip$//; 423 $d =~ s/[\d\-_\.]+$//; 424 return $d; 425} 426 427# process --crosscheck action: 428# ie list all distributions whose CPAN versions differ from that listed in 429# Maintainers.pl 430 431sub do_crosscheck { 432 my ( 433 $outfh, $cache_dir, $mirror_url, 434 $force, $modules, $wanted_upstreams, 435 ) = @_; 436 437 my $file = '02packages.details.txt'; 438 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); 439 my $path = catfile( $download_dir, $file ); 440 my $gzfile = "$path.gz"; 441 442 # grab 02packages.details.txt 443 444 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" ); 445 446 if ( !-f $gzfile or $force ) { 447 unlink $gzfile; 448 my_getstore( $url, $gzfile ); 449 } 450 unlink $path; 451 IO::Uncompress::Gunzip::gunzip( $gzfile, $path ) 452 or die 453 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n"; 454 455 # suck in the data from it 456 457 open my $fh, '<', $path 458 or die "ERROR: open: $file: $!\n"; 459 460 my %distros; 461 my %modules; 462 463 while (<$fh>) { 464 next if 1 .. /^$/; 465 chomp; 466 my @f = split ' ', $_; 467 if ( @f != 3 ) { 468 warn 469 "WARNING: $file:$.: line doesn't have three fields (skipping)\n"; 470 next; 471 } 472 my $distro = $f[2]; 473 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/ 474 $modules{ $f[0] } = $distro; 475 476 ( my $short_distro = $distro ) =~ s{^.*/}{}; 477 478 $distros{ distro_base($short_distro) }{$distro} = 1; 479 } 480 481 my %wanted_upstream = map { ( $_ => 1 ) } @$wanted_upstreams; 482 for my $module (@$modules) { 483 my $m = $Maintainers::Modules{$module} 484 or die "ERROR: No such module in Maintainers.pl: '$module'\n"; 485 486 unless ( $m->{CPAN} ) { 487 print $outfh "\nWARNING: $module is not dual-life; skipping\n"; 488 next; 489 } 490 491 # given an entry like 492 # Foo::Bar 1.23 foo-bar-1.23.tar.gz, 493 # first compare the module name against Foo::Bar, and failing that, 494 # against foo-bar 495 496 my $pdist = $m->{DISTRIBUTION}; 497 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist; 498 499 my $upstream = $m->{UPSTREAM}; 500 next if @$wanted_upstreams and !$wanted_upstream{$upstream}; 501 502 my $cdist = $modules{$module}; 503 ( my $short_pdist = $pdist ) =~ s{^.*/}{}; 504 505 unless ( defined $cdist ) { 506 my $d = $distros{ distro_base($short_pdist) }; 507 unless ( defined $d ) { 508 print $outfh "\n$module: Can't determine current CPAN entry\n"; 509 next; 510 } 511 if ( keys %$d > 1 ) { 512 print $outfh 513 "\n$module: (found more than one CPAN candidate):\n"; 514 print $outfh " perl: $pdist\n"; 515 print $outfh " CPAN: $_\n" for sort keys %$d; 516 next; 517 } 518 $cdist = ( keys %$d )[0]; 519 } 520 521 if ( $cdist ne $pdist ) { 522 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n"; 523 } 524 } 525} 526 527# get the EXCLUDED and MAP entries for this module, or 528# make up defauts if they don't exist 529 530sub get_map { 531 my ( $m, $module_name, $perl_files ) = @_; 532 533 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)}; 534 535 $excluded ||= []; 536 $customized ||= []; 537 538 return $excluded, $map, $customized if $map; 539 540 # all files under ext/foo-bar (plus maybe some under t/lib)??? 541 542 my $ext; 543 for (@$perl_files) { 544 if (m{^((?:ext|dist|cpan)/[^/]+/)}) { 545 if ( defined $ext and $ext ne $1 ) { 546 547 # more than one ext/$ext/ 548 undef $ext; 549 last; 550 } 551 $ext = $1; 552 } 553 elsif (m{^t/lib/}) { 554 next; 555 } 556 else { 557 undef $ext; 558 last; 559 } 560 } 561 562 if ( defined $ext ) { 563 $map = { '' => $ext },; 564 } 565 else { 566 ( my $base = $module_name ) =~ s{::}{/}g; 567 $base = "lib/$base"; 568 $map = { 569 'lib/' => 'lib/', 570 '' => "$base/", 571 }; 572 } 573 return $excluded, $map, $customized; 574} 575 576# Given an exclude list and a mapping hash, convert a CPAN filename 577# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t'). 578# Returns an empty list for an excluded file 579 580sub cpan_to_perl { 581 my ( $excluded, $map, $customized, $cpan_file ) = @_; 582 583 my %customized = map { ( $_ => 1 ) } @$customized; 584 for my $exclude (@$excluded) { 585 next if $customized{$exclude}; 586 587 # may be a simple string to match exactly, or a pattern 588 if ( ref $exclude ) { 589 return if $cpan_file =~ $exclude; 590 } 591 else { 592 return if $cpan_file eq $exclude; 593 } 594 } 595 596 my $perl_file = $cpan_file; 597 598 # try longest prefix first, then alphabetically on tie-break 599 for 600 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map ) 601 { 602 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/; 603 } 604 return $perl_file; 605} 606 607# fetch a file from a URL and store it in a file given by a filename 608 609sub my_getstore { 610 my ( $url, $file ) = @_; 611 File::Path::mkpath( File::Basename::dirname($file) ); 612 if ( $url =~ qr{\Afile://(?:localhost)?/} ) { 613 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{}; 614 File::Copy::copy( $local_path, $file ); 615 } else { 616 my $http = HTTP::Tiny->new; 617 my $response = $http->mirror($url, $file); 618 return $response->{success}; 619 } 620} 621 622# download and unpack a distribution 623# Returns the full pathname of the extracted directory 624# (eg '/tmp/XYZ/Foo_bar-1.23') 625 626# cache_dir: where to download the .tar.gz file to 627# mirror_url: CPAN mirror to download from 628# untar_dir: where to untar or unzup the file 629# module: name of module 630# dist: name of the distribution 631 632sub get_distribution { 633 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_; 634 635 $dist =~ m{.+/([^/]+)$} 636 or die 637 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n"; 638 my $filename = $1; 639 640 my $download_file = catfile( $src_dir, $filename ); 641 642 # download distribution 643 644 if ( -f $download_file and !-s $download_file ) { 645 646 # failed download might leave a zero-length file 647 unlink $download_file; 648 } 649 650 unless ( -f $download_file ) { 651 652 # not cached 653 my $url = cpan_url_distribution( $mirror_url, $dist ); 654 my_getstore( $url, $download_file ) 655 or die "ERROR: Could not fetch '$url'\n"; 656 } 657 658 # get the expected name of the extracted distribution dir 659 660 my $path = catfile( $untar_dir, $filename ); 661 662 $path =~ s/\.tar\.gz$// 663 or $path =~ s/\.tgz$// 664 or $path =~ s/\.zip$// 665 or die 666 "ERROR: downloaded file does not have a recognised suffix: $path\n"; 667 668 # extract it unless we already have it cached or tarball is newer 669 if ( !-d $path || ( -M $download_file < -M $path ) ) { 670 my $ae = Archive::Extract->new( archive => $download_file ); 671 $ae->extract( to => $untar_dir ) 672 or die 673 "ERROR: failed to extract distribution '$download_file to temp. dir: " 674 . $ae->error() . "\n"; 675 676 $path = $ae->extract_path; 677 } 678 679 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path; 680 681 return $path; 682} 683 684# produce the diff of a single file 685sub file_diff { 686 my $outfh = shift; 687 my $cpan_file = shift; 688 my $perl_file = shift; 689 my $reverse = shift; 690 my $diff_opts = shift; 691 692 my @cmd = ( DIFF_CMD, split ' ', $diff_opts ); 693 if ($reverse) { 694 push @cmd, $perl_file, $cpan_file; 695 } 696 else { 697 push @cmd, $cpan_file, $perl_file; 698 } 699 return `@cmd`; 700 701} 702 703sub customized { 704 my ( $module_data, $file ) = @_; 705 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} }; 706} 707 708run(); 709 710