1#!/usr/bin/env perl 2 3# core-cpan-diff: Compare CPAN modules with their equivalent in core 4 5# Originally based on App::DualLivedDiff by Steffen Mueller. 6 7use strict; 8use warnings; 9 10use 5.010; 11 12use Getopt::Long; 13use File::Basename (); 14use File::Copy (); 15use File::Temp (); 16use File::Path (); 17use File::Spec::Functions; 18use Archive::Extract; 19use IO::Uncompress::Gunzip (); 20use File::Compare (); 21use ExtUtils::Manifest; 22use ExtUtils::MakeMaker (); 23use HTTP::Tiny; 24 25BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' } 26use lib 'Porting'; 27use Maintainers (); 28 29# if running from blead, we may be doing -Ilib, which means when we 30# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc. 31# So preload the things we need, and tell it to check %INC first: 32 33use Archive::Tar; 34use IPC::Open3; 35use IO::Select; 36$Module::Load::Conditional::CHECK_INC_HASH = 1; 37 38# stop Archive::Extract whinging about lack of Archive::Zip 39$Archive::Extract::WARN = 0; 40 41# where, under the cache dir, to download tarballs to 42use constant SRC_DIR => 'tarballs'; 43 44# where, under the cache dir, to untar stuff to 45use constant UNTAR_DIR => 'untarred'; 46 47use constant DIFF_CMD => 'diff'; 48use constant WGET_CMD => 'wget'; 49 50sub usage { 51 print STDERR "\n@_\n\n" if @_; 52 print STDERR <<HERE; 53Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ] 54 55-a/--all Scan all dual-life modules. 56 57-c/--cachedir Where to save downloaded CPAN tarball files 58 (defaults to /tmp/something/ with deletion after each run). 59 60-d/--diff Display file differences using diff(1), rather than just 61 listing which files have changed. 62 The diff(1) command is assumed to be in your PATH. 63 64--diffopts Options to pass to the diff command. Defaults to '-u'. 65 66-f|force Force download from CPAN of new 02packages.details.txt file 67 (with --crosscheck only). 68 69-m|mirror Preferred CPAN mirror URI (http:// or file:///) 70 (Local mirror must be a complete mirror, not minicpan) 71 72-o/--output File name to write output to (defaults to STDOUT). 73 74-r/--reverse Reverses the diff (perl to CPAN). 75 76-u/--upstream only print modules with the given upstream (defaults to all) 77 78-v/--verbose List the fate of *all* files in the tarball, not just those 79 that differ or are missing. 80 81-x|crosscheck List the distributions whose current CPAN version differs from 82 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl). 83 84By default (i.e. without the --crosscheck option), for each listed module 85(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball 86from CPAN associated with that module, and compare the files in it with 87those in the perl source tree. 88 89Must be run from the root of the perl source tree. 90Module names must match the keys of %Modules in Maintainers.pl. 91HERE 92 exit(1); 93} 94 95sub run { 96 my $scan_all; 97 my $diff_opts; 98 my $reverse = 0; 99 my @wanted_upstreams; 100 my $cache_dir; 101 my $mirror_url = "http://www.cpan.org/"; 102 my $use_diff; 103 my $output_file; 104 my $verbose; 105 my $force; 106 my $do_crosscheck; 107 108 GetOptions( 109 'a|all' => \$scan_all, 110 'c|cachedir=s' => \$cache_dir, 111 'd|diff' => \$use_diff, 112 'diffopts:s' => \$diff_opts, 113 'f|force' => \$force, 114 'h|help' => \&usage, 115 'm|mirror=s' => \$mirror_url, 116 'o|output=s' => \$output_file, 117 'r|reverse' => \$reverse, 118 'u|upstream=s@' => \@wanted_upstreams, 119 'v|verbose' => \$verbose, 120 'x|crosscheck' => \$do_crosscheck, 121 ) or usage; 122 123 my @modules; 124 125 usage("Cannot mix -a with module list") if $scan_all && @ARGV; 126 127 if ($do_crosscheck) { 128 usage("can't use -r, -d, --diffopts, -v with --crosscheck") 129 if ( $reverse || $use_diff || $diff_opts || $verbose ); 130 } 131 else { 132 $diff_opts = '-u -b' unless defined $diff_opts; 133 usage("can't use -f without --crosscheck") if $force; 134 } 135 136 @modules = 137 $scan_all 138 ? grep $Maintainers::Modules{$_}{CPAN}, 139 ( sort { lc $a cmp lc $b } keys %Maintainers::Modules ) 140 : @ARGV; 141 usage("No modules specified") unless @modules; 142 143 my $outfh; 144 if ( defined $output_file ) { 145 open $outfh, '>', $output_file 146 or die "ERROR: could not open file '$output_file' for writing: $!\n"; 147 } 148 else { 149 open $outfh, ">&STDOUT" 150 or die "ERROR: can't dup STDOUT: $!\n"; 151 } 152 153 if ( defined $cache_dir ) { 154 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir; 155 } 156 else { 157 $cache_dir = File::Temp::tempdir( CLEANUP => 1 ); 158 } 159 160 $mirror_url .= "/" unless substr( $mirror_url, -1 ) eq "/"; 161 my $test_file = "modules/03modlist.data.gz"; 162 my_getstore( 163 cpan_url( $mirror_url, $test_file ), 164 catfile( $cache_dir, $test_file ) 165 ) or die "ERROR: not a CPAN mirror '$mirror_url'\n"; 166 167 if ($do_crosscheck) { 168 do_crosscheck( $outfh, $cache_dir, $mirror_url, $force, \@modules ); 169 } 170 else { 171 do_compare( 172 \@modules, $outfh, $output_file, 173 $cache_dir, $mirror_url, $verbose, 174 $use_diff, $reverse, $diff_opts, 175 \@wanted_upstreams 176 ); 177 } 178} 179 180# construct a CPAN url 181 182sub cpan_url { 183 my ( $mirror_url, @path ) = @_; 184 return $mirror_url unless @path; 185 my $cpan_path = join( "/", map { split "/", $_ } @path ); 186 $cpan_path =~ s{\A/}{}; # remove leading slash since url has one trailing 187 return $mirror_url . $cpan_path; 188} 189 190# construct a CPAN URL for a author/distribution string like: 191# BINGOS/Archive-Extract-0.52.tar.gz 192 193sub cpan_url_distribution { 194 my ( $mirror_url, $distribution ) = @_; 195 $distribution =~ /^([A-Z])([A-Z])/ 196 or die "ERROR: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $distribution\n"; 197 my $path = "modules/by-authors/id/$1/$1$2/$distribution"; 198 return cpan_url( $mirror_url, $path ); 199} 200 201# compare a list of modules against their CPAN equivalents 202 203sub do_compare { 204 my ( 205 $modules, $outfh, $output_file, $cache_dir, 206 $mirror_url, $verbose, $use_diff, $reverse, 207 $diff_opts, $wanted_upstreams 208 ) = @_; 209 210 # first, make sure we have a directory where they can all be untarred, 211 # and if its a permanent directory, clear any previous content 212 my $untar_dir = catdir( $cache_dir, UNTAR_DIR ); 213 my $src_dir = catdir( $cache_dir, SRC_DIR ); 214 for my $d ( $src_dir, $untar_dir ) { 215 next if -d $d; 216 mkdir $d or die "mkdir $d: $!\n"; 217 } 218 219 my %ignorable = map { ( $_ => 1 ) } @Maintainers::IGNORABLE; 220 221 my %seen_dist; 222 for my $module (@$modules) { 223 warn "Processing $module ...\n" if defined $output_file; 224 225 my $m = $Maintainers::Modules{$module} 226 or die "ERROR: No such module in Maintainers.pl: '$module'\n"; 227 228 unless ( $m->{CPAN} ) { 229 print $outfh "WARNING: $module is not dual-life; skipping\n"; 230 next; 231 } 232 233 my $dist = $m->{DISTRIBUTION}; 234 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist; 235 236 if ( $seen_dist{$dist}++ ) { 237 warn "WARNING: duplicate entry for $dist in $module\n"; 238 } 239 240 my $upstream = $m->{UPSTREAM} || 'UNKNOWN'; 241 next if @$wanted_upstreams and !( $upstream ~~ $wanted_upstreams ); 242 243 print $outfh "\n$module - " 244 . $Maintainers::Modules{$module}->{DISTRIBUTION} . "\n"; 245 print $outfh " upstream is: " 246 . ( $m->{UPSTREAM} || 'UNKNOWN!' ) . "\n"; 247 248 my $cpan_dir; 249 eval { 250 $cpan_dir = 251 get_distribution( $src_dir, $mirror_url, $untar_dir, $module, 252 $dist ); 253 }; 254 if ($@) { 255 print $outfh " ", $@; 256 print $outfh " (skipping)\n"; 257 next; 258 } 259 260 my @perl_files = Maintainers::get_module_files($module); 261 262 my $manifest = catfile( $cpan_dir, 'MANIFEST' ); 263 die "ERROR: no such file: $manifest\n" unless -f $manifest; 264 265 my $cpan_files = ExtUtils::Manifest::maniread($manifest); 266 my @cpan_files = sort keys %$cpan_files; 267 268 ( my $main_pm = $module ) =~ s{::}{/}g; 269 $main_pm .= ".pm"; 270 271 my ( $excluded, $map, $customized ) = 272 get_map( $m, $module, \@perl_files ); 273 274 my %perl_unseen; 275 @perl_unseen{@perl_files} = (); 276 my %perl_files = %perl_unseen; 277 278 foreach my $cpan_file (@cpan_files) { 279 my $mapped_file = 280 cpan_to_perl( $excluded, $map, $customized, $cpan_file ); 281 unless ( defined $mapped_file ) { 282 print $outfh " Excluded: $cpan_file\n" if $verbose; 283 next; 284 } 285 286 if ( exists $perl_files{$mapped_file} ) { 287 delete $perl_unseen{$mapped_file}; 288 } 289 else { 290 291 # some CPAN files foo are stored in core as foo.packed, 292 # which are then unpacked by 'make test_prep' 293 my $packed_file = "$mapped_file.packed"; 294 if ( exists $perl_files{$packed_file} ) { 295 if ( !-f $mapped_file and -f $packed_file ) { 296 print $outfh <<EOF; 297WARNING: $mapped_file not found, but .packed variant exists. 298Perhaps you need to run 'make test_prep'? 299EOF 300 next; 301 } 302 delete $perl_unseen{$packed_file}; 303 } 304 else { 305 if ( $ignorable{$cpan_file} ) { 306 print $outfh " Ignored: $cpan_file\n" if $verbose; 307 next; 308 } 309 310 unless ($use_diff) { 311 print $outfh " CPAN only: $cpan_file", 312 ( $cpan_file eq $mapped_file ) 313 ? "\n" 314 : " (missing $mapped_file)\n"; 315 } 316 next; 317 } 318 } 319 320 my $abs_cpan_file = catfile( $cpan_dir, $cpan_file ); 321 322 # should never happen 323 die "ERROR: can't find file $abs_cpan_file\n" 324 unless -f $abs_cpan_file; 325 326 # might happen if the FILES entry in Maintainers.pl is wrong 327 unless ( -f $mapped_file ) { 328 print $outfh "WARNING: perl file not found: $mapped_file\n"; 329 next; 330 } 331 332 my $relative_mapped_file = relatively_mapped($mapped_file); 333 334 my $different = 335 file_diff( $outfh, $abs_cpan_file, $mapped_file, $reverse, 336 $diff_opts ); 337 if ( $different && customized( $m, $relative_mapped_file ) ) { 338 if (! $use_diff ) { 339 print $outfh " Customized for blead: $relative_mapped_file\n"; 340 } 341 } 342 elsif ($different) { 343 if ($use_diff) { 344 $different =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm; 345 print $outfh $different; 346 } 347 else { 348 if ( $cpan_file eq $relative_mapped_file ) { 349 print $outfh " Modified: $relative_mapped_file\n"; 350 } 351 else { 352 print $outfh 353 " Modified: $cpan_file $relative_mapped_file\n"; 354 } 355 356 if ( $cpan_file =~ m{\.pm\z} ) { 357 my $pv = MM->parse_version($mapped_file) || 'unknown'; 358 my $cv = MM->parse_version($abs_cpan_file) || 'unknown'; 359 if ( $pv ne $cv ) { 360 print $outfh 361" Version mismatch in '$cpan_file':\n $cv (cpan) vs $pv (perl)\n"; 362 } 363 } 364 365 } 366 } 367 elsif ( customized( $m, $relative_mapped_file ) ) { 368 # Maintainers.pl says we customized it, but it looks the 369 # same as CPAN so maybe we lost the customization, which 370 # could be bad 371 if ( $cpan_file eq $relative_mapped_file ) { 372 print $outfh " Blead customization missing: $cpan_file\n"; 373 } 374 else { 375 print $outfh 376 " Blead customization missing: $cpan_file $relative_mapped_file\n"; 377 } 378 } 379 elsif ($verbose) { 380 if ( $cpan_file eq $relative_mapped_file ) { 381 print $outfh " Unchanged: $cpan_file\n"; 382 } 383 else { 384 print $outfh 385 " Unchanged: $cpan_file $relative_mapped_file\n"; 386 } 387 } 388 } 389 for ( sort keys %perl_unseen ) { 390 my $relative_mapped_file = relatively_mapped($_); 391 if ( customized( $m, $relative_mapped_file ) ) { 392 print $outfh " Customized for blead: $_\n"; 393 } 394 else { 395 print $outfh " Perl only: $_\n" unless $use_diff; 396 } 397 } 398 } 399} 400 401sub relatively_mapped { 402 my $relative = shift; 403 $relative =~ s/^(cpan|dist|ext)\/.*?\///; 404 return $relative; 405} 406 407# given FooBar-1.23_45.tar.gz, return FooBar 408 409sub distro_base { 410 my $d = shift; 411 $d =~ s/\.tar\.gz$//; 412 $d =~ s/\.gip$//; 413 $d =~ s/[\d\-_\.]+$//; 414 return $d; 415} 416 417# process --crosscheck action: 418# ie list all distributions whose CPAN versions differ from that listed in 419# Maintainers.pl 420 421sub do_crosscheck { 422 my ( $outfh, $cache_dir, $mirror_url, $force, $modules ) = @_; 423 424 my $file = '02packages.details.txt'; 425 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); 426 my $path = catfile( $download_dir, $file ); 427 my $gzfile = "$path.gz"; 428 429 # grab 02packages.details.txt 430 431 my $url = cpan_url( $mirror_url, "modules/02packages.details.txt.gz" ); 432 433 if ( !-f $gzfile or $force ) { 434 unlink $gzfile; 435 my_getstore( $url, $gzfile ); 436 } 437 unlink $path; 438 IO::Uncompress::Gunzip::gunzip( $gzfile, $path ) 439 or die 440 "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n"; 441 442 # suck in the data from it 443 444 open my $fh, '<', $path 445 or die "ERROR: open: $file: $!\n"; 446 447 my %distros; 448 my %modules; 449 450 while (<$fh>) { 451 next if 1 .. /^$/; 452 chomp; 453 my @f = split ' ', $_; 454 if ( @f != 3 ) { 455 warn 456 "WARNING: $file:$.: line doesn't have three fields (skipping)\n"; 457 next; 458 } 459 my $distro = $f[2]; 460 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/ 461 $modules{ $f[0] } = $distro; 462 463 ( my $short_distro = $distro ) =~ s{^.*/}{}; 464 465 $distros{ distro_base($short_distro) }{$distro} = 1; 466 } 467 468 for my $module (@$modules) { 469 my $m = $Maintainers::Modules{$module} 470 or die "ERROR: No such module in Maintainers.pl: '$module'\n"; 471 472 unless ( $m->{CPAN} ) { 473 print $outfh "\nWARNING: $module is not dual-life; skipping\n"; 474 next; 475 } 476 477 # given an entry like 478 # Foo::Bar 1.23 foo-bar-1.23.tar.gz, 479 # first compare the module name against Foo::Bar, and failing that, 480 # against foo-bar 481 482 my $pdist = $m->{DISTRIBUTION}; 483 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist; 484 485 my $cdist = $modules{$module}; 486 ( my $short_pdist = $pdist ) =~ s{^.*/}{}; 487 488 unless ( defined $cdist ) { 489 my $d = $distros{ distro_base($short_pdist) }; 490 unless ( defined $d ) { 491 print $outfh "\n$module: Can't determine current CPAN entry\n"; 492 next; 493 } 494 if ( keys %$d > 1 ) { 495 print $outfh 496 "\n$module: (found more than one CPAN candidate):\n"; 497 print $outfh " perl: $pdist\n"; 498 print $outfh " CPAN: $_\n" for sort keys %$d; 499 next; 500 } 501 $cdist = ( keys %$d )[0]; 502 } 503 504 if ( $cdist ne $pdist ) { 505 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n"; 506 } 507 } 508} 509 510# get the EXCLUDED and MAP entries for this module, or 511# make up defauts if they don't exist 512 513sub get_map { 514 my ( $m, $module_name, $perl_files ) = @_; 515 516 my ( $excluded, $map, $customized ) = @$m{qw(EXCLUDED MAP CUSTOMIZED)}; 517 518 $excluded ||= []; 519 $customized ||= []; 520 521 return $excluded, $map, $customized if $map; 522 523 # all files under ext/foo-bar (plus maybe some under t/lib)??? 524 525 my $ext; 526 for (@$perl_files) { 527 if (m{^((?:ext|dist|cpan)/[^/]+/)}) { 528 if ( defined $ext and $ext ne $1 ) { 529 530 # more than one ext/$ext/ 531 undef $ext; 532 last; 533 } 534 $ext = $1; 535 } 536 elsif (m{^t/lib/}) { 537 next; 538 } 539 else { 540 undef $ext; 541 last; 542 } 543 } 544 545 if ( defined $ext ) { 546 $map = { '' => $ext },; 547 } 548 else { 549 ( my $base = $module_name ) =~ s{::}{/}g; 550 $base = "lib/$base"; 551 $map = { 552 'lib/' => 'lib/', 553 '' => "$base/", 554 }; 555 } 556 return $excluded, $map, $customized; 557} 558 559# Given an exclude list and a mapping hash, convert a CPAN filename 560# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t'). 561# Returns an empty list for an excluded file 562 563sub cpan_to_perl { 564 my ( $excluded, $map, $customized, $cpan_file ) = @_; 565 566 for my $exclude (@$excluded) { 567 next if $exclude ~~ $customized; 568 569 # may be a simple string to match exactly, or a pattern 570 if ( ref $exclude ) { 571 return if $cpan_file =~ $exclude; 572 } 573 else { 574 return if $cpan_file eq $exclude; 575 } 576 } 577 578 my $perl_file = $cpan_file; 579 580 # try longest prefix first, then alphabetically on tie-break 581 for 582 my $prefix ( sort { length($b) <=> length($a) || $a cmp $b } keys %$map ) 583 { 584 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/; 585 } 586 return $perl_file; 587} 588 589# fetch a file from a URL and store it in a file given by a filename 590 591sub my_getstore { 592 my ( $url, $file ) = @_; 593 File::Path::mkpath( File::Basename::dirname($file) ); 594 if ( $url =~ qr{\Afile://(?:localhost)?/} ) { 595 ( my $local_path = $url ) =~ s{\Afile://(?:localhost)?}{}; 596 File::Copy::copy( $local_path, $file ); 597 } else { 598 my $http = HTTP::Tiny->new; 599 my $response = $http->mirror($url, $file); 600 return $response->{success}; 601 } 602} 603 604# download and unpack a distribution 605# Returns the full pathname of the extracted directory 606# (eg '/tmp/XYZ/Foo_bar-1.23') 607 608# cache_dir: where to download the .tar.gz file to 609# mirror_url: CPAN mirror to download from 610# untar_dir: where to untar or unzup the file 611# module: name of module 612# dist: name of the distribution 613 614sub get_distribution { 615 my ( $src_dir, $mirror_url, $untar_dir, $module, $dist ) = @_; 616 617 $dist =~ m{.+/([^/]+)$} 618 or die 619 "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n"; 620 my $filename = $1; 621 622 my $download_file = catfile( $src_dir, $filename ); 623 624 # download distribution 625 626 if ( -f $download_file and !-s $download_file ) { 627 628 # wget can leave a zero-length file on failed download 629 unlink $download_file; 630 } 631 632 unless ( -f $download_file ) { 633 634 # not cached 635 my $url = cpan_url_distribution( $mirror_url, $dist ); 636 my_getstore( $url, $download_file ) 637 or die "ERROR: Could not fetch '$url'\n"; 638 } 639 640 # get the expected name of the extracted distribution dir 641 642 my $path = catfile( $untar_dir, $filename ); 643 644 $path =~ s/\.tar\.gz$// 645 or $path =~ s/\.zip$// 646 or die 647 "ERROR: downloaded file does not have a recognised suffix: $path\n"; 648 649 # extract it unless we already have it cached or tarball is newer 650 if ( !-d $path || ( -M $download_file < -M $path ) ) { 651 my $ae = Archive::Extract->new( archive => $download_file ); 652 $ae->extract( to => $untar_dir ) 653 or die 654 "ERROR: failed to extract distribution '$download_file to temp. dir: " 655 . $ae->error() . "\n"; 656 657 $path = $ae->extract_path; 658 } 659 660 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path; 661 662 return $path; 663} 664 665# produce the diff of a single file 666sub file_diff { 667 my $outfh = shift; 668 my $cpan_file = shift; 669 my $perl_file = shift; 670 my $reverse = shift; 671 my $diff_opts = shift; 672 673 my @cmd = ( DIFF_CMD, split ' ', $diff_opts ); 674 if ($reverse) { 675 push @cmd, $perl_file, $cpan_file; 676 } 677 else { 678 push @cmd, $cpan_file, $perl_file; 679 } 680 return `@cmd`; 681 682} 683 684sub customized { 685 my ( $module_data, $file ) = @_; 686 return grep { $file eq $_ } @{ $module_data->{CUSTOMIZED} }; 687} 688 689run(); 690 691