1#!/usr/bin/env perl 2 3# core-cpan-diff: Compare CPAN modules with their equivalent in core 4 5# Originally based on App::DualLivedDiff by Steffen Mueller. 6 7use strict; 8use warnings; 9 10use 5.010; 11 12use Getopt::Long; 13use File::Temp (); 14use File::Path (); 15use File::Spec; 16use Archive::Extract; 17use IO::Uncompress::Gunzip (); 18use File::Compare (); 19use ExtUtils::Manifest; 20 21BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' } 22use lib 'Porting'; 23use Maintainers (); 24 25# if running from blead, we may be doing -Ilib, which means when we 26# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc. 27# So preload the things we need, and tell it to check %INC first: 28 29use Archive::Tar; 30use IPC::Open3; 31use IO::Select; 32$Module::Load::Conditional::CHECK_INC_HASH = 1; 33# stop Archive::Extract whinging about lack of Archive::Zip 34$Archive::Extract::WARN = 0; 35 36 37# Files, which if they exist in CPAN but not in perl, will not generate 38# an 'Only in CPAN' listing 39# 40our %IGNORABLE = map { ($_ => 1) } 41 qw(.cvsignore .dualLivedDiffConfig .gitignore 42 ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL 43 CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS 44 GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL 45 MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README 46 SIGNATURE THANKS TODO Todo VERSION WHATSNEW); 47 48# where, under the cache dir, to untar stuff to 49 50use constant UNTAR_DIR => 'untarred'; 51 52use constant DIFF_CMD => 'diff'; 53use constant WGET_CMD => 'wget'; 54 55sub usage { 56 print STDERR "\n@_\n\n" if @_; 57 print STDERR <<HERE; 58Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ] 59 60-a/--all Scan all dual-life modules. 61 62-c/--cachedir Where to save downloaded CPAN tarball files 63 (defaults to /tmp/something/ with deletion after each run). 64 65-d/--diff Display file differences using diff(1), rather than just 66 listing which files have changed. 67 The diff(1) command is assumed to be in your PATH. 68 69--diffopts Options to pass to the diff command. Defaults to '-u'. 70 71-f|force Force download from CPAN of new 02packages.details.txt file 72 (with --crosscheck only). 73 74-o/--output File name to write output to (defaults to STDOUT). 75 76-r/--reverse Reverses the diff (perl to CPAN). 77 78-u/--upstream only print modules with the given upstream (defaults to all) 79 80-v/--verbose List the fate of *all* files in the tarball, not just those 81 that differ or are missing. 82 83-x|crosscheck List the distributions whose current CPAN version differs from 84 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl). 85 86By default (i.e. without the --crosscheck option), for each listed module 87(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball 88from CPAN associated with that module, and compare the files in it with 89those in the perl source tree. 90 91Must be run from the root of the perl source tree. 92Module names must match the keys of %Modules in Maintainers.pl. 93HERE 94 exit(1); 95} 96 97 98sub run { 99 my $scan_all; 100 my $diff_opts; 101 my $reverse = 0; 102 my @wanted_upstreams; 103 my $cache_dir; 104 my $use_diff; 105 my $output_file; 106 my $verbose; 107 my $force; 108 my $do_crosscheck; 109 110 GetOptions( 111 'a|all' => \$scan_all, 112 'c|cachedir=s' => \$cache_dir, 113 'd|diff' => \$use_diff, 114 'diffopts:s' => \$diff_opts, 115 'f|force' => \$force, 116 'h|help' => \&usage, 117 'o|output=s' => \$output_file, 118 'r|reverse' => \$reverse, 119 'u|upstream=s@'=> \@wanted_upstreams, 120 'v|verbose' => \$verbose, 121 'x|crosscheck' => \$do_crosscheck, 122 ) or usage; 123 124 125 my @modules; 126 127 usage("Cannot mix -a with module list") if $scan_all && @ARGV; 128 129 if ($do_crosscheck) { 130 usage("can't use -r, -d, --diffopts, -v with --crosscheck") 131 if ($reverse || $use_diff || $diff_opts || $verbose); 132 } 133 else { 134 $diff_opts = '-u' unless defined $diff_opts; 135 usage("can't use -f without --crosscheck") if $force; 136 } 137 138 @modules = $scan_all 139 ? grep $Maintainers::Modules{$_}{CPAN}, 140 (sort {lc $a cmp lc $b } keys %Maintainers::Modules) 141 : @ARGV; 142 usage("No modules specified") unless @modules; 143 144 145 my $outfh; 146 if (defined $output_file) { 147 open $outfh, '>', $output_file 148 or die "ERROR: could not open file '$output_file' for writing: $!\n"; 149 } 150 else { 151 open $outfh, ">&STDOUT" 152 or die "ERROR: can't dup STDOUT: $!\n"; 153 } 154 155 if (defined $cache_dir) { 156 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir; 157 } 158 159 if ($do_crosscheck) { 160 do_crosscheck($outfh, $cache_dir, $force, \@modules); 161 } 162 else { 163 do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff, 164 $reverse, $diff_opts, \@wanted_upstreams); 165 } 166} 167 168 169 170# compare a list of modules against their CPAN equivalents 171 172sub do_compare { 173 my ($modules, $outfh, $output_file, $cache_dir, $verbose, 174 $use_diff, $reverse, $diff_opts, $wanted_upstreams) = @_; 175 176 177 # first, make sure we have a directory where they can all be untarred, 178 # and if its a permanent directory, clear any previous content 179 my $untar_dir; 180 if ($cache_dir) { 181 $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR); 182 if (-d $untar_dir) { 183 File::Path::rmtree($untar_dir) 184 or die "failed to remove $untar_dir\n"; 185 } 186 mkdir $untar_dir 187 or die "mkdir $untar_dir: $!\n"; 188 } 189 else { 190 $untar_dir = File::Temp::tempdir( CLEANUP => 1 ); 191 } 192 193 my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE; 194 195 my %seen_dist; 196 for my $module (@$modules) { 197 warn "Processing $module ...\n" if defined $output_file; 198 199 my $m = $Maintainers::Modules{$module} 200 or die "ERROR: No such module in Maintainers.pl: '$module'\n"; 201 202 unless ($m->{CPAN}) { 203 print $outfh "WARNING: $module is not dual-life; skipping\n"; 204 next; 205 } 206 207 my $dist = $m->{DISTRIBUTION}; 208 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist; 209 210 if ($seen_dist{$dist}) { 211 warn "WARNING: duplicate entry for $dist in $module\n" 212 } 213 214 my $upstream = $m->{UPSTREAM} || 'UNKNOWN'; 215 next if @$wanted_upstreams and ! ($upstream ~~ $wanted_upstreams); 216 print $outfh "\n$module - ".$Maintainers::Modules{$module}->{DISTRIBUTION}."\n" unless $use_diff; 217 print $outfh " upstream is: ".($m->{UPSTREAM} || 'UNKNOWN!')."\n"; 218 219 $seen_dist{$dist}++; 220 221 my $cpan_dir; 222 eval { 223 $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist) 224 }; 225 if ($@) { 226 print $outfh " ", $@; 227 print $outfh " (skipping)\n"; 228 next; 229 } 230 231 my @perl_files = Maintainers::get_module_files($module); 232 233 my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST'); 234 die "ERROR: no such file: $manifest\n" unless -f $manifest; 235 236 my $cpan_files = ExtUtils::Manifest::maniread($manifest); 237 my @cpan_files = sort keys %$cpan_files; 238 239 my ($excluded, $map) = get_map($m, $module, \@perl_files); 240 241 my %perl_unseen; 242 @perl_unseen{@perl_files} = (); 243 my %perl_files = %perl_unseen; 244 245 foreach my $cpan_file (@cpan_files) { 246 my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file); 247 unless (defined $mapped_file) { 248 print $outfh " Excluded: $cpan_file\n" if $verbose; 249 next; 250 } 251 252 if (exists $perl_files{$mapped_file}) { 253 delete $perl_unseen{$mapped_file}; 254 } 255 else { 256 # some CPAN files foo are stored in core as foo.packed, 257 # which are then unpacked by 'make test_prep' 258 my $packed_file = "$mapped_file.packed"; 259 if (exists $perl_files{$packed_file} ) { 260 if (! -f $mapped_file and -f $packed_file) { 261 print $outfh <<EOF; 262WARNING: $mapped_file not found, but .packed variant exists. 263Perhaps you need to run 'make test_prep'? 264EOF 265 next; 266 } 267 delete $perl_unseen{$packed_file}; 268 } 269 else { 270 if ($ignorable{$cpan_file}) { 271 print $outfh " Ignored: $cpan_file\n" if $verbose; 272 next; 273 } 274 275 unless ($use_diff) { 276 print $outfh " CPAN only: $cpan_file", 277 ($cpan_file eq $mapped_file) ? "\n" 278 : " (expected $mapped_file)\n"; 279 } 280 next; 281 } 282 } 283 284 285 my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file); 286 287 # should never happen 288 die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file; 289 290 # might happen if the FILES entry in Maintainers.pl is wrong 291 unless (-f $mapped_file) { 292 print $outfh "WARNING: perl file not found: $mapped_file\n"; 293 next; 294 } 295 296 my $relative_mapped_file = $mapped_file; 297 $relative_mapped_file =~ s/^(cpan|dist|ext)\/.*?\///; 298 299 if (File::Compare::compare($abs_cpan_file, $mapped_file)) { 300 301 302 if ($use_diff) { 303 file_diff($outfh, $abs_cpan_file, $mapped_file, 304 $reverse, $diff_opts); 305 } 306 else { 307 if ($cpan_file eq $relative_mapped_file) { 308 print $outfh " Modified: $relative_mapped_file\n"; 309 } 310 else { 311 print $outfh " Modified: $cpan_file $relative_mapped_file\n"; 312 } 313 } 314 } 315 elsif ($verbose) { 316 if ($cpan_file eq $relative_mapped_file) { 317 print $outfh " Unchanged: $cpan_file\n"; 318 } 319 else { 320 print $outfh " Unchanged: $cpan_file $relative_mapped_file\n"; 321 } 322 } 323 } 324 for (sort keys %perl_unseen) { 325 print $outfh " Perl only: $_\n" unless $use_diff; 326 } 327 } 328} 329 330# given FooBar-1.23_45.tar.gz, return FooBar 331 332sub distro_base { 333 my $d = shift; 334 $d =~ s/\.tar\.gz$//; 335 $d =~ s/\.gip$//; 336 $d =~ s/[\d\-_\.]+$//; 337 return $d; 338} 339 340# process --crosscheck action: 341# ie list all distributions whose CPAN versions differ from that listed in 342# Maintainers.pl 343 344sub do_crosscheck { 345 my ($outfh, $cache_dir, $force, $modules) = @_; 346 347 my $file = '02packages.details.txt'; 348 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); 349 my $path = File::Spec->catfile($download_dir, $file); 350 my $gzfile = "$path.gz"; 351 352 # grab 02packages.details.txt 353 354 my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz'; 355 356 if (! -f $gzfile or $force) { 357 unlink $gzfile; 358 my_getstore($url, $gzfile); 359 } 360 unlink $path; 361 IO::Uncompress::Gunzip::gunzip($gzfile, $path) 362 or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n"; 363 364 # suck in the data from it 365 366 open my $fh, '<', $path 367 or die "ERROR: open: $file: $!\n"; 368 369 my %distros; 370 my %modules; 371 372 while (<$fh>) { 373 next if 1../^$/; 374 chomp; 375 my @f = split ' ', $_; 376 if (@f != 3) { 377 warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n"; 378 next; 379 } 380 my $distro = $f[2]; 381 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/ 382 $modules{$f[0]} = $distro; 383 384 (my $short_distro = $distro) =~ s{^.*/}{}; 385 386 $distros{distro_base($short_distro)}{$distro} = 1; 387 } 388 389 for my $module (@$modules) { 390 my $m = $Maintainers::Modules{$module} 391 or die "ERROR: No such module in Maintainers.pl: '$module'\n"; 392 393 unless ($m->{CPAN}) { 394 print $outfh "\nWARNING: $module is not dual-life; skipping\n"; 395 next; 396 } 397 398 # given an entry like 399 # Foo::Bar 1.23 foo-bar-1.23.tar.gz, 400 # first compare the module name against Foo::Bar, and failing that, 401 # against foo-bar 402 403 my $pdist = $m->{DISTRIBUTION}; 404 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist; 405 406 my $cdist = $modules{$module}; 407 (my $short_pdist = $pdist) =~ s{^.*/}{}; 408 409 unless (defined $cdist) { 410 my $d = $distros{distro_base($short_pdist)}; 411 unless (defined $d) { 412 print $outfh "\n$module: Can't determine current CPAN entry\n"; 413 next; 414 } 415 if (keys %$d > 1) { 416 print $outfh "\n$module: (found more than one CPAN candidate):\n"; 417 print $outfh " perl: $pdist\n"; 418 print $outfh " CPAN: $_\n" for sort keys %$d; 419 next; 420 } 421 $cdist = (keys %$d)[0]; 422 } 423 424 if ($cdist ne $pdist) { 425 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n"; 426 } 427 } 428} 429 430 431 432# get the EXCLUDED and MAP entries for this module, or 433# make up defauts if they don't exist 434 435sub get_map { 436 my ($m, $module_name, $perl_files) = @_; 437 438 my ($excluded, $map) = @$m{qw(EXCLUDED MAP)}; 439 440 $excluded ||= []; 441 442 return $excluded, $map if $map; 443 444 # all files under ext/foo-bar (plus maybe some under t/lib)??? 445 446 my $ext; 447 for (@$perl_files) { 448 if (m{^((?:ext|dist|cpan)/[^/]+/)}) { 449 if (defined $ext and $ext ne $1) { 450 # more than one ext/$ext/ 451 undef $ext; 452 last; 453 } 454 $ext = $1; 455 } 456 elsif (m{^t/lib/}) { 457 next; 458 } 459 else { 460 undef $ext; 461 last; 462 } 463 } 464 465 if (defined $ext) { 466 $map = { '' => $ext }, 467 } 468 else { 469 (my $base = $module_name) =~ s{::}{/}g; 470 $base ="lib/$base"; 471 $map = { 472 'lib/' => 'lib/', 473 '' => "$base/", 474 }; 475 } 476 return $excluded, $map; 477} 478 479 480# Given an exclude list and a mapping hash, convert a CPAN filename 481# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t'). 482# Returns an empty list for an excluded file 483 484sub cpan_to_perl { 485 my ($excluded, $map, $cpan_file) = @_; 486 487 for my $exclude (@$excluded) { 488 # may be a simple string to match exactly, or a pattern 489 if (ref $exclude) { 490 return if $cpan_file =~ $exclude; 491 } 492 else { 493 return if $cpan_file eq $exclude; 494 } 495 } 496 497 my $perl_file = $cpan_file; 498 499 # try longest prefix first, then alphabetically on tie-break 500 for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map) 501 { 502 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/; 503 } 504 return $perl_file; 505} 506 507 508 509# do LWP::Simple::getstore, possibly without LWP::Simple being available 510 511my $lwp_simple_available; 512 513sub my_getstore { 514 my ($url, $file) = @_; 515 unless (defined $lwp_simple_available) { 516 eval { require LWP::Simple }; 517 $lwp_simple_available = $@ eq ''; 518 } 519 if ($lwp_simple_available) { 520 return LWP::Simple::is_success(LWP::Simple::getstore($url, $file)); 521 } 522 else { 523 return system(WGET_CMD, "-O", $file, $url) == 0; 524 } 525} 526 527 528# download and unpack a distribution 529# Returns the full pathname of the extracted directory 530# (eg '/tmp/XYZ/Foo_bar-1.23') 531 532# cache_dir: where to dowenload the .tar.gz file to 533# untar_dir: where to untar or unzup the file 534# module: name of module 535# dist: name of the distribution 536 537sub get_distribution { 538 my ($cache_dir, $untar_dir, $module, $dist) = @_; 539 540 $dist =~ m{.+/([^/]+)$} 541 or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n"; 542 my $filename = $1; 543 544 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); 545 my $download_file = File::Spec->catfile($download_dir, $filename); 546 547 # download distribution 548 549 if (-f $download_file and ! -s $download_file ) { 550 # wget can leave a zero-length file on failed download 551 unlink $download_file; 552 } 553 554 unless (-f $download_file) { 555 # not cached 556 $dist =~ /^([A-Z])([A-Z])/ 557 or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n"; 558 559 my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist"; 560 my_getstore($url, $download_file) 561 or die "ERROR: Could not fetch '$url'\n"; 562 } 563 564 # extract distribution 565 566 my $ae = Archive::Extract->new( archive => $download_file); 567 $ae->extract( to => $untar_dir ) 568 or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n"; 569 570 # get the name of the extracted distribution dir 571 572 my $path = File::Spec->catfile($untar_dir, $filename); 573 574 $path =~ s/\.tar\.gz$// or 575 $path =~ s/\.zip$// or 576 die "ERROR: downloaded file does not have a recognised suffix: $path\n"; 577 578 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path; 579 580 return $path; 581} 582 583 584# produce the diff of a single file 585sub file_diff { 586 my $outfh = shift; 587 my $cpan_file = shift; 588 my $perl_file = shift; 589 my $reverse = shift; 590 my $diff_opts = shift; 591 592 593 my @cmd = (DIFF_CMD, split ' ', $diff_opts); 594 if ($reverse) { 595 push @cmd, $perl_file, $cpan_file; 596 } 597 else { 598 push @cmd, $cpan_file, $perl_file; 599 } 600 my $result = `@cmd`; 601 602 $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm; 603 604 print $outfh $result; 605} 606 607 608run(); 609 610