1#!/usr/bin/env perl 2 3# core-cpan-diff: Compare CPAN modules with their equivalent in core 4 5# Originally based on App::DualLivedDiff by Steffen Mueller. 6 7use strict; 8use warnings; 9 10use 5.010; 11 12use Getopt::Long; 13use File::Temp (); 14use File::Path (); 15use File::Spec; 16use Archive::Extract; 17use IO::Uncompress::Gunzip (); 18use File::Compare (); 19use ExtUtils::Manifest; 20 21BEGIN { die "Must be run from root of perl source tree\n" unless -d 'Porting' } 22use lib 'Porting'; 23use Maintainers (); 24 25# if running from blead, we may be doing -Ilib, which means when we 26# 'chdir /tmp/foo', Archive::Extract may not find Archive::Tar etc. 27# So preload the things we need, and tell it to check %INC first: 28 29use Archive::Tar; 30use IPC::Open3; 31use IO::Select; 32$Module::Load::Conditional::CHECK_INC_HASH = 1; 33# stop Archive::Extract whinging about lack of Archive::Zip 34$Archive::Extract::WARN = 0; 35 36 37# Files, which if they exist in CPAN but not in perl, will not generate 38# an 'Only in CPAN' listing 39# 40our %IGNORABLE = map { ($_ => 1) } 41 qw(.cvsignore .dualLivedDiffConfig .gitignore 42 ANNOUNCE Announce Artistic AUTHORS BENCHMARK BUGS Build.PL 43 CHANGELOG ChangeLog CHANGES Changes COPYING Copying CREDITS 44 GOALS HISTORY INSTALL INSTALL.SKIP LICENSE Makefile.PL 45 MANIFEST MANIFEST.SKIP META.yml NEW NOTES ppport.h README 46 SIGNATURE THANKS TODO Todo VERSION WHATSNEW); 47 48# where, under the cache dir, to untar stuff to 49 50use constant UNTAR_DIR => 'untarred'; 51 52use constant DIFF_CMD => 'diff'; 53use constant WGET_CMD => 'wget'; 54 55sub usage { 56 print STDERR "\n@_\n\n" if @_; 57 print STDERR <<HERE; 58Usage: $0 [opts] [ -d | -v | -x ] [ -a | module ... ] 59 60-a/--all Scan all dual-life modules. 61 62-c/--cachedir Where to save downloaded CPAN tarball files 63 (defaults to /tmp/something/ with deletion after each run). 64 65-d/--diff Display file differences using diff(1), rather than just 66 listing which files have changed. 67 The diff(1) command is assumed to be in your PATH. 68 69--diffopts Options to pass to the diff command. Defaults to '-u'. 70 71-f|force Force download from CPAN of new 02packages.details.txt file 72 (with --crosscheck only). 73 74-o/--output File name to write output to (defaults to STDOUT). 75 76-r/--reverse Reverses the diff (perl to CPAN). 77 78-v/--verbose List the fate of *all* files in the tarball, not just those 79 that differ or are missing. 80 81-x|crosscheck List the distributions whose current CPAN version differs from 82 that in blead (i.e. the DISTRIBUTION field in Maintainers.pl). 83 84By default (i.e. without the --crosscheck option), for each listed module 85(or with -a, all CPAN modules listed in Maintainers.pl), grab the tarball 86from CPAN associated with that module, and compare the files in it with 87those in the perl source tree. 88 89Must be run from the root of the perl source tree. 90Module names must match the keys of %Modules in Maintainers.pl. 91HERE 92 exit(1); 93} 94 95 96sub run { 97 my $scan_all; 98 my $diff_opts; 99 my $reverse = 0; 100 my $cache_dir; 101 my $use_diff; 102 my $output_file; 103 my $verbose; 104 my $force; 105 my $do_crosscheck; 106 107 GetOptions( 108 'a|all' => \$scan_all, 109 'c|cachedir=s' => \$cache_dir, 110 'd|diff' => \$use_diff, 111 'diffopts:s' => \$diff_opts, 112 'f|force' => \$force, 113 'h|help' => \&usage, 114 'o|output=s' => \$output_file, 115 'r|reverse' => \$reverse, 116 'v|verbose' => \$verbose, 117 'x|crosscheck' => \$do_crosscheck, 118 ) or usage; 119 120 121 my @modules; 122 123 usage("Cannot mix -a with module list") if $scan_all && @ARGV; 124 125 if ($do_crosscheck) { 126 usage("can't use -r, -d, --diffopts, -v with --crosscheck") 127 if ($reverse || $use_diff || $diff_opts || $verbose); 128 } 129 else { 130 $diff_opts = '-u' unless defined $diff_opts; 131 usage("can't use -f without --crosscheck") if $force; 132 } 133 134 @modules = $scan_all 135 ? grep $Maintainers::Modules{$_}{CPAN}, 136 (sort {lc $a cmp lc $b } keys %Maintainers::Modules) 137 : @ARGV; 138 usage("No modules specified") unless @modules; 139 140 141 my $outfh; 142 if (defined $output_file) { 143 open $outfh, '>', $output_file 144 or die "ERROR: could not open file '$output_file' for writing: $!\n"; 145 } 146 else { 147 open $outfh, ">&STDOUT" 148 or die "ERROR: can't dup STDOUT: $!\n"; 149 } 150 151 if (defined $cache_dir) { 152 die "ERROR: no such directory: '$cache_dir'\n" unless -d $cache_dir; 153 } 154 155 if ($do_crosscheck) { 156 do_crosscheck($outfh, $cache_dir, $force, \@modules); 157 } 158 else { 159 do_compare(\@modules, $outfh, $output_file, $cache_dir, $verbose, $use_diff, 160 $reverse, $diff_opts); 161 } 162} 163 164 165 166# compare a list of modules against their CPAN equivalents 167 168sub do_compare { 169 my ($modules, $outfh, $output_file, $cache_dir, $verbose, 170 $use_diff, $reverse, $diff_opts) = @_; 171 172 173 # first, make sure we have a directory where they can all be untarred, 174 # and if its a permanent directory, clear any previous content 175 my $untar_dir; 176 if ($cache_dir) { 177 $untar_dir = File::Spec->catdir($cache_dir, UNTAR_DIR); 178 if (-d $untar_dir) { 179 File::Path::rmtree($untar_dir) 180 or die "failed to remove $untar_dir\n"; 181 } 182 mkdir $untar_dir 183 or die "mkdir $untar_dir: $!\n"; 184 } 185 else { 186 $untar_dir = File::Temp::tempdir( CLEANUP => 1 ); 187 } 188 189 my %ignorable = map { ($_ => 1) } @Maintainers::IGNORABLE; 190 191 my %seen_dist; 192 for my $module (@$modules) { 193 warn "Processing $module ...\n" if defined $output_file; 194 print $outfh "\n$module\n" unless $use_diff; 195 196 my $m = $Maintainers::Modules{$module} 197 or die "ERROR: No such module in Maintainers.pl: '$module'\n"; 198 199 unless ($m->{CPAN}) { 200 print $outfh "WARNING: $module is not dual-life; skipping\n"; 201 next; 202 } 203 204 my $dist = $m->{DISTRIBUTION}; 205 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $dist; 206 207 if ($seen_dist{$dist}) { 208 warn "WARNING: duplicate entry for $dist in $module\n" 209 } 210 $seen_dist{$dist}++; 211 212 my $cpan_dir; 213 eval { 214 $cpan_dir = get_distribution($cache_dir, $untar_dir, $module, $dist) 215 }; 216 if ($@) { 217 print $outfh " ", $@; 218 print $outfh " (skipping)\n"; 219 next; 220 } 221 222 my @perl_files = Maintainers::get_module_files($module); 223 224 my $manifest = File::Spec->catfile($cpan_dir, 'MANIFEST'); 225 die "ERROR: no such file: $manifest\n" unless -f $manifest; 226 227 my $cpan_files = ExtUtils::Manifest::maniread($manifest); 228 my @cpan_files = sort keys %$cpan_files; 229 230 my ($excluded, $map) = get_map($m, $module, \@perl_files); 231 232 my %perl_unseen; 233 @perl_unseen{@perl_files} = (); 234 my %perl_files = %perl_unseen; 235 236 foreach my $cpan_file (@cpan_files) { 237 my $mapped_file = cpan_to_perl($excluded, $map, $cpan_file); 238 unless (defined $mapped_file) { 239 print $outfh " Excluded: $cpan_file\n" if $verbose; 240 next; 241 } 242 243 if (exists $perl_files{$mapped_file}) { 244 delete $perl_unseen{$mapped_file}; 245 } 246 else { 247 # some CPAN files foo are stored in core as foo.packed, 248 # which are then unpacked by 'make test_prep' 249 my $packed_file = "$mapped_file.packed"; 250 if (exists $perl_files{$packed_file} ) { 251 if (! -f $mapped_file and -f $packed_file) { 252 print $outfh <<EOF; 253WARNING: $mapped_file not found, but .packed variant exists. 254Perhaps you need to run 'make test_prep'? 255EOF 256 next; 257 } 258 delete $perl_unseen{$packed_file}; 259 } 260 else { 261 if ($ignorable{$cpan_file}) { 262 print $outfh " Ignored: $cpan_file\n" if $verbose; 263 next; 264 } 265 266 unless ($use_diff) { 267 print $outfh " CPAN only: $cpan_file", 268 ($cpan_file eq $mapped_file) ? "\n" 269 : " (expected $mapped_file)\n"; 270 } 271 next; 272 } 273 } 274 275 276 my $abs_cpan_file = File::Spec->catfile($cpan_dir, $cpan_file); 277 278 # should never happen 279 die "ERROR: can't find file $abs_cpan_file\n" unless -f $abs_cpan_file; 280 281 # might happen if the FILES entry in Maintainers.pl is wrong 282 unless (-f $mapped_file) { 283 print $outfh "WARNING: perl file not found: $mapped_file\n"; 284 next; 285 } 286 287 288 if (File::Compare::compare($abs_cpan_file, $mapped_file)) { 289 if ($use_diff) { 290 file_diff($outfh, $abs_cpan_file, $mapped_file, 291 $reverse, $diff_opts); 292 } 293 else { 294 if ($cpan_file eq $mapped_file) { 295 print $outfh " Modified: $cpan_file\n"; 296 } 297 else { 298 print $outfh " Modified: $cpan_file $mapped_file\n"; 299 } 300 } 301 } 302 elsif ($verbose) { 303 if ($cpan_file eq $mapped_file) { 304 print $outfh " Unchanged: $cpan_file\n"; 305 } 306 else { 307 print $outfh " Unchanged: $cpan_file $mapped_file\n"; 308 } 309 } 310 } 311 for (sort keys %perl_unseen) { 312 print $outfh " Perl only: $_\n" unless $use_diff; 313 } 314 } 315} 316 317# given FooBar-1.23_45.tar.gz, return FooBar 318 319sub distro_base { 320 my $d = shift; 321 $d =~ s/\.tar\.gz$//; 322 $d =~ s/\.gip$//; 323 $d =~ s/[\d\-_\.]+$//; 324 return $d; 325} 326 327# process --crosscheck action: 328# ie list all distributions whose CPAN versions differ from that listed in 329# Maintainers.pl 330 331sub do_crosscheck { 332 my ($outfh, $cache_dir, $force, $modules) = @_; 333 334 my $file = '02packages.details.txt'; 335 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); 336 my $path = File::Spec->catfile($download_dir, $file); 337 my $gzfile = "$path.gz"; 338 339 # grab 02packages.details.txt 340 341 my $url = 'http://www.cpan.org/modules/02packages.details.txt.gz'; 342 343 if (! -f $gzfile or $force) { 344 unlink $gzfile; 345 my_getstore($url, $gzfile); 346 } 347 unlink $path; 348 IO::Uncompress::Gunzip::gunzip($gzfile, $path) 349 or die "ERROR: failed to ungzip $gzfile: $IO::Uncompress::Gunzip::GunzipError\n"; 350 351 # suck in the data from it 352 353 open my $fh, '<', $path 354 or die "ERROR: open: $file: $!\n"; 355 356 my %distros; 357 my %modules; 358 359 while (<$fh>) { 360 next if 1../^$/; 361 chomp; 362 my @f = split ' ', $_; 363 if (@f != 3) { 364 warn "WARNING: $file:$.: line doesn't have three fields (skipping)\n"; 365 next; 366 } 367 my $distro = $f[2]; 368 $distro =~ s{^[A-Z]/[A-Z]{2}/}{}; # strip leading A/AB/ 369 $modules{$f[0]} = $distro; 370 371 (my $short_distro = $distro) =~ s{^.*/}{}; 372 373 $distros{distro_base($short_distro)}{$distro} = 1; 374 } 375 376 for my $module (@$modules) { 377 my $m = $Maintainers::Modules{$module} 378 or die "ERROR: No such module in Maintainers.pl: '$module'\n"; 379 380 unless ($m->{CPAN}) { 381 print $outfh "\nWARNING: $module is not dual-life; skipping\n"; 382 next; 383 } 384 385 # given an entry like 386 # Foo::Bar 1.23 foo-bar-1.23.tar.gz, 387 # first compare the module name against Foo::Bar, and failing that, 388 # against foo-bar 389 390 my $pdist = $m->{DISTRIBUTION}; 391 die "ERROR: $module has no DISTRIBUTION entry\n" unless defined $pdist; 392 393 my $cdist = $modules{$module}; 394 (my $short_pdist = $pdist) =~ s{^.*/}{}; 395 396 unless (defined $cdist) { 397 my $d = $distros{distro_base($short_pdist)}; 398 unless (defined $d) { 399 print $outfh "\n$module: Can't determine current CPAN entry\n"; 400 next; 401 } 402 if (keys %$d > 1) { 403 print $outfh "\n$module: (found more than one CPAN candidate):\n"; 404 print $outfh " perl: $pdist\n"; 405 print $outfh " CPAN: $_\n" for sort keys %$d; 406 next; 407 } 408 $cdist = (keys %$d)[0]; 409 } 410 411 if ($cdist ne $pdist) { 412 print $outfh "\n$module:\n Perl: $pdist\n CPAN: $cdist\n"; 413 } 414 } 415} 416 417 418 419# get the EXCLUDED and MAP entries for this module, or 420# make up defauts if they don't exist 421 422sub get_map { 423 my ($m, $module_name, $perl_files) = @_; 424 425 my ($excluded, $map) = @$m{qw(EXCLUDED MAP)}; 426 427 $excluded ||= []; 428 429 return $excluded, $map if $map; 430 431 # all files under ext/foo-bar (plus maybe some under t/lib)??? 432 433 my $ext; 434 for (@$perl_files) { 435 if (m{^(ext/[^/]+/)}) { 436 if (defined $ext and $ext ne $1) { 437 # more than one ext/$ext/ 438 undef $ext; 439 last; 440 } 441 $ext = $1; 442 } 443 elsif (m{^t/lib/}) { 444 next; 445 } 446 else { 447 undef $ext; 448 last; 449 } 450 } 451 452 if (defined $ext) { 453 $map = { '' => $ext }, 454 } 455 else { 456 (my $base = $module_name) =~ s{::}{/}g; 457 $base ="lib/$base"; 458 $map = { 459 'lib/' => 'lib/', 460 '' => "$base/", 461 }; 462 } 463 return $excluded, $map; 464} 465 466 467# Given an exclude list and a mapping hash, convert a CPAN filename 468# (eg 't/bar.t') to the equivalent perl filename (eg 'lib/Foo/Bar/t/bar.t'). 469# Returns an empty list for an excluded file 470 471sub cpan_to_perl { 472 my ($excluded, $map, $cpan_file) = @_; 473 474 for my $exclude (@$excluded) { 475 # may be a simple string to match exactly, or a pattern 476 if (ref $exclude) { 477 return if $cpan_file =~ $exclude; 478 } 479 else { 480 return if $cpan_file eq $exclude; 481 } 482 } 483 484 my $perl_file = $cpan_file; 485 486 # try longest prefix first, then alphabetically on tie-break 487 for my $prefix (sort { length($b) <=> length($a) || $a cmp $b } keys %$map) 488 { 489 last if $perl_file =~ s/^\Q$prefix/$map->{$prefix}/; 490 } 491 return $perl_file; 492} 493 494 495 496# do LWP::Simple::getstore, possibly without LWP::Simple being available 497 498my $lwp_simple_available; 499 500sub my_getstore { 501 my ($url, $file) = @_; 502 unless (defined $lwp_simple_available) { 503 eval { require LWP::Simple }; 504 $lwp_simple_available = $@ eq ''; 505 } 506 if ($lwp_simple_available) { 507 return LWP::Simple::is_success(LWP::Simple::getstore($url, $file)); 508 } 509 else { 510 return system(WGET_CMD, "-O", $file, $url) == 0; 511 } 512} 513 514 515# download and unpack a distribution 516# Returns the full pathname of the extracted directory 517# (eg '/tmp/XYZ/Foo_bar-1.23') 518 519# cache_dir: where to dowenload the .tar.gz file to 520# untar_dir: where to untar or unzup the file 521# module: name of module 522# dist: name of the distribution 523 524sub get_distribution { 525 my ($cache_dir, $untar_dir, $module, $dist) = @_; 526 527 $dist =~ m{.+/([^/]+)$} 528 or die "ERROR: $module: invalid DISTRIBUTION name (no AUTHOR/ prefix): $dist\n"; 529 my $filename = $1; 530 531 my $download_dir = $cache_dir || File::Temp::tempdir( CLEANUP => 1 ); 532 my $download_file = File::Spec->catfile($download_dir, $filename); 533 534 # download distribution 535 536 if (-f $download_file and ! -s $download_file ) { 537 # wget can leave a zero-length file on failed download 538 unlink $download_file; 539 } 540 541 unless (-f $download_file) { 542 # not cached 543 $dist =~ /^([A-Z])([A-Z])/ 544 or die "ERROR: $module: invalid DISTRIBUTION name (not /^[A-Z]{2}/): $dist\n"; 545 546 my $url = "http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist"; 547 my_getstore($url, $download_file) 548 or die "ERROR: Could not fetch '$url'\n"; 549 } 550 551 # extract distribution 552 553 my $ae = Archive::Extract->new( archive => $download_file); 554 $ae->extract( to => $untar_dir ) 555 or die "ERROR: failed to extract distribution '$download_file to temp. dir: " . $ae->error() . "\n"; 556 557 # get the name of the extracted distribution dir 558 559 my $path = File::Spec->catfile($untar_dir, $filename); 560 561 $path =~ s/\.tar\.gz$// or 562 $path =~ s/\.zip$// or 563 die "ERROR: downloaded file does not have a recognised suffix: $path\n"; 564 565 die "ERROR: Extracted tarball does not appear as $path\n" unless -d $path; 566 567 return $path; 568} 569 570 571# produce the diff of a single file 572sub file_diff { 573 my $outfh = shift; 574 my $cpan_file = shift; 575 my $perl_file = shift; 576 my $reverse = shift; 577 my $diff_opts = shift; 578 579 580 my @cmd = (DIFF_CMD, split ' ', $diff_opts); 581 if ($reverse) { 582 push @cmd, $perl_file, $cpan_file; 583 } 584 else { 585 push @cmd, $cpan_file, $perl_file; 586 } 587 my $result = `@cmd`; 588 589 $result =~ s{^(--- |\+\+\+ )/tmp/[^/]+/}{$1}gm; 590 591 print $outfh $result; 592} 593 594 595run(); 596 597