1package File::Find; 2use 5.006; 3use strict; 4use warnings; 5use warnings::register; 6our $VERSION = '1.07'; 7require Exporter; 8require Cwd; 9 10# 11# Modified to ensure sub-directory traversal order is not inverded by stack 12# push and pops. That is remains in the same order as in the directory file, 13# or user pre-processing (EG:sorted). 14# 15 16=head1 NAME 17 18File::Find - Traverse a directory tree. 19 20=head1 SYNOPSIS 21 22 use File::Find; 23 find(\&wanted, @directories_to_search); 24 sub wanted { ... } 25 26 use File::Find; 27 finddepth(\&wanted, @directories_to_search); 28 sub wanted { ... } 29 30 use File::Find; 31 find({ wanted => \&process, follow => 1 }, '.'); 32 33=head1 DESCRIPTION 34 35These are functions for searching through directory trees doing work 36on each file found similar to the Unix I<find> command. File::Find 37exports two functions, C<find> and C<finddepth>. They work similarly 38but have subtle differences. 39 40=over 4 41 42=item B<find> 43 44 find(\&wanted, @directories); 45 find(\%options, @directories); 46 47C<find()> does a depth-first search over the given C<@directories> in 48the order they are given. For each file or directory found, it calls 49the C<&wanted> subroutine. (See below for details on how to use the 50C<&wanted> function). Additionally, for each directory found, it will 51C<chdir()> into that directory and continue the search, invoking the 52C<&wanted> function on each file or subdirectory in the directory. 53 54=item B<finddepth> 55 56 finddepth(\&wanted, @directories); 57 finddepth(\%options, @directories); 58 59C<finddepth()> works just like C<find()> except that is invokes the 60C<&wanted> function for a directory I<after> invoking it for the 61directory's contents. It does a postorder traversal instead of a 62preorder traversal, working from the bottom of the directory tree up 63where C<find()> works from the top of the tree down. 64 65=back 66 67=head2 %options 68 69The first argument to C<find()> is either a code reference to your 70C<&wanted> function, or a hash reference describing the operations 71to be performed for each file. The 72code reference is described in L<The wanted function> below. 73 74Here are the possible keys for the hash: 75 76=over 3 77 78=item C<wanted> 79 80The value should be a code reference. This code reference is 81described in L<The wanted function> below. 82 83=item C<bydepth> 84 85Reports the name of a directory only AFTER all its entries 86have been reported. Entry point C<finddepth()> is a shortcut for 87specifying C<<{ bydepth => 1 }>> in the first argument of C<find()>. 88 89=item C<preprocess> 90 91The value should be a code reference. This code reference is used to 92preprocess the current directory. The name of the currently processed 93directory is in C<$File::Find::dir>. Your preprocessing function is 94called after C<readdir()>, but before the loop that calls the C<wanted()> 95function. It is called with a list of strings (actually file/directory 96names) and is expected to return a list of strings. The code can be 97used to sort the file/directory names alphabetically, numerically, 98or to filter out directory entries based on their name alone. When 99I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op. 100 101=item C<postprocess> 102 103The value should be a code reference. It is invoked just before leaving 104the currently processed directory. It is called in void context with no 105arguments. The name of the current directory is in C<$File::Find::dir>. This 106hook is handy for summarizing a directory, such as calculating its disk 107usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a 108no-op. 109 110=item C<follow> 111 112Causes symbolic links to be followed. Since directory trees with symbolic 113links (followed) may contain files more than once and may even have 114cycles, a hash has to be built up with an entry for each file. 115This might be expensive both in space and time for a large 116directory tree. See I<follow_fast> and I<follow_skip> below. 117If either I<follow> or I<follow_fast> is in effect: 118 119=over 6 120 121=item * 122 123It is guaranteed that an I<lstat> has been called before the user's 124C<wanted()> function is called. This enables fast file checks involving S< _>. 125 126=item * 127 128There is a variable C<$File::Find::fullname> which holds the absolute 129pathname of the file with all symbolic links resolved 130 131=back 132 133=item C<follow_fast> 134 135This is similar to I<follow> except that it may report some files more 136than once. It does detect cycles, however. Since only symbolic links 137have to be hashed, this is much cheaper both in space and time. If 138processing a file more than once (by the user's C<wanted()> function) 139is worse than just taking time, the option I<follow> should be used. 140 141=item C<follow_skip> 142 143C<follow_skip==1>, which is the default, causes all files which are 144neither directories nor symbolic links to be ignored if they are about 145to be processed a second time. If a directory or a symbolic link 146are about to be processed a second time, File::Find dies. 147 148C<follow_skip==0> causes File::Find to die if any file is about to be 149processed a second time. 150 151C<follow_skip==2> causes File::Find to ignore any duplicate files and 152directories but to proceed normally otherwise. 153 154=item C<dangling_symlinks> 155 156If true and a code reference, will be called with the symbolic link 157name and the directory it lives in as arguments. Otherwise, if true 158and warnings are on, warning "symbolic_link_name is a dangling 159symbolic link\n" will be issued. If false, the dangling symbolic link 160will be silently ignored. 161 162=item C<no_chdir> 163 164Does not C<chdir()> to each directory as it recurses. The C<wanted()> 165function will need to be aware of this, of course. In this case, 166C<$_> will be the same as C<$File::Find::name>. 167 168=item C<untaint> 169 170If find is used in taint-mode (-T command line switch or if EUID != UID 171or if EGID != GID) then internally directory names have to be untainted 172before they can be chdir'ed to. Therefore they are checked against a regular 173expression I<untaint_pattern>. Note that all names passed to the user's 174I<wanted()> function are still tainted. If this option is used while 175not in taint-mode, C<untaint> is a no-op. 176 177=item C<untaint_pattern> 178 179See above. This should be set using the C<qr> quoting operator. 180The default is set to C<qr|^([-+@\w./]+)$|>. 181Note that the parentheses are vital. 182 183=item C<untaint_skip> 184 185If set, a directory which fails the I<untaint_pattern> is skipped, 186including all its sub-directories. The default is to 'die' in such a case. 187 188=back 189 190=head2 The wanted function 191 192The C<wanted()> function does whatever verifications you want on 193each file and directory. Note that despite its name, the C<wanted()> 194function is a generic callback function, and does B<not> tell 195File::Find if a file is "wanted" or not. In fact, its return value 196is ignored. 197 198The wanted function takes no arguments but rather does its work 199through a collection of variables. 200 201=over 4 202 203=item C<$File::Find::dir> is the current directory name, 204 205=item C<$_> is the current filename within that directory 206 207=item C<$File::Find::name> is the complete pathname to the file. 208 209=back 210 211Don't modify these variables. 212 213For example, when examining the file F</some/path/foo.ext> you will have: 214 215 $File::Find::dir = /some/path/ 216 $_ = foo.ext 217 $File::Find::name = /some/path/foo.ext 218 219You are chdir()'d toC<$File::Find::dir> when the function is called, 220unless C<no_chdir> was specified. Note that when changing to 221directories is in effect the root directory (F</>) is a somewhat 222special case inasmuch as the concatenation of C<$File::Find::dir>, 223C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The 224table below summarizes all variants: 225 226 $File::Find::name $File::Find::dir $_ 227 default / / . 228 no_chdir=>0 /etc / etc 229 /etc/x /etc x 230 231 no_chdir=>1 / / / 232 /etc / /etc 233 /etc/x /etc /etc/x 234 235 236When <follow> or <follow_fast> are in effect, there is 237also a C<$File::Find::fullname>. The function may set 238C<$File::Find::prune> to prune the tree unless C<bydepth> was 239specified. Unless C<follow> or C<follow_fast> is specified, for 240compatibility reasons (find.pl, find2perl) there are in addition the 241following globals available: C<$File::Find::topdir>, 242C<$File::Find::topdev>, C<$File::Find::topino>, 243C<$File::Find::topmode> and C<$File::Find::topnlink>. 244 245This library is useful for the C<find2perl> tool, which when fed, 246 247 find2perl / -name .nfs\* -mtime +7 \ 248 -exec rm -f {} \; -o -fstype nfs -prune 249 250produces something like: 251 252 sub wanted { 253 /^\.nfs.*\z/s && 254 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && 255 int(-M _) > 7 && 256 unlink($_) 257 || 258 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) && 259 $dev < 0 && 260 ($File::Find::prune = 1); 261 } 262 263Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical 264filehandle that caches the information from the preceding 265C<stat()>, C<lstat()>, or filetest. 266 267Here's another interesting wanted function. It will find all symbolic 268links that don't resolve: 269 270 sub wanted { 271 -l && !-e && print "bogus link: $File::Find::name\n"; 272 } 273 274See also the script C<pfind> on CPAN for a nice application of this 275module. 276 277=head1 WARNINGS 278 279If you run your program with the C<-w> switch, or if you use the 280C<warnings> pragma, File::Find will report warnings for several weird 281situations. You can disable these warnings by putting the statement 282 283 no warnings 'File::Find'; 284 285in the appropriate scope. See L<perllexwarn> for more info about lexical 286warnings. 287 288=head1 CAVEAT 289 290=over 2 291 292=item $dont_use_nlink 293 294You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to 295force File::Find to always stat directories. This was used for file systems 296that do not have an C<nlink> count matching the number of sub-directories. 297Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file 298system) and a couple of others. 299 300You shouldn't need to set this variable, since File::Find should now detect 301such file systems on-the-fly and switch itself to using stat. This works even 302for parts of your file system, like a mounted CD-ROM. 303 304If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs. 305 306=item symlinks 307 308Be aware that the option to follow symbolic links can be dangerous. 309Depending on the structure of the directory tree (including symbolic 310links to directories) you might traverse a given (physical) directory 311more than once (only if C<follow_fast> is in effect). 312Furthermore, deleting or changing files in a symbolically linked directory 313might cause very unpleasant surprises, since you delete or change files 314in an unknown directory. 315 316=back 317 318=head1 NOTES 319 320=over 4 321 322=item * 323 324Mac OS (Classic) users should note a few differences: 325 326=over 4 327 328=item * 329 330The path separator is ':', not '/', and the current directory is denoted 331as ':', not '.'. You should be careful about specifying relative pathnames. 332While a full path always begins with a volume name, a relative pathname 333should always begin with a ':'. If specifying a volume name only, a 334trailing ':' is required. 335 336=item * 337 338C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_> 339contains the name of a directory, that name may or may not end with a 340':'. Likewise, C<$File::Find::name>, which contains the complete 341pathname to that directory, and C<$File::Find::fullname>, which holds 342the absolute pathname of that directory with all symbolic links resolved, 343may or may not end with a ':'. 344 345=item * 346 347The default C<untaint_pattern> (see above) on Mac OS is set to 348C<qr|^(.+)$|>. Note that the parentheses are vital. 349 350=item * 351 352The invisible system file "Icon\015" is ignored. While this file may 353appear in every directory, there are some more invisible system files 354on every volume, which are all located at the volume root level (i.e. 355"MacintoshHD:"). These system files are B<not> excluded automatically. 356Your filter may use the following code to recognize invisible files or 357directories (requires Mac::Files): 358 359 use Mac::Files; 360 361 # invisible() -- returns 1 if file/directory is invisible, 362 # 0 if it's visible or undef if an error occurred 363 364 sub invisible($) { 365 my $file = shift; 366 my ($fileCat, $fileInfo); 367 my $invisible_flag = 1 << 14; 368 369 if ( $fileCat = FSpGetCatInfo($file) ) { 370 if ($fileInfo = $fileCat->ioFlFndrInfo() ) { 371 return (($fileInfo->fdFlags & $invisible_flag) && 1); 372 } 373 } 374 return undef; 375 } 376 377Generally, invisible files are system files, unless an odd application 378decides to use invisible files for its own purposes. To distinguish 379such files from system files, you have to look at the B<type> and B<creator> 380file attributes. The MacPerl built-in functions C<GetFileInfo(FILE)> and 381C<SetFileInfo(CREATOR, TYPE, FILES)> offer access to these attributes 382(see MacPerl.pm for details). 383 384Files that appear on the desktop actually reside in an (hidden) directory 385named "Desktop Folder" on the particular disk volume. Note that, although 386all desktop files appear to be on the same "virtual" desktop, each disk 387volume actually maintains its own "Desktop Folder" directory. 388 389=back 390 391=back 392 393=head1 BUGS AND CAVEATS 394 395Despite the name of the C<finddepth()> function, both C<find()> and 396C<finddepth()> perform a depth-first search of the directory 397hierarchy. 398 399=head1 HISTORY 400 401File::Find used to produce incorrect results if called recursively. 402During the development of perl 5.8 this bug was fixed. 403The first fixed version of File::Find was 1.01. 404 405=cut 406 407our @ISA = qw(Exporter); 408our @EXPORT = qw(find finddepth); 409 410 411use strict; 412my $Is_VMS; 413my $Is_MacOS; 414 415require File::Basename; 416require File::Spec; 417 418# Should ideally be my() not our() but local() currently 419# refuses to operate on lexicals 420 421our %SLnkSeen; 422our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 423 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 424 $pre_process, $post_process, $dangling_symlinks); 425 426sub contract_name { 427 my ($cdir,$fn) = @_; 428 429 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir; 430 431 $cdir = substr($cdir,0,rindex($cdir,'/')+1); 432 433 $fn =~ s|^\./||; 434 435 my $abs_name= $cdir . $fn; 436 437 if (substr($fn,0,3) eq '../') { 438 1 while $abs_name =~ s!/[^/]*/\.\./!/!; 439 } 440 441 return $abs_name; 442} 443 444# return the absolute name of a directory or file 445sub contract_name_Mac { 446 my ($cdir,$fn) = @_; 447 my $abs_name; 448 449 if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':' 450 451 my $colon_count = length ($1); 452 if ($colon_count == 1) { 453 $abs_name = $cdir . $2; 454 return $abs_name; 455 } 456 else { 457 # need to move up the tree, but 458 # only if it's not a volume name 459 for (my $i=1; $i<$colon_count; $i++) { 460 unless ($cdir =~ /^[^:]+:$/) { # volume name 461 $cdir =~ s/[^:]+:$//; 462 } 463 else { 464 return undef; 465 } 466 } 467 $abs_name = $cdir . $2; 468 return $abs_name; 469 } 470 471 } 472 else { 473 474 # $fn may be a valid path to a directory or file or (dangling) 475 # symlink, without a leading ':' 476 if ( (-e $fn) || (-l $fn) ) { 477 if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:* 478 return $fn; # $fn is already an absolute path 479 } 480 else { 481 $abs_name = $cdir . $fn; 482 return $abs_name; 483 } 484 } 485 else { # argh!, $fn is not a valid directory/file 486 return undef; 487 } 488 } 489} 490 491sub PathCombine($$) { 492 my ($Base,$Name) = @_; 493 my $AbsName; 494 495 if ($Is_MacOS) { 496 # $Name is the resolved symlink (always a full path on MacOS), 497 # i.e. there's no need to call contract_name_Mac() 498 $AbsName = $Name; 499 500 # (simple) check for recursion 501 if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion 502 return undef; 503 } 504 } 505 else { 506 if (substr($Name,0,1) eq '/') { 507 $AbsName= $Name; 508 } 509 else { 510 $AbsName= contract_name($Base,$Name); 511 } 512 513 # (simple) check for recursion 514 my $newlen= length($AbsName); 515 if ($newlen <= length($Base)) { 516 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') 517 && $AbsName eq substr($Base,0,$newlen)) 518 { 519 return undef; 520 } 521 } 522 } 523 return $AbsName; 524} 525 526sub Follow_SymLink($) { 527 my ($AbsName) = @_; 528 529 my ($NewName,$DEV, $INO); 530 ($DEV, $INO)= lstat $AbsName; 531 532 while (-l _) { 533 if ($SLnkSeen{$DEV, $INO}++) { 534 if ($follow_skip < 2) { 535 die "$AbsName is encountered a second time"; 536 } 537 else { 538 return undef; 539 } 540 } 541 $NewName= PathCombine($AbsName, readlink($AbsName)); 542 unless(defined $NewName) { 543 if ($follow_skip < 2) { 544 die "$AbsName is a recursive symbolic link"; 545 } 546 else { 547 return undef; 548 } 549 } 550 else { 551 $AbsName= $NewName; 552 } 553 ($DEV, $INO) = lstat($AbsName); 554 return undef unless defined $DEV; # dangling symbolic link 555 } 556 557 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) { 558 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) { 559 die "$AbsName encountered a second time"; 560 } 561 else { 562 return undef; 563 } 564 } 565 566 return $AbsName; 567} 568 569our($dir, $name, $fullname, $prune); 570sub _find_dir_symlnk($$$); 571sub _find_dir($$$); 572 573# check whether or not a scalar variable is tainted 574# (code straight from the Camel, 3rd ed., page 561) 575sub is_tainted_pp { 576 my $arg = shift; 577 my $nada = substr($arg, 0, 0); # zero-length 578 local $@; 579 eval { eval "# $nada" }; 580 return length($@) != 0; 581} 582 583sub _find_opt { 584 my $wanted = shift; 585 die "invalid top directory" unless defined $_[0]; 586 587 # This function must local()ize everything because callbacks may 588 # call find() or finddepth() 589 590 local %SLnkSeen; 591 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 592 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 593 $pre_process, $post_process, $dangling_symlinks); 594 local($dir, $name, $fullname, $prune); 595 local *_ = \my $a; 596 597 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd(); 598 my $cwd_untainted = $cwd; 599 my $check_t_cwd = 1; 600 $wanted_callback = $wanted->{wanted}; 601 $bydepth = $wanted->{bydepth}; 602 $pre_process = $wanted->{preprocess}; 603 $post_process = $wanted->{postprocess}; 604 $no_chdir = $wanted->{no_chdir}; 605 $full_check = $wanted->{follow}; 606 $follow = $full_check || $wanted->{follow_fast}; 607 $follow_skip = $wanted->{follow_skip}; 608 $untaint = $wanted->{untaint}; 609 $untaint_pat = $wanted->{untaint_pattern}; 610 $untaint_skip = $wanted->{untaint_skip}; 611 $dangling_symlinks = $wanted->{dangling_symlinks}; 612 613 # for compatibility reasons (find.pl, find2perl) 614 local our ($topdir, $topdev, $topino, $topmode, $topnlink); 615 616 # a symbolic link to a directory doesn't increase the link count 617 $avoid_nlink = $follow || $File::Find::dont_use_nlink; 618 619 my ($abs_dir, $Is_Dir); 620 621 Proc_Top_Item: 622 foreach my $TOP (@_) { 623 my $top_item = $TOP; 624 625 if ($Is_MacOS) { 626 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; 627 $top_item = ":$top_item" 628 if ( (-d _) && ( $top_item !~ /:/ ) ); 629 } 630 else { 631 $top_item =~ s|/\z|| unless $top_item eq '/'; 632 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; 633 } 634 635 $Is_Dir= 0; 636 637 if ($follow) { 638 639 if ($Is_MacOS) { 640 $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety 641 642 if ($top_item eq $File::Find::current_dir) { 643 $abs_dir = $cwd; 644 } 645 else { 646 $abs_dir = contract_name_Mac($cwd, $top_item); 647 unless (defined $abs_dir) { 648 warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n"; 649 next Proc_Top_Item; 650 } 651 } 652 653 } 654 else { 655 if (substr($top_item,0,1) eq '/') { 656 $abs_dir = $top_item; 657 } 658 elsif ($top_item eq $File::Find::current_dir) { 659 $abs_dir = $cwd; 660 } 661 else { # care about any ../ 662 $abs_dir = contract_name("$cwd/",$top_item); 663 } 664 } 665 $abs_dir= Follow_SymLink($abs_dir); 666 unless (defined $abs_dir) { 667 if ($dangling_symlinks) { 668 if (ref $dangling_symlinks eq 'CODE') { 669 $dangling_symlinks->($top_item, $cwd); 670 } else { 671 warnings::warnif "$top_item is a dangling symbolic link\n"; 672 } 673 } 674 next Proc_Top_Item; 675 } 676 677 if (-d _) { 678 _find_dir_symlnk($wanted, $abs_dir, $top_item); 679 $Is_Dir= 1; 680 } 681 } 682 else { # no follow 683 $topdir = $top_item; 684 unless (defined $topnlink) { 685 warnings::warnif "Can't stat $top_item: $!\n"; 686 next Proc_Top_Item; 687 } 688 if (-d _) { 689 $top_item =~ s/\.dir\z//i if $Is_VMS; 690 _find_dir($wanted, $top_item, $topnlink); 691 $Is_Dir= 1; 692 } 693 else { 694 $abs_dir= $top_item; 695 } 696 } 697 698 unless ($Is_Dir) { 699 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { 700 if ($Is_MacOS) { 701 ($dir,$_) = (':', $top_item); # $File::Find::dir, $_ 702 } 703 else { 704 ($dir,$_) = ('./', $top_item); 705 } 706 } 707 708 $abs_dir = $dir; 709 if (( $untaint ) && (is_tainted($dir) )) { 710 ( $abs_dir ) = $dir =~ m|$untaint_pat|; 711 unless (defined $abs_dir) { 712 if ($untaint_skip == 0) { 713 die "directory $dir is still tainted"; 714 } 715 else { 716 next Proc_Top_Item; 717 } 718 } 719 } 720 721 unless ($no_chdir || chdir $abs_dir) { 722 warnings::warnif "Couldn't chdir $abs_dir: $!\n"; 723 next Proc_Top_Item; 724 } 725 726 $name = $abs_dir . $_; # $File::Find::name 727 $_ = $name if $no_chdir; 728 729 { $wanted_callback->() }; # protect against wild "next" 730 731 } 732 733 unless ( $no_chdir ) { 734 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) { 735 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|; 736 unless (defined $cwd_untainted) { 737 die "insecure cwd in find(depth)"; 738 } 739 $check_t_cwd = 0; 740 } 741 unless (chdir $cwd_untainted) { 742 die "Can't cd to $cwd: $!\n"; 743 } 744 } 745 } 746} 747 748# API: 749# $wanted 750# $p_dir : "parent directory" 751# $nlink : what came back from the stat 752# preconditions: 753# chdir (if not no_chdir) to dir 754 755sub _find_dir($$$) { 756 my ($wanted, $p_dir, $nlink) = @_; 757 my ($CdLvl,$Level) = (0,0); 758 my @Stack; 759 my @filenames; 760 my ($subcount,$sub_nlink); 761 my $SE= []; 762 my $dir_name= $p_dir; 763 my $dir_pref; 764 my $dir_rel = $File::Find::current_dir; 765 my $tainted = 0; 766 my $no_nlink; 767 768 if ($Is_MacOS) { 769 $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface 770 } 771 else { 772 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" ); 773 } 774 775 local ($dir, $name, $prune, *DIR); 776 777 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) { 778 my $udir = $p_dir; 779 if (( $untaint ) && (is_tainted($p_dir) )) { 780 ( $udir ) = $p_dir =~ m|$untaint_pat|; 781 unless (defined $udir) { 782 if ($untaint_skip == 0) { 783 die "directory $p_dir is still tainted"; 784 } 785 else { 786 return; 787 } 788 } 789 } 790 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 791 warnings::warnif "Can't cd to $udir: $!\n"; 792 return; 793 } 794 } 795 796 # push the starting directory 797 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 798 799 if ($Is_MacOS) { 800 $p_dir = $dir_pref; # ensure trailing ':' 801 } 802 803 while (defined $SE) { 804 unless ($bydepth) { 805 $dir= $p_dir; # $File::Find::dir 806 $name= $dir_name; # $File::Find::name 807 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 808 # prune may happen here 809 $prune= 0; 810 { $wanted_callback->() }; # protect against wild "next" 811 next if $prune; 812 } 813 814 # change to that directory 815 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 816 my $udir= $dir_rel; 817 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) { 818 ( $udir ) = $dir_rel =~ m|$untaint_pat|; 819 unless (defined $udir) { 820 if ($untaint_skip == 0) { 821 if ($Is_MacOS) { 822 die "directory ($p_dir) $dir_rel is still tainted"; 823 } 824 else { 825 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted"; 826 } 827 } else { # $untaint_skip == 1 828 next; 829 } 830 } 831 } 832 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 833 if ($Is_MacOS) { 834 warnings::warnif "Can't cd to ($p_dir) $udir: $!\n"; 835 } 836 else { 837 warnings::warnif "Can't cd to (" . 838 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n"; 839 } 840 next; 841 } 842 $CdLvl++; 843 } 844 845 if ($Is_MacOS) { 846 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/); 847 } 848 849 $dir= $dir_name; # $File::Find::dir 850 851 # Get the list of files in the current directory. 852 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) { 853 warnings::warnif "Can't opendir($dir_name): $!\n"; 854 next; 855 } 856 @filenames = readdir DIR; 857 closedir(DIR); 858 @filenames = $pre_process->(@filenames) if $pre_process; 859 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; 860 861 # default: use whatever was specifid 862 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back) 863 $no_nlink = $avoid_nlink; 864 # if dir has wrong nlink count, force switch to slower stat method 865 $no_nlink = 1 if ($nlink < 2); 866 867 if ($nlink == 2 && !$no_nlink) { 868 # This dir has no subdirectories. 869 for my $FN (@filenames) { 870 next if $FN =~ $File::Find::skip_pattern; 871 872 $name = $dir_pref . $FN; # $File::Find::name 873 $_ = ($no_chdir ? $name : $FN); # $_ 874 { $wanted_callback->() }; # protect against wild "next" 875 } 876 877 } 878 else { 879 # This dir has subdirectories. 880 $subcount = $nlink - 2; 881 882 # HACK: insert directories at this position. so as to preserve 883 # the user pre-processed ordering of files. 884 # EG: directory traversal is in user sorted order, not at random. 885 my $stack_top = @Stack; 886 887 for my $FN (@filenames) { 888 next if $FN =~ $File::Find::skip_pattern; 889 if ($subcount > 0 || $no_nlink) { 890 # Seen all the subdirs? 891 # check for directoriness. 892 # stat is faster for a file in the current directory 893 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3]; 894 895 if (-d _) { 896 --$subcount; 897 $FN =~ s/\.dir\z//i if $Is_VMS; 898 # HACK: replace push to preserve dir traversal order 899 #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink]; 900 splice @Stack, $stack_top, 0, 901 [$CdLvl,$dir_name,$FN,$sub_nlink]; 902 } 903 else { 904 $name = $dir_pref . $FN; # $File::Find::name 905 $_= ($no_chdir ? $name : $FN); # $_ 906 { $wanted_callback->() }; # protect against wild "next" 907 } 908 } 909 else { 910 $name = $dir_pref . $FN; # $File::Find::name 911 $_= ($no_chdir ? $name : $FN); # $_ 912 { $wanted_callback->() }; # protect against wild "next" 913 } 914 } 915 } 916 } 917 continue { 918 while ( defined ($SE = pop @Stack) ) { 919 ($Level, $p_dir, $dir_rel, $nlink) = @$SE; 920 if ($CdLvl > $Level && !$no_chdir) { 921 my $tmp; 922 if ($Is_MacOS) { 923 $tmp = (':' x ($CdLvl-$Level)) . ':'; 924 } 925 else { 926 $tmp = join('/',('..') x ($CdLvl-$Level)); 927 } 928 die "Can't cd to $dir_name" . $tmp 929 unless chdir ($tmp); 930 $CdLvl = $Level; 931 } 932 933 if ($Is_MacOS) { 934 # $pdir always has a trailing ':', except for the starting dir, 935 # where $dir_rel eq ':' 936 $dir_name = "$p_dir$dir_rel"; 937 $dir_pref = "$dir_name:"; 938 } 939 else { 940 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); 941 $dir_pref = "$dir_name/"; 942 } 943 944 if ( $nlink == -2 ) { 945 $name = $dir = $p_dir; # $File::Find::name / dir 946 $_ = $File::Find::current_dir; 947 $post_process->(); # End-of-directory processing 948 } 949 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now 950 $name = $dir_name; 951 if ($Is_MacOS) { 952 if ($dir_rel eq ':') { # must be the top dir, where we started 953 $name =~ s|:$||; # $File::Find::name 954 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/); 955 } 956 $dir = $p_dir; # $File::Find::dir 957 $_ = ($no_chdir ? $name : $dir_rel); # $_ 958 } 959 else { 960 if ( substr($name,-2) eq '/.' ) { 961 substr($name, length($name) == 2 ? -1 : -2) = ''; 962 } 963 $dir = $p_dir; 964 $_ = ($no_chdir ? $dir_name : $dir_rel ); 965 if ( substr($_,-2) eq '/.' ) { 966 substr($_, length($_) == 2 ? -1 : -2) = ''; 967 } 968 } 969 { $wanted_callback->() }; # protect against wild "next" 970 } 971 else { 972 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 973 last; 974 } 975 } 976 } 977} 978 979 980# API: 981# $wanted 982# $dir_loc : absolute location of a dir 983# $p_dir : "parent directory" 984# preconditions: 985# chdir (if not no_chdir) to dir 986 987sub _find_dir_symlnk($$$) { 988 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory 989 my @Stack; 990 my @filenames; 991 my $new_loc; 992 my $updir_loc = $dir_loc; # untainted parent directory 993 my $SE = []; 994 my $dir_name = $p_dir; 995 my $dir_pref; 996 my $loc_pref; 997 my $dir_rel = $File::Find::current_dir; 998 my $byd_flag; # flag for pending stack entry if $bydepth 999 my $tainted = 0; 1000 my $ok = 1; 1001 1002 if ($Is_MacOS) { 1003 $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:"; 1004 $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:"; 1005 } else { 1006 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" ); 1007 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" ); 1008 } 1009 1010 local ($dir, $name, $fullname, $prune, *DIR); 1011 1012 unless ($no_chdir) { 1013 # untaint the topdir 1014 if (( $untaint ) && (is_tainted($dir_loc) )) { 1015 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted 1016 # once untainted, $updir_loc is pushed on the stack (as parent directory); 1017 # hence, we don't need to untaint the parent directory every time we chdir 1018 # to it later 1019 unless (defined $updir_loc) { 1020 if ($untaint_skip == 0) { 1021 die "directory $dir_loc is still tainted"; 1022 } 1023 else { 1024 return; 1025 } 1026 } 1027 } 1028 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir); 1029 unless ($ok) { 1030 warnings::warnif "Can't cd to $updir_loc: $!\n"; 1031 return; 1032 } 1033 } 1034 1035 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth; 1036 1037 if ($Is_MacOS) { 1038 $p_dir = $dir_pref; # ensure trailing ':' 1039 } 1040 1041 while (defined $SE) { 1042 1043 unless ($bydepth) { 1044 # change (back) to parent directory (always untainted) 1045 unless ($no_chdir) { 1046 unless (chdir $updir_loc) { 1047 warnings::warnif "Can't cd to $updir_loc: $!\n"; 1048 next; 1049 } 1050 } 1051 $dir= $p_dir; # $File::Find::dir 1052 $name= $dir_name; # $File::Find::name 1053 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 1054 $fullname= $dir_loc; # $File::Find::fullname 1055 # prune may happen here 1056 $prune= 0; 1057 lstat($_); # make sure file tests with '_' work 1058 { $wanted_callback->() }; # protect against wild "next" 1059 next if $prune; 1060 } 1061 1062 # change to that directory 1063 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 1064 $updir_loc = $dir_loc; 1065 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) { 1066 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir 1067 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; 1068 unless (defined $updir_loc) { 1069 if ($untaint_skip == 0) { 1070 die "directory $dir_loc is still tainted"; 1071 } 1072 else { 1073 next; 1074 } 1075 } 1076 } 1077 unless (chdir $updir_loc) { 1078 warnings::warnif "Can't cd to $updir_loc: $!\n"; 1079 next; 1080 } 1081 } 1082 1083 if ($Is_MacOS) { 1084 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/); 1085 } 1086 1087 $dir = $dir_name; # $File::Find::dir 1088 1089 # Get the list of files in the current directory. 1090 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) { 1091 warnings::warnif "Can't opendir($dir_loc): $!\n"; 1092 next; 1093 } 1094 @filenames = readdir DIR; 1095 closedir(DIR); 1096 1097 for my $FN (@filenames) { 1098 next if $FN =~ $File::Find::skip_pattern; 1099 1100 # follow symbolic links / do an lstat 1101 $new_loc = Follow_SymLink($loc_pref.$FN); 1102 1103 # ignore if invalid symlink 1104 next unless defined $new_loc; 1105 1106 if (-d _) { 1107 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1]; 1108 } 1109 else { 1110 $fullname = $new_loc; # $File::Find::fullname 1111 $name = $dir_pref . $FN; # $File::Find::name 1112 $_ = ($no_chdir ? $name : $FN); # $_ 1113 { $wanted_callback->() }; # protect against wild "next" 1114 } 1115 } 1116 1117 } 1118 continue { 1119 while (defined($SE = pop @Stack)) { 1120 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE; 1121 if ($Is_MacOS) { 1122 # $p_dir always has a trailing ':', except for the starting dir, 1123 # where $dir_rel eq ':' 1124 $dir_name = "$p_dir$dir_rel"; 1125 $dir_pref = "$dir_name:"; 1126 $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:"; 1127 } 1128 else { 1129 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); 1130 $dir_pref = "$dir_name/"; 1131 $loc_pref = "$dir_loc/"; 1132 } 1133 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now 1134 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 1135 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted 1136 warnings::warnif "Can't cd to $updir_loc: $!\n"; 1137 next; 1138 } 1139 } 1140 $fullname = $dir_loc; # $File::Find::fullname 1141 $name = $dir_name; # $File::Find::name 1142 if ($Is_MacOS) { 1143 if ($dir_rel eq ':') { # must be the top dir, where we started 1144 $name =~ s|:$||; # $File::Find::name 1145 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/); 1146 } 1147 $dir = $p_dir; # $File::Find::dir 1148 $_ = ($no_chdir ? $name : $dir_rel); # $_ 1149 } 1150 else { 1151 if ( substr($name,-2) eq '/.' ) { 1152 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name 1153 } 1154 $dir = $p_dir; # $File::Find::dir 1155 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_ 1156 if ( substr($_,-2) eq '/.' ) { 1157 substr($_, length($_) == 2 ? -1 : -2) = ''; 1158 } 1159 } 1160 1161 lstat($_); # make sure file tests with '_' work 1162 { $wanted_callback->() }; # protect against wild "next" 1163 } 1164 else { 1165 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth; 1166 last; 1167 } 1168 } 1169 } 1170} 1171 1172 1173sub wrap_wanted { 1174 my $wanted = shift; 1175 if ( ref($wanted) eq 'HASH' ) { 1176 if ( $wanted->{follow} || $wanted->{follow_fast}) { 1177 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip}; 1178 } 1179 if ( $wanted->{untaint} ) { 1180 $wanted->{untaint_pattern} = $File::Find::untaint_pattern 1181 unless defined $wanted->{untaint_pattern}; 1182 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip}; 1183 } 1184 return $wanted; 1185 } 1186 else { 1187 return { wanted => $wanted }; 1188 } 1189} 1190 1191sub find { 1192 my $wanted = shift; 1193 _find_opt(wrap_wanted($wanted), @_); 1194} 1195 1196sub finddepth { 1197 my $wanted = wrap_wanted(shift); 1198 $wanted->{bydepth} = 1; 1199 _find_opt($wanted, @_); 1200} 1201 1202# default 1203$File::Find::skip_pattern = qr/^\.{1,2}\z/; 1204$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|; 1205 1206# These are hard-coded for now, but may move to hint files. 1207if ($^O eq 'VMS') { 1208 $Is_VMS = 1; 1209 $File::Find::dont_use_nlink = 1; 1210} 1211elsif ($^O eq 'MacOS') { 1212 $Is_MacOS = 1; 1213 $File::Find::dont_use_nlink = 1; 1214 $File::Find::skip_pattern = qr/^Icon\015\z/; 1215 $File::Find::untaint_pattern = qr|^(.+)$|; 1216} 1217 1218# this _should_ work properly on all platforms 1219# where File::Find can be expected to work 1220$File::Find::current_dir = File::Spec->curdir || '.'; 1221 1222$File::Find::dont_use_nlink = 1 1223 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' || 1224 $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'qnx' || 1225 $^O eq 'nto'; 1226 1227# Set dont_use_nlink in your hint file if your system's stat doesn't 1228# report the number of links in a directory as an indication 1229# of the number of files. 1230# See, e.g. hints/machten.sh for MachTen 2.2. 1231unless ($File::Find::dont_use_nlink) { 1232 require Config; 1233 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'}); 1234} 1235 1236# We need a function that checks if a scalar is tainted. Either use the 1237# Scalar::Util module's tainted() function or our (slower) pure Perl 1238# fallback is_tainted_pp() 1239{ 1240 local $@; 1241 eval { require Scalar::Util }; 1242 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted; 1243} 1244 12451; 1246