1package File::Find; 2use 5.006; 3use strict; 4use warnings; 5use warnings::register; 6our $VERSION = '1.34'; 7require Exporter; 8require Cwd; 9 10our @ISA = qw(Exporter); 11our @EXPORT = qw(find finddepth); 12 13 14use strict; 15my $Is_VMS; 16my $Is_Win32; 17 18require File::Basename; 19require File::Spec; 20 21# Should ideally be my() not our() but local() currently 22# refuses to operate on lexicals 23 24our %SLnkSeen; 25our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 26 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 27 $pre_process, $post_process, $dangling_symlinks); 28 29sub contract_name { 30 my ($cdir,$fn) = @_; 31 32 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir; 33 34 $cdir = substr($cdir,0,rindex($cdir,'/')+1); 35 36 $fn =~ s|^\./||; 37 38 my $abs_name= $cdir . $fn; 39 40 if (substr($fn,0,3) eq '../') { 41 1 while $abs_name =~ s!/[^/]*/\.\./+!/!; 42 } 43 44 return $abs_name; 45} 46 47sub PathCombine($$) { 48 my ($Base,$Name) = @_; 49 my $AbsName; 50 51 if (substr($Name,0,1) eq '/') { 52 $AbsName= $Name; 53 } 54 else { 55 $AbsName= contract_name($Base,$Name); 56 } 57 58 # (simple) check for recursion 59 my $newlen= length($AbsName); 60 if ($newlen <= length($Base)) { 61 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') 62 && $AbsName eq substr($Base,0,$newlen)) 63 { 64 return undef; 65 } 66 } 67 return $AbsName; 68} 69 70sub Follow_SymLink($) { 71 my ($AbsName) = @_; 72 73 my ($NewName,$DEV, $INO); 74 ($DEV, $INO)= lstat $AbsName; 75 76 while (-l _) { 77 if ($SLnkSeen{$DEV, $INO}++) { 78 if ($follow_skip < 2) { 79 die "$AbsName is encountered a second time"; 80 } 81 else { 82 return undef; 83 } 84 } 85 $NewName= PathCombine($AbsName, readlink($AbsName)); 86 unless(defined $NewName) { 87 if ($follow_skip < 2) { 88 die "$AbsName is a recursive symbolic link"; 89 } 90 else { 91 return undef; 92 } 93 } 94 else { 95 $AbsName= $NewName; 96 } 97 ($DEV, $INO) = lstat($AbsName); 98 return undef unless defined $DEV; # dangling symbolic link 99 } 100 101 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) { 102 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) { 103 die "$AbsName encountered a second time"; 104 } 105 else { 106 return undef; 107 } 108 } 109 110 return $AbsName; 111} 112 113our($dir, $name, $fullname, $prune); 114sub _find_dir_symlnk($$$); 115sub _find_dir($$$); 116 117# check whether or not a scalar variable is tainted 118# (code straight from the Camel, 3rd ed., page 561) 119sub is_tainted_pp { 120 my $arg = shift; 121 my $nada = substr($arg, 0, 0); # zero-length 122 local $@; 123 eval { eval "# $nada" }; 124 return length($@) != 0; 125} 126 127sub _find_opt { 128 my $wanted = shift; 129 return unless @_; 130 die "invalid top directory" unless defined $_[0]; 131 132 # This function must local()ize everything because callbacks may 133 # call find() or finddepth() 134 135 local %SLnkSeen; 136 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 137 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 138 $pre_process, $post_process, $dangling_symlinks); 139 local($dir, $name, $fullname, $prune); 140 local *_ = \my $a; 141 142 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd(); 143 if ($Is_VMS) { 144 # VMS returns this by default in VMS format which just doesn't 145 # work for the rest of this module. 146 $cwd = VMS::Filespec::unixpath($cwd); 147 148 # Apparently this is not expected to have a trailing space. 149 # To attempt to make VMS/UNIX conversions mostly reversible, 150 # a trailing slash is needed. The run-time functions ignore the 151 # resulting double slash, but it causes the perl tests to fail. 152 $cwd =~ s#/\z##; 153 154 # This comes up in upper case now, but should be lower. 155 # In the future this could be exact case, no need to change. 156 } 157 my $cwd_untainted = $cwd; 158 my $check_t_cwd = 1; 159 $wanted_callback = $wanted->{wanted}; 160 $bydepth = $wanted->{bydepth}; 161 $pre_process = $wanted->{preprocess}; 162 $post_process = $wanted->{postprocess}; 163 $no_chdir = $wanted->{no_chdir}; 164 $full_check = $Is_Win32 ? 0 : $wanted->{follow}; 165 $follow = $Is_Win32 ? 0 : 166 $full_check || $wanted->{follow_fast}; 167 $follow_skip = $wanted->{follow_skip}; 168 $untaint = $wanted->{untaint}; 169 $untaint_pat = $wanted->{untaint_pattern}; 170 $untaint_skip = $wanted->{untaint_skip}; 171 $dangling_symlinks = $wanted->{dangling_symlinks}; 172 173 # for compatibility reasons (find.pl, find2perl) 174 local our ($topdir, $topdev, $topino, $topmode, $topnlink); 175 176 # a symbolic link to a directory doesn't increase the link count 177 $avoid_nlink = $follow || $File::Find::dont_use_nlink; 178 179 my ($abs_dir, $Is_Dir); 180 181 Proc_Top_Item: 182 foreach my $TOP (@_) { 183 my $top_item = $TOP; 184 $top_item = VMS::Filespec::unixify($top_item) if $Is_VMS; 185 186 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; 187 188 if ($Is_Win32) { 189 $top_item =~ s|[/\\]\z|| 190 unless $top_item =~ m{^(?:\w:)?[/\\]$}; 191 } 192 else { 193 $top_item =~ s|/\z|| unless $top_item eq '/'; 194 } 195 196 $Is_Dir= 0; 197 198 if ($follow) { 199 200 if (substr($top_item,0,1) eq '/') { 201 $abs_dir = $top_item; 202 } 203 elsif ($top_item eq $File::Find::current_dir) { 204 $abs_dir = $cwd; 205 } 206 else { # care about any ../ 207 $top_item =~ s/\.dir\z//i if $Is_VMS; 208 $abs_dir = contract_name("$cwd/",$top_item); 209 } 210 $abs_dir= Follow_SymLink($abs_dir); 211 unless (defined $abs_dir) { 212 if ($dangling_symlinks) { 213 if (ref $dangling_symlinks eq 'CODE') { 214 $dangling_symlinks->($top_item, $cwd); 215 } else { 216 warnings::warnif "$top_item is a dangling symbolic link\n"; 217 } 218 } 219 next Proc_Top_Item; 220 } 221 222 if (-d _) { 223 $top_item =~ s/\.dir\z//i if $Is_VMS; 224 _find_dir_symlnk($wanted, $abs_dir, $top_item); 225 $Is_Dir= 1; 226 } 227 } 228 else { # no follow 229 $topdir = $top_item; 230 unless (defined $topnlink) { 231 warnings::warnif "Can't stat $top_item: $!\n"; 232 next Proc_Top_Item; 233 } 234 if (-d _) { 235 $top_item =~ s/\.dir\z//i if $Is_VMS; 236 _find_dir($wanted, $top_item, $topnlink); 237 $Is_Dir= 1; 238 } 239 else { 240 $abs_dir= $top_item; 241 } 242 } 243 244 unless ($Is_Dir) { 245 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { 246 ($dir,$_) = ('./', $top_item); 247 } 248 249 $abs_dir = $dir; 250 if (( $untaint ) && (is_tainted($dir) )) { 251 ( $abs_dir ) = $dir =~ m|$untaint_pat|; 252 unless (defined $abs_dir) { 253 if ($untaint_skip == 0) { 254 die "directory $dir is still tainted"; 255 } 256 else { 257 next Proc_Top_Item; 258 } 259 } 260 } 261 262 unless ($no_chdir || chdir $abs_dir) { 263 warnings::warnif "Couldn't chdir $abs_dir: $!\n"; 264 next Proc_Top_Item; 265 } 266 267 $name = $abs_dir . $_; # $File::Find::name 268 $_ = $name if $no_chdir; 269 270 { $wanted_callback->() }; # protect against wild "next" 271 272 } 273 274 unless ( $no_chdir ) { 275 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) { 276 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|; 277 unless (defined $cwd_untainted) { 278 die "insecure cwd in find(depth)"; 279 } 280 $check_t_cwd = 0; 281 } 282 unless (chdir $cwd_untainted) { 283 die "Can't cd to $cwd: $!\n"; 284 } 285 } 286 } 287} 288 289# API: 290# $wanted 291# $p_dir : "parent directory" 292# $nlink : what came back from the stat 293# preconditions: 294# chdir (if not no_chdir) to dir 295 296sub _find_dir($$$) { 297 my ($wanted, $p_dir, $nlink) = @_; 298 my ($CdLvl,$Level) = (0,0); 299 my @Stack; 300 my @filenames; 301 my ($subcount,$sub_nlink); 302 my $SE= []; 303 my $dir_name= $p_dir; 304 my $dir_pref; 305 my $dir_rel = $File::Find::current_dir; 306 my $tainted = 0; 307 my $no_nlink; 308 309 if ($Is_Win32) { 310 $dir_pref 311 = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" ); 312 } elsif ($Is_VMS) { 313 314 # VMS is returning trailing .dir on directories 315 # and trailing . on files and symbolic links 316 # in UNIX syntax. 317 # 318 319 $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.'; 320 321 $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" ); 322 } 323 else { 324 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" ); 325 } 326 327 local ($dir, $name, $prune, *DIR); 328 329 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) { 330 my $udir = $p_dir; 331 if (( $untaint ) && (is_tainted($p_dir) )) { 332 ( $udir ) = $p_dir =~ m|$untaint_pat|; 333 unless (defined $udir) { 334 if ($untaint_skip == 0) { 335 die "directory $p_dir is still tainted"; 336 } 337 else { 338 return; 339 } 340 } 341 } 342 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 343 warnings::warnif "Can't cd to $udir: $!\n"; 344 return; 345 } 346 } 347 348 # push the starting directory 349 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 350 351 while (defined $SE) { 352 unless ($bydepth) { 353 $dir= $p_dir; # $File::Find::dir 354 $name= $dir_name; # $File::Find::name 355 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 356 # prune may happen here 357 $prune= 0; 358 { $wanted_callback->() }; # protect against wild "next" 359 next if $prune; 360 } 361 362 # change to that directory 363 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 364 my $udir= $dir_rel; 365 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) { 366 ( $udir ) = $dir_rel =~ m|$untaint_pat|; 367 unless (defined $udir) { 368 if ($untaint_skip == 0) { 369 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted"; 370 } else { # $untaint_skip == 1 371 next; 372 } 373 } 374 } 375 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 376 warnings::warnif "Can't cd to (" . 377 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n"; 378 next; 379 } 380 $CdLvl++; 381 } 382 383 $dir= $dir_name; # $File::Find::dir 384 385 # Get the list of files in the current directory. 386 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) { 387 warnings::warnif "Can't opendir($dir_name): $!\n"; 388 next; 389 } 390 @filenames = readdir DIR; 391 closedir(DIR); 392 @filenames = $pre_process->(@filenames) if $pre_process; 393 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; 394 395 # default: use whatever was specified 396 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back) 397 $no_nlink = $avoid_nlink; 398 # if dir has wrong nlink count, force switch to slower stat method 399 $no_nlink = 1 if ($nlink < 2); 400 401 if ($nlink == 2 && !$no_nlink) { 402 # This dir has no subdirectories. 403 for my $FN (@filenames) { 404 if ($Is_VMS) { 405 # Big hammer here - Compensate for VMS trailing . and .dir 406 # No win situation until this is changed, but this 407 # will handle the majority of the cases with breaking the fewest 408 409 $FN =~ s/\.dir\z//i; 410 $FN =~ s#\.$## if ($FN ne '.'); 411 } 412 next if $FN =~ $File::Find::skip_pattern; 413 414 $name = $dir_pref . $FN; # $File::Find::name 415 $_ = ($no_chdir ? $name : $FN); # $_ 416 { $wanted_callback->() }; # protect against wild "next" 417 } 418 419 } 420 else { 421 # This dir has subdirectories. 422 $subcount = $nlink - 2; 423 424 # HACK: insert directories at this position, so as to preserve 425 # the user pre-processed ordering of files (thus ensuring 426 # directory traversal is in user sorted order, not at random). 427 my $stack_top = @Stack; 428 429 for my $FN (@filenames) { 430 next if $FN =~ $File::Find::skip_pattern; 431 if ($subcount > 0 || $no_nlink) { 432 # Seen all the subdirs? 433 # check for directoriness. 434 # stat is faster for a file in the current directory 435 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3]; 436 437 if (-d _) { 438 --$subcount; 439 $FN =~ s/\.dir\z//i if $Is_VMS; 440 # HACK: replace push to preserve dir traversal order 441 #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink]; 442 splice @Stack, $stack_top, 0, 443 [$CdLvl,$dir_name,$FN,$sub_nlink]; 444 } 445 else { 446 $name = $dir_pref . $FN; # $File::Find::name 447 $_= ($no_chdir ? $name : $FN); # $_ 448 { $wanted_callback->() }; # protect against wild "next" 449 } 450 } 451 else { 452 $name = $dir_pref . $FN; # $File::Find::name 453 $_= ($no_chdir ? $name : $FN); # $_ 454 { $wanted_callback->() }; # protect against wild "next" 455 } 456 } 457 } 458 } 459 continue { 460 while ( defined ($SE = pop @Stack) ) { 461 ($Level, $p_dir, $dir_rel, $nlink) = @$SE; 462 if ($CdLvl > $Level && !$no_chdir) { 463 my $tmp; 464 if ($Is_VMS) { 465 $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']'; 466 } 467 else { 468 $tmp = join('/',('..') x ($CdLvl-$Level)); 469 } 470 die "Can't cd to $tmp from $dir_name: $!" 471 unless chdir ($tmp); 472 $CdLvl = $Level; 473 } 474 475 if ($Is_Win32) { 476 $dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} 477 ? "$p_dir$dir_rel" : "$p_dir/$dir_rel"); 478 $dir_pref = "$dir_name/"; 479 } 480 elsif ($^O eq 'VMS') { 481 if ($p_dir =~ m/[\]>]+$/) { 482 $dir_name = $p_dir; 483 $dir_name =~ s/([\]>]+)$/.$dir_rel$1/; 484 $dir_pref = $dir_name; 485 } 486 else { 487 $dir_name = "$p_dir/$dir_rel"; 488 $dir_pref = "$dir_name/"; 489 } 490 } 491 else { 492 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); 493 $dir_pref = "$dir_name/"; 494 } 495 496 if ( $nlink == -2 ) { 497 $name = $dir = $p_dir; # $File::Find::name / dir 498 $_ = $File::Find::current_dir; 499 $post_process->(); # End-of-directory processing 500 } 501 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now 502 $name = $dir_name; 503 if ( substr($name,-2) eq '/.' ) { 504 substr($name, length($name) == 2 ? -1 : -2) = ''; 505 } 506 $dir = $p_dir; 507 $_ = ($no_chdir ? $dir_name : $dir_rel ); 508 if ( substr($_,-2) eq '/.' ) { 509 substr($_, length($_) == 2 ? -1 : -2) = ''; 510 } 511 { $wanted_callback->() }; # protect against wild "next" 512 } 513 else { 514 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 515 last; 516 } 517 } 518 } 519} 520 521 522# API: 523# $wanted 524# $dir_loc : absolute location of a dir 525# $p_dir : "parent directory" 526# preconditions: 527# chdir (if not no_chdir) to dir 528 529sub _find_dir_symlnk($$$) { 530 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory 531 my @Stack; 532 my @filenames; 533 my $new_loc; 534 my $updir_loc = $dir_loc; # untainted parent directory 535 my $SE = []; 536 my $dir_name = $p_dir; 537 my $dir_pref; 538 my $loc_pref; 539 my $dir_rel = $File::Find::current_dir; 540 my $byd_flag; # flag for pending stack entry if $bydepth 541 my $tainted = 0; 542 my $ok = 1; 543 544 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" ); 545 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" ); 546 547 local ($dir, $name, $fullname, $prune, *DIR); 548 549 unless ($no_chdir) { 550 # untaint the topdir 551 if (( $untaint ) && (is_tainted($dir_loc) )) { 552 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted 553 # once untainted, $updir_loc is pushed on the stack (as parent directory); 554 # hence, we don't need to untaint the parent directory every time we chdir 555 # to it later 556 unless (defined $updir_loc) { 557 if ($untaint_skip == 0) { 558 die "directory $dir_loc is still tainted"; 559 } 560 else { 561 return; 562 } 563 } 564 } 565 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir); 566 unless ($ok) { 567 warnings::warnif "Can't cd to $updir_loc: $!\n"; 568 return; 569 } 570 } 571 572 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth; 573 574 while (defined $SE) { 575 576 unless ($bydepth) { 577 # change (back) to parent directory (always untainted) 578 unless ($no_chdir) { 579 unless (chdir $updir_loc) { 580 warnings::warnif "Can't cd to $updir_loc: $!\n"; 581 next; 582 } 583 } 584 $dir= $p_dir; # $File::Find::dir 585 $name= $dir_name; # $File::Find::name 586 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 587 $fullname= $dir_loc; # $File::Find::fullname 588 # prune may happen here 589 $prune= 0; 590 lstat($_); # make sure file tests with '_' work 591 { $wanted_callback->() }; # protect against wild "next" 592 next if $prune; 593 } 594 595 # change to that directory 596 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 597 $updir_loc = $dir_loc; 598 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) { 599 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir 600 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; 601 unless (defined $updir_loc) { 602 if ($untaint_skip == 0) { 603 die "directory $dir_loc is still tainted"; 604 } 605 else { 606 next; 607 } 608 } 609 } 610 unless (chdir $updir_loc) { 611 warnings::warnif "Can't cd to $updir_loc: $!\n"; 612 next; 613 } 614 } 615 616 $dir = $dir_name; # $File::Find::dir 617 618 # Get the list of files in the current directory. 619 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) { 620 warnings::warnif "Can't opendir($dir_loc): $!\n"; 621 next; 622 } 623 @filenames = readdir DIR; 624 closedir(DIR); 625 626 for my $FN (@filenames) { 627 if ($Is_VMS) { 628 # Big hammer here - Compensate for VMS trailing . and .dir 629 # No win situation until this is changed, but this 630 # will handle the majority of the cases with breaking the fewest. 631 632 $FN =~ s/\.dir\z//i; 633 $FN =~ s#\.$## if ($FN ne '.'); 634 } 635 next if $FN =~ $File::Find::skip_pattern; 636 637 # follow symbolic links / do an lstat 638 $new_loc = Follow_SymLink($loc_pref.$FN); 639 640 # ignore if invalid symlink 641 unless (defined $new_loc) { 642 if (!defined -l _ && $dangling_symlinks) { 643 $fullname = undef; 644 if (ref $dangling_symlinks eq 'CODE') { 645 $dangling_symlinks->($FN, $dir_pref); 646 } else { 647 warnings::warnif "$dir_pref$FN is a dangling symbolic link\n"; 648 } 649 } 650 else { 651 $fullname = $loc_pref . $FN; 652 } 653 $name = $dir_pref . $FN; 654 $_ = ($no_chdir ? $name : $FN); 655 { $wanted_callback->() }; 656 next; 657 } 658 659 if (-d _) { 660 if ($Is_VMS) { 661 $FN =~ s/\.dir\z//i; 662 $FN =~ s#\.$## if ($FN ne '.'); 663 $new_loc =~ s/\.dir\z//i; 664 $new_loc =~ s#\.$## if ($new_loc ne '.'); 665 } 666 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1]; 667 } 668 else { 669 $fullname = $new_loc; # $File::Find::fullname 670 $name = $dir_pref . $FN; # $File::Find::name 671 $_ = ($no_chdir ? $name : $FN); # $_ 672 { $wanted_callback->() }; # protect against wild "next" 673 } 674 } 675 676 } 677 continue { 678 while (defined($SE = pop @Stack)) { 679 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE; 680 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel"); 681 $dir_pref = "$dir_name/"; 682 $loc_pref = "$dir_loc/"; 683 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now 684 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 685 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted 686 warnings::warnif "Can't cd to $updir_loc: $!\n"; 687 next; 688 } 689 } 690 $fullname = $dir_loc; # $File::Find::fullname 691 $name = $dir_name; # $File::Find::name 692 if ( substr($name,-2) eq '/.' ) { 693 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name 694 } 695 $dir = $p_dir; # $File::Find::dir 696 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_ 697 if ( substr($_,-2) eq '/.' ) { 698 substr($_, length($_) == 2 ? -1 : -2) = ''; 699 } 700 701 lstat($_); # make sure file tests with '_' work 702 { $wanted_callback->() }; # protect against wild "next" 703 } 704 else { 705 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth; 706 last; 707 } 708 } 709 } 710} 711 712 713sub wrap_wanted { 714 my $wanted = shift; 715 if ( ref($wanted) eq 'HASH' ) { 716 # RT #122547 717 my %valid_options = map {$_ => 1} qw( 718 wanted 719 bydepth 720 preprocess 721 postprocess 722 follow 723 follow_fast 724 follow_skip 725 dangling_symlinks 726 no_chdir 727 untaint 728 untaint_pattern 729 untaint_skip 730 ); 731 my @invalid_options = (); 732 for my $v (keys %{$wanted}) { 733 push @invalid_options, $v unless exists $valid_options{$v}; 734 } 735 warn "Invalid option(s): @invalid_options" if @invalid_options; 736 737 unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) { 738 die 'no &wanted subroutine given'; 739 } 740 if ( $wanted->{follow} || $wanted->{follow_fast}) { 741 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip}; 742 } 743 if ( $wanted->{untaint} ) { 744 $wanted->{untaint_pattern} = $File::Find::untaint_pattern 745 unless defined $wanted->{untaint_pattern}; 746 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip}; 747 } 748 return $wanted; 749 } 750 elsif( ref( $wanted ) eq 'CODE' ) { 751 return { wanted => $wanted }; 752 } 753 else { 754 die 'no &wanted subroutine given'; 755 } 756} 757 758sub find { 759 my $wanted = shift; 760 _find_opt(wrap_wanted($wanted), @_); 761} 762 763sub finddepth { 764 my $wanted = wrap_wanted(shift); 765 $wanted->{bydepth} = 1; 766 _find_opt($wanted, @_); 767} 768 769# default 770$File::Find::skip_pattern = qr/^\.{1,2}\z/; 771$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|; 772 773# These are hard-coded for now, but may move to hint files. 774if ($^O eq 'VMS') { 775 $Is_VMS = 1; 776 $File::Find::dont_use_nlink = 1; 777} 778elsif ($^O eq 'MSWin32') { 779 $Is_Win32 = 1; 780} 781 782# this _should_ work properly on all platforms 783# where File::Find can be expected to work 784$File::Find::current_dir = File::Spec->curdir || '.'; 785 786$File::Find::dont_use_nlink = 1 787 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $Is_Win32 || 788 $^O eq 'interix' || $^O eq 'cygwin' || $^O eq 'qnx' || $^O eq 'nto'; 789 790# Set dont_use_nlink in your hint file if your system's stat doesn't 791# report the number of links in a directory as an indication 792# of the number of files. 793# See e.g. hints/haiku.sh for Haiku. 794unless ($File::Find::dont_use_nlink) { 795 require Config; 796 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'}); 797} 798 799# We need a function that checks if a scalar is tainted. Either use the 800# Scalar::Util module's tainted() function or our (slower) pure Perl 801# fallback is_tainted_pp() 802{ 803 local $@; 804 eval { require Scalar::Util }; 805 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted; 806} 807 8081; 809 810__END__ 811 812=head1 NAME 813 814File::Find - Traverse a directory tree. 815 816=head1 SYNOPSIS 817 818 use File::Find; 819 find(\&wanted, @directories_to_search); 820 sub wanted { ... } 821 822 use File::Find; 823 finddepth(\&wanted, @directories_to_search); 824 sub wanted { ... } 825 826 use File::Find; 827 find({ wanted => \&process, follow => 1 }, '.'); 828 829=head1 DESCRIPTION 830 831These are functions for searching through directory trees doing work 832on each file found similar to the Unix I<find> command. File::Find 833exports two functions, C<find> and C<finddepth>. They work similarly 834but have subtle differences. 835 836=over 4 837 838=item B<find> 839 840 find(\&wanted, @directories); 841 find(\%options, @directories); 842 843C<find()> does a depth-first search over the given C<@directories> in 844the order they are given. For each file or directory found, it calls 845the C<&wanted> subroutine. (See below for details on how to use the 846C<&wanted> function). Additionally, for each directory found, it will 847C<chdir()> into that directory and continue the search, invoking the 848C<&wanted> function on each file or subdirectory in the directory. 849 850=item B<finddepth> 851 852 finddepth(\&wanted, @directories); 853 finddepth(\%options, @directories); 854 855C<finddepth()> works just like C<find()> except that it invokes the 856C<&wanted> function for a directory I<after> invoking it for the 857directory's contents. It does a postorder traversal instead of a 858preorder traversal, working from the bottom of the directory tree up 859where C<find()> works from the top of the tree down. 860 861=back 862 863=head2 %options 864 865The first argument to C<find()> is either a code reference to your 866C<&wanted> function, or a hash reference describing the operations 867to be performed for each file. The 868code reference is described in L</The wanted function> below. 869 870Here are the possible keys for the hash: 871 872=over 3 873 874=item C<wanted> 875 876The value should be a code reference. This code reference is 877described in L</The wanted function> below. The C<&wanted> subroutine is 878mandatory. 879 880=item C<bydepth> 881 882Reports the name of a directory only AFTER all its entries 883have been reported. Entry point C<finddepth()> is a shortcut for 884specifying C<< { bydepth => 1 } >> in the first argument of C<find()>. 885 886=item C<preprocess> 887 888The value should be a code reference. This code reference is used to 889preprocess the current directory. The name of the currently processed 890directory is in C<$File::Find::dir>. Your preprocessing function is 891called after C<readdir()>, but before the loop that calls the C<wanted()> 892function. It is called with a list of strings (actually file/directory 893names) and is expected to return a list of strings. The code can be 894used to sort the file/directory names alphabetically, numerically, 895or to filter out directory entries based on their name alone. When 896I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op. 897 898=item C<postprocess> 899 900The value should be a code reference. It is invoked just before leaving 901the currently processed directory. It is called in void context with no 902arguments. The name of the current directory is in C<$File::Find::dir>. This 903hook is handy for summarizing a directory, such as calculating its disk 904usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a 905no-op. 906 907=item C<follow> 908 909Causes symbolic links to be followed. Since directory trees with symbolic 910links (followed) may contain files more than once and may even have 911cycles, a hash has to be built up with an entry for each file. 912This might be expensive both in space and time for a large 913directory tree. See L</follow_fast> and L</follow_skip> below. 914If either I<follow> or I<follow_fast> is in effect: 915 916=over 6 917 918=item * 919 920It is guaranteed that an I<lstat> has been called before the user's 921C<wanted()> function is called. This enables fast file checks involving C<_>. 922Note that this guarantee no longer holds if I<follow> or I<follow_fast> 923are not set. 924 925=item * 926 927There is a variable C<$File::Find::fullname> which holds the absolute 928pathname of the file with all symbolic links resolved. If the link is 929a dangling symbolic link, then fullname will be set to C<undef>. 930 931=back 932 933This is a no-op on Win32. 934 935=item C<follow_fast> 936 937This is similar to I<follow> except that it may report some files more 938than once. It does detect cycles, however. Since only symbolic links 939have to be hashed, this is much cheaper both in space and time. If 940processing a file more than once (by the user's C<wanted()> function) 941is worse than just taking time, the option I<follow> should be used. 942 943This is also a no-op on Win32. 944 945=item C<follow_skip> 946 947C<follow_skip==1>, which is the default, causes all files which are 948neither directories nor symbolic links to be ignored if they are about 949to be processed a second time. If a directory or a symbolic link 950are about to be processed a second time, File::Find dies. 951 952C<follow_skip==0> causes File::Find to die if any file is about to be 953processed a second time. 954 955C<follow_skip==2> causes File::Find to ignore any duplicate files and 956directories but to proceed normally otherwise. 957 958=item C<dangling_symlinks> 959 960Specifies what to do with symbolic links whose target doesn't exist. 961If true and a code reference, will be called with the symbolic link 962name and the directory it lives in as arguments. Otherwise, if true 963and warnings are on, a warning of the form C<"symbolic_link_name is a dangling 964symbolic link\n"> will be issued. If false, the dangling symbolic link 965will be silently ignored. 966 967=item C<no_chdir> 968 969Does not C<chdir()> to each directory as it recurses. The C<wanted()> 970function will need to be aware of this, of course. In this case, 971C<$_> will be the same as C<$File::Find::name>. 972 973=item C<untaint> 974 975If find is used in L<taint-mode|perlsec/Taint mode> (-T command line switch or 976if EUID != UID or if EGID != GID), then internally directory names have to be 977untainted before they can be C<chdir>'d to. Therefore they are checked against 978a regular expression I<untaint_pattern>. Note that all names passed to the 979user's C<wanted()> function are still tainted. If this option is used while not 980in taint-mode, C<untaint> is a no-op. 981 982=item C<untaint_pattern> 983 984See above. This should be set using the C<qr> quoting operator. 985The default is set to C<qr|^([-+@\w./]+)$|>. 986Note that the parentheses are vital. 987 988=item C<untaint_skip> 989 990If set, a directory which fails the I<untaint_pattern> is skipped, 991including all its sub-directories. The default is to C<die> in such a case. 992 993=back 994 995=head2 The wanted function 996 997The C<wanted()> function does whatever verifications you want on 998each file and directory. Note that despite its name, the C<wanted()> 999function is a generic callback function, and does B<not> tell 1000File::Find if a file is "wanted" or not. In fact, its return value 1001is ignored. 1002 1003The wanted function takes no arguments but rather does its work 1004through a collection of variables. 1005 1006=over 4 1007 1008=item C<$File::Find::dir> is the current directory name, 1009 1010=item C<$_> is the current filename within that directory 1011 1012=item C<$File::Find::name> is the complete pathname to the file. 1013 1014=back 1015 1016The above variables have all been localized and may be changed without 1017affecting data outside of the wanted function. 1018 1019For example, when examining the file F</some/path/foo.ext> you will have: 1020 1021 $File::Find::dir = /some/path/ 1022 $_ = foo.ext 1023 $File::Find::name = /some/path/foo.ext 1024 1025You are chdir()'d to C<$File::Find::dir> when the function is called, 1026unless C<no_chdir> was specified. Note that when changing to 1027directories is in effect, the root directory (F</>) is a somewhat 1028special case inasmuch as the concatenation of C<$File::Find::dir>, 1029C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The 1030table below summarizes all variants: 1031 1032 $File::Find::name $File::Find::dir $_ 1033 default / / . 1034 no_chdir=>0 /etc / etc 1035 /etc/x /etc x 1036 1037 no_chdir=>1 / / / 1038 /etc / /etc 1039 /etc/x /etc /etc/x 1040 1041 1042When C<follow> or C<follow_fast> are in effect, there is 1043also a C<$File::Find::fullname>. The function may set 1044C<$File::Find::prune> to prune the tree unless C<bydepth> was 1045specified. Unless C<follow> or C<follow_fast> is specified, for 1046compatibility reasons (find.pl, find2perl) there are in addition the 1047following globals available: C<$File::Find::topdir>, 1048C<$File::Find::topdev>, C<$File::Find::topino>, 1049C<$File::Find::topmode> and C<$File::Find::topnlink>. 1050 1051This library is useful for the C<find2perl> tool (distributed as part of the 1052App-find2perl CPAN distribution), which when fed, 1053 1054 find2perl / -name .nfs\* -mtime +7 \ 1055 -exec rm -f {} \; -o -fstype nfs -prune 1056 1057produces something like: 1058 1059 sub wanted { 1060 /^\.nfs.*\z/s && 1061 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && 1062 int(-M _) > 7 && 1063 unlink($_) 1064 || 1065 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) && 1066 $dev < 0 && 1067 ($File::Find::prune = 1); 1068 } 1069 1070Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical 1071filehandle that caches the information from the preceding 1072C<stat()>, C<lstat()>, or filetest. 1073 1074Here's another interesting wanted function. It will find all symbolic 1075links that don't resolve: 1076 1077 sub wanted { 1078 -l && !-e && print "bogus link: $File::Find::name\n"; 1079 } 1080 1081Note that you may mix directories and (non-directory) files in the list of 1082directories to be searched by the C<wanted()> function. 1083 1084 find(\&wanted, "./foo", "./bar", "./baz/epsilon"); 1085 1086In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be 1087evaluated by C<wanted()>. 1088 1089See also the script C<pfind> on CPAN for a nice application of this 1090module. 1091 1092=head1 WARNINGS 1093 1094If you run your program with the C<-w> switch, or if you use the 1095C<warnings> pragma, File::Find will report warnings for several weird 1096situations. You can disable these warnings by putting the statement 1097 1098 no warnings 'File::Find'; 1099 1100in the appropriate scope. See L<warnings> for more info about lexical 1101warnings. 1102 1103=head1 CAVEAT 1104 1105=over 2 1106 1107=item $dont_use_nlink 1108 1109You can set the variable C<$File::Find::dont_use_nlink> to 1 if you want to 1110force File::Find to always stat directories. This was used for file systems 1111that do not have an C<nlink> count matching the number of sub-directories. 1112Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file 1113system) and a couple of others. 1114 1115You shouldn't need to set this variable, since File::Find should now detect 1116such file systems on-the-fly and switch itself to using stat. This works even 1117for parts of your file system, like a mounted CD-ROM. 1118 1119If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs. 1120 1121=item symlinks 1122 1123Be aware that the option to follow symbolic links can be dangerous. 1124Depending on the structure of the directory tree (including symbolic 1125links to directories) you might traverse a given (physical) directory 1126more than once (only if C<follow_fast> is in effect). 1127Furthermore, deleting or changing files in a symbolically linked directory 1128might cause very unpleasant surprises, since you delete or change files 1129in an unknown directory. 1130 1131=back 1132 1133=head1 BUGS AND CAVEATS 1134 1135Despite the name of the C<finddepth()> function, both C<find()> and 1136C<finddepth()> perform a depth-first search of the directory 1137hierarchy. 1138 1139=head1 HISTORY 1140 1141File::Find used to produce incorrect results if called recursively. 1142During the development of perl 5.8 this bug was fixed. 1143The first fixed version of File::Find was 1.01. 1144 1145=head1 SEE ALSO 1146 1147L<find(1)>, find2perl. 1148 1149=cut 1150