16fb12b70Safresh1package File::Find; 2*3d61058aSafresh1 36fb12b70Safresh1use 5.006; 46fb12b70Safresh1use strict; 56fb12b70Safresh1use warnings; 66fb12b70Safresh1use warnings::register; 7*3d61058aSafresh1 8eac174f2Safresh1use Exporter 'import'; 96fb12b70Safresh1require Cwd; 10*3d61058aSafresh1require File::Basename; 11*3d61058aSafresh1require File::Spec; 126fb12b70Safresh1 13*3d61058aSafresh1our $VERSION = '1.44'; 146fb12b70Safresh1our @EXPORT = qw(find finddepth); 156fb12b70Safresh1 16b46d8ef2Safresh1my $Is_VMS = $^O eq 'VMS'; 17b46d8ef2Safresh1my $Is_Win32 = $^O eq 'MSWin32'; 186fb12b70Safresh1 196fb12b70Safresh1 206fb12b70Safresh1# Should ideally be my() not our() but local() currently 216fb12b70Safresh1# refuses to operate on lexicals 226fb12b70Safresh1 236fb12b70Safresh1our %SLnkSeen; 246fb12b70Safresh1our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 256fb12b70Safresh1 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 266fb12b70Safresh1 $pre_process, $post_process, $dangling_symlinks); 276fb12b70Safresh1 286fb12b70Safresh1sub contract_name { 296fb12b70Safresh1 my ($cdir,$fn) = @_; 306fb12b70Safresh1 316fb12b70Safresh1 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir; 326fb12b70Safresh1 336fb12b70Safresh1 $cdir = substr($cdir,0,rindex($cdir,'/')+1); 346fb12b70Safresh1 356fb12b70Safresh1 $fn =~ s|^\./||; 366fb12b70Safresh1 376fb12b70Safresh1 my $abs_name= $cdir . $fn; 386fb12b70Safresh1 396fb12b70Safresh1 if (substr($fn,0,3) eq '../') { 406fb12b70Safresh1 1 while $abs_name =~ s!/[^/]*/\.\./+!/!; 416fb12b70Safresh1 } 426fb12b70Safresh1 436fb12b70Safresh1 return $abs_name; 446fb12b70Safresh1} 456fb12b70Safresh1 46e0680481Safresh1sub _is_absolute { 47e0680481Safresh1 return $_[0] =~ m|^(?:[A-Za-z]:)?/| if $Is_Win32; 48e0680481Safresh1 return substr($_[0], 0, 1) eq '/'; 49e0680481Safresh1} 50e0680481Safresh1 51e0680481Safresh1sub _is_root { 52e0680481Safresh1 return $_[0] =~ m|^(?:[A-Za-z]:)?/\z| if $Is_Win32; 53e0680481Safresh1 return $_[0] eq '/'; 54e0680481Safresh1} 55e0680481Safresh1 566fb12b70Safresh1sub PathCombine($$) { 576fb12b70Safresh1 my ($Base,$Name) = @_; 586fb12b70Safresh1 my $AbsName; 596fb12b70Safresh1 60e0680481Safresh1 if (_is_absolute($Name)) { 616fb12b70Safresh1 $AbsName= $Name; 626fb12b70Safresh1 } 636fb12b70Safresh1 else { 646fb12b70Safresh1 $AbsName= contract_name($Base,$Name); 656fb12b70Safresh1 } 666fb12b70Safresh1 676fb12b70Safresh1 # (simple) check for recursion 686fb12b70Safresh1 my $newlen= length($AbsName); 696fb12b70Safresh1 if ($newlen <= length($Base)) { 706fb12b70Safresh1 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/') 716fb12b70Safresh1 && $AbsName eq substr($Base,0,$newlen)) 726fb12b70Safresh1 { 736fb12b70Safresh1 return undef; 746fb12b70Safresh1 } 756fb12b70Safresh1 } 766fb12b70Safresh1 return $AbsName; 776fb12b70Safresh1} 786fb12b70Safresh1 796fb12b70Safresh1sub Follow_SymLink($) { 806fb12b70Safresh1 my ($AbsName) = @_; 816fb12b70Safresh1 826fb12b70Safresh1 my ($NewName,$DEV, $INO); 836fb12b70Safresh1 ($DEV, $INO)= lstat $AbsName; 846fb12b70Safresh1 856fb12b70Safresh1 while (-l _) { 866fb12b70Safresh1 if ($SLnkSeen{$DEV, $INO}++) { 876fb12b70Safresh1 if ($follow_skip < 2) { 886fb12b70Safresh1 die "$AbsName is encountered a second time"; 896fb12b70Safresh1 } 906fb12b70Safresh1 else { 916fb12b70Safresh1 return undef; 926fb12b70Safresh1 } 936fb12b70Safresh1 } 94e0680481Safresh1 my $Link = readlink($AbsName); 95e0680481Safresh1 # canonicalize directory separators 96e0680481Safresh1 $Link =~ s|\\|/|g if $Is_Win32; 97e0680481Safresh1 $NewName= PathCombine($AbsName, $Link); 986fb12b70Safresh1 unless(defined $NewName) { 996fb12b70Safresh1 if ($follow_skip < 2) { 1006fb12b70Safresh1 die "$AbsName is a recursive symbolic link"; 1016fb12b70Safresh1 } 1026fb12b70Safresh1 else { 1036fb12b70Safresh1 return undef; 1046fb12b70Safresh1 } 1056fb12b70Safresh1 } 1066fb12b70Safresh1 else { 1076fb12b70Safresh1 $AbsName= $NewName; 1086fb12b70Safresh1 } 1096fb12b70Safresh1 ($DEV, $INO) = lstat($AbsName); 1106fb12b70Safresh1 return undef unless defined $DEV; # dangling symbolic link 1116fb12b70Safresh1 } 1126fb12b70Safresh1 1136fb12b70Safresh1 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) { 1146fb12b70Safresh1 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) { 1156fb12b70Safresh1 die "$AbsName encountered a second time"; 1166fb12b70Safresh1 } 1176fb12b70Safresh1 else { 1186fb12b70Safresh1 return undef; 1196fb12b70Safresh1 } 1206fb12b70Safresh1 } 1216fb12b70Safresh1 1226fb12b70Safresh1 return $AbsName; 1236fb12b70Safresh1} 1246fb12b70Safresh1 1256fb12b70Safresh1our($dir, $name, $fullname, $prune); 1266fb12b70Safresh1sub _find_dir_symlnk($$$); 1276fb12b70Safresh1sub _find_dir($$$); 1286fb12b70Safresh1 1296fb12b70Safresh1# check whether or not a scalar variable is tainted 1306fb12b70Safresh1# (code straight from the Camel, 3rd ed., page 561) 1316fb12b70Safresh1sub is_tainted_pp { 1326fb12b70Safresh1 my $arg = shift; 1336fb12b70Safresh1 my $nada = substr($arg, 0, 0); # zero-length 1346fb12b70Safresh1 local $@; 1356fb12b70Safresh1 eval { eval "# $nada" }; 1366fb12b70Safresh1 return length($@) != 0; 1376fb12b70Safresh1} 1386fb12b70Safresh1 139e0680481Safresh1 1406fb12b70Safresh1sub _find_opt { 1416fb12b70Safresh1 my $wanted = shift; 142b8851fccSafresh1 return unless @_; 1436fb12b70Safresh1 die "invalid top directory" unless defined $_[0]; 1446fb12b70Safresh1 1456fb12b70Safresh1 # This function must local()ize everything because callbacks may 1466fb12b70Safresh1 # call find() or finddepth() 1476fb12b70Safresh1 1486fb12b70Safresh1 local %SLnkSeen; 1496fb12b70Safresh1 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow, 1506fb12b70Safresh1 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat, 1516fb12b70Safresh1 $pre_process, $post_process, $dangling_symlinks); 1526fb12b70Safresh1 local($dir, $name, $fullname, $prune); 1536fb12b70Safresh1 local *_ = \my $a; 1546fb12b70Safresh1 1556fb12b70Safresh1 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd(); 1566fb12b70Safresh1 if ($Is_VMS) { 1576fb12b70Safresh1 # VMS returns this by default in VMS format which just doesn't 1586fb12b70Safresh1 # work for the rest of this module. 1596fb12b70Safresh1 $cwd = VMS::Filespec::unixpath($cwd); 1606fb12b70Safresh1 1616fb12b70Safresh1 # Apparently this is not expected to have a trailing space. 1626fb12b70Safresh1 # To attempt to make VMS/UNIX conversions mostly reversible, 1636fb12b70Safresh1 # a trailing slash is needed. The run-time functions ignore the 1646fb12b70Safresh1 # resulting double slash, but it causes the perl tests to fail. 1656fb12b70Safresh1 $cwd =~ s#/\z##; 1666fb12b70Safresh1 1676fb12b70Safresh1 # This comes up in upper case now, but should be lower. 1686fb12b70Safresh1 # In the future this could be exact case, no need to change. 1696fb12b70Safresh1 } 1706fb12b70Safresh1 my $cwd_untainted = $cwd; 1716fb12b70Safresh1 my $check_t_cwd = 1; 1726fb12b70Safresh1 $wanted_callback = $wanted->{wanted}; 1736fb12b70Safresh1 $bydepth = $wanted->{bydepth}; 1746fb12b70Safresh1 $pre_process = $wanted->{preprocess}; 1756fb12b70Safresh1 $post_process = $wanted->{postprocess}; 1766fb12b70Safresh1 $no_chdir = $wanted->{no_chdir}; 177eac174f2Safresh1 $full_check = $wanted->{follow}; 178eac174f2Safresh1 $follow = $full_check || $wanted->{follow_fast}; 1796fb12b70Safresh1 $follow_skip = $wanted->{follow_skip}; 1806fb12b70Safresh1 $untaint = $wanted->{untaint}; 1816fb12b70Safresh1 $untaint_pat = $wanted->{untaint_pattern}; 1826fb12b70Safresh1 $untaint_skip = $wanted->{untaint_skip}; 1836fb12b70Safresh1 $dangling_symlinks = $wanted->{dangling_symlinks}; 1846fb12b70Safresh1 1856fb12b70Safresh1 # for compatibility reasons (find.pl, find2perl) 1866fb12b70Safresh1 local our ($topdir, $topdev, $topino, $topmode, $topnlink); 1876fb12b70Safresh1 1886fb12b70Safresh1 # a symbolic link to a directory doesn't increase the link count 1896fb12b70Safresh1 $avoid_nlink = $follow || $File::Find::dont_use_nlink; 1906fb12b70Safresh1 1916fb12b70Safresh1 my ($abs_dir, $Is_Dir); 1926fb12b70Safresh1 1936fb12b70Safresh1 Proc_Top_Item: 1946fb12b70Safresh1 foreach my $TOP (@_) { 1956fb12b70Safresh1 my $top_item = $TOP; 1966fb12b70Safresh1 $top_item = VMS::Filespec::unixify($top_item) if $Is_VMS; 1976fb12b70Safresh1 1986fb12b70Safresh1 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item; 1996fb12b70Safresh1 200e0680481Safresh1 # canonicalize directory separators 201e0680481Safresh1 $top_item =~ s|[/\\]|/|g if $Is_Win32; 202e0680481Safresh1 203e0680481Safresh1 # no trailing / unless path is root 204e0680481Safresh1 $top_item =~ s|/\z|| unless _is_root($top_item); 2056fb12b70Safresh1 2066fb12b70Safresh1 $Is_Dir= 0; 2076fb12b70Safresh1 2086fb12b70Safresh1 if ($follow) { 2096fb12b70Safresh1 210e0680481Safresh1 if (_is_absolute($top_item)) { 2116fb12b70Safresh1 $abs_dir = $top_item; 2126fb12b70Safresh1 } 2136fb12b70Safresh1 elsif ($top_item eq $File::Find::current_dir) { 2146fb12b70Safresh1 $abs_dir = $cwd; 2156fb12b70Safresh1 } 2166fb12b70Safresh1 else { # care about any ../ 2176fb12b70Safresh1 $top_item =~ s/\.dir\z//i if $Is_VMS; 2186fb12b70Safresh1 $abs_dir = contract_name("$cwd/",$top_item); 2196fb12b70Safresh1 } 2206fb12b70Safresh1 $abs_dir= Follow_SymLink($abs_dir); 2216fb12b70Safresh1 unless (defined $abs_dir) { 2226fb12b70Safresh1 if ($dangling_symlinks) { 2236fb12b70Safresh1 if (ref $dangling_symlinks eq 'CODE') { 2246fb12b70Safresh1 $dangling_symlinks->($top_item, $cwd); 2256fb12b70Safresh1 } else { 2266fb12b70Safresh1 warnings::warnif "$top_item is a dangling symbolic link\n"; 2276fb12b70Safresh1 } 2286fb12b70Safresh1 } 2296fb12b70Safresh1 next Proc_Top_Item; 2306fb12b70Safresh1 } 2316fb12b70Safresh1 2326fb12b70Safresh1 if (-d _) { 2336fb12b70Safresh1 $top_item =~ s/\.dir\z//i if $Is_VMS; 2346fb12b70Safresh1 _find_dir_symlnk($wanted, $abs_dir, $top_item); 2356fb12b70Safresh1 $Is_Dir= 1; 2366fb12b70Safresh1 } 2376fb12b70Safresh1 } 2386fb12b70Safresh1 else { # no follow 2396fb12b70Safresh1 $topdir = $top_item; 2406fb12b70Safresh1 unless (defined $topnlink) { 2416fb12b70Safresh1 warnings::warnif "Can't stat $top_item: $!\n"; 2426fb12b70Safresh1 next Proc_Top_Item; 2436fb12b70Safresh1 } 2446fb12b70Safresh1 if (-d _) { 2456fb12b70Safresh1 $top_item =~ s/\.dir\z//i if $Is_VMS; 2466fb12b70Safresh1 _find_dir($wanted, $top_item, $topnlink); 2476fb12b70Safresh1 $Is_Dir= 1; 2486fb12b70Safresh1 } 2496fb12b70Safresh1 else { 2506fb12b70Safresh1 $abs_dir= $top_item; 2516fb12b70Safresh1 } 2526fb12b70Safresh1 } 2536fb12b70Safresh1 2546fb12b70Safresh1 unless ($Is_Dir) { 2556fb12b70Safresh1 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) { 2566fb12b70Safresh1 ($dir,$_) = ('./', $top_item); 2576fb12b70Safresh1 } 2586fb12b70Safresh1 2596fb12b70Safresh1 $abs_dir = $dir; 2606fb12b70Safresh1 if (( $untaint ) && (is_tainted($dir) )) { 2616fb12b70Safresh1 ( $abs_dir ) = $dir =~ m|$untaint_pat|; 2626fb12b70Safresh1 unless (defined $abs_dir) { 2636fb12b70Safresh1 if ($untaint_skip == 0) { 2646fb12b70Safresh1 die "directory $dir is still tainted"; 2656fb12b70Safresh1 } 2666fb12b70Safresh1 else { 2676fb12b70Safresh1 next Proc_Top_Item; 2686fb12b70Safresh1 } 2696fb12b70Safresh1 } 2706fb12b70Safresh1 } 2716fb12b70Safresh1 2726fb12b70Safresh1 unless ($no_chdir || chdir $abs_dir) { 2736fb12b70Safresh1 warnings::warnif "Couldn't chdir $abs_dir: $!\n"; 2746fb12b70Safresh1 next Proc_Top_Item; 2756fb12b70Safresh1 } 2766fb12b70Safresh1 2776fb12b70Safresh1 $name = $abs_dir . $_; # $File::Find::name 2786fb12b70Safresh1 $_ = $name if $no_chdir; 2796fb12b70Safresh1 2806fb12b70Safresh1 { $wanted_callback->() }; # protect against wild "next" 2816fb12b70Safresh1 2826fb12b70Safresh1 } 2836fb12b70Safresh1 2846fb12b70Safresh1 unless ( $no_chdir ) { 2856fb12b70Safresh1 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) { 2866fb12b70Safresh1 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|; 2876fb12b70Safresh1 unless (defined $cwd_untainted) { 2886fb12b70Safresh1 die "insecure cwd in find(depth)"; 2896fb12b70Safresh1 } 2906fb12b70Safresh1 $check_t_cwd = 0; 2916fb12b70Safresh1 } 2926fb12b70Safresh1 unless (chdir $cwd_untainted) { 2936fb12b70Safresh1 die "Can't cd to $cwd: $!\n"; 2946fb12b70Safresh1 } 2956fb12b70Safresh1 } 2966fb12b70Safresh1 } 2976fb12b70Safresh1} 2986fb12b70Safresh1 2996fb12b70Safresh1# API: 3006fb12b70Safresh1# $wanted 3016fb12b70Safresh1# $p_dir : "parent directory" 3026fb12b70Safresh1# $nlink : what came back from the stat 3036fb12b70Safresh1# preconditions: 3046fb12b70Safresh1# chdir (if not no_chdir) to dir 3056fb12b70Safresh1 3066fb12b70Safresh1sub _find_dir($$$) { 3076fb12b70Safresh1 my ($wanted, $p_dir, $nlink) = @_; 3086fb12b70Safresh1 my ($CdLvl,$Level) = (0,0); 3096fb12b70Safresh1 my @Stack; 3106fb12b70Safresh1 my @filenames; 3116fb12b70Safresh1 my ($subcount,$sub_nlink); 3126fb12b70Safresh1 my $SE= []; 3136fb12b70Safresh1 my $dir_name= $p_dir; 3146fb12b70Safresh1 my $dir_pref; 3156fb12b70Safresh1 my $dir_rel = $File::Find::current_dir; 3166fb12b70Safresh1 my $tainted = 0; 3176fb12b70Safresh1 my $no_nlink; 3186fb12b70Safresh1 319e0680481Safresh1 if ($Is_VMS) { 3206fb12b70Safresh1 # VMS is returning trailing .dir on directories 3216fb12b70Safresh1 # and trailing . on files and symbolic links 3226fb12b70Safresh1 # in UNIX syntax. 3236fb12b70Safresh1 # 3246fb12b70Safresh1 3256fb12b70Safresh1 $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.'; 3266fb12b70Safresh1 3276fb12b70Safresh1 $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" ); 3286fb12b70Safresh1 } 3296fb12b70Safresh1 else { 330e0680481Safresh1 $dir_pref = _is_root($p_dir) ? $p_dir : "$p_dir/"; 3316fb12b70Safresh1 } 3326fb12b70Safresh1 333eac174f2Safresh1 local ($dir, $name, $prune); 3346fb12b70Safresh1 3356fb12b70Safresh1 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) { 3366fb12b70Safresh1 my $udir = $p_dir; 3376fb12b70Safresh1 if (( $untaint ) && (is_tainted($p_dir) )) { 3386fb12b70Safresh1 ( $udir ) = $p_dir =~ m|$untaint_pat|; 3396fb12b70Safresh1 unless (defined $udir) { 3406fb12b70Safresh1 if ($untaint_skip == 0) { 3416fb12b70Safresh1 die "directory $p_dir is still tainted"; 3426fb12b70Safresh1 } 3436fb12b70Safresh1 else { 3446fb12b70Safresh1 return; 3456fb12b70Safresh1 } 3466fb12b70Safresh1 } 3476fb12b70Safresh1 } 3486fb12b70Safresh1 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 3496fb12b70Safresh1 warnings::warnif "Can't cd to $udir: $!\n"; 3506fb12b70Safresh1 return; 3516fb12b70Safresh1 } 3526fb12b70Safresh1 } 3536fb12b70Safresh1 3546fb12b70Safresh1 # push the starting directory 3556fb12b70Safresh1 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 3566fb12b70Safresh1 3576fb12b70Safresh1 while (defined $SE) { 3586fb12b70Safresh1 unless ($bydepth) { 3596fb12b70Safresh1 $dir= $p_dir; # $File::Find::dir 3606fb12b70Safresh1 $name= $dir_name; # $File::Find::name 3616fb12b70Safresh1 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 3626fb12b70Safresh1 # prune may happen here 3636fb12b70Safresh1 $prune= 0; 3646fb12b70Safresh1 { $wanted_callback->() }; # protect against wild "next" 3656fb12b70Safresh1 next if $prune; 3666fb12b70Safresh1 } 3676fb12b70Safresh1 3686fb12b70Safresh1 # change to that directory 3696fb12b70Safresh1 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 3706fb12b70Safresh1 my $udir= $dir_rel; 3716fb12b70Safresh1 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) { 3726fb12b70Safresh1 ( $udir ) = $dir_rel =~ m|$untaint_pat|; 3736fb12b70Safresh1 unless (defined $udir) { 3746fb12b70Safresh1 if ($untaint_skip == 0) { 3756fb12b70Safresh1 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted"; 3766fb12b70Safresh1 } else { # $untaint_skip == 1 3776fb12b70Safresh1 next; 3786fb12b70Safresh1 } 3796fb12b70Safresh1 } 3806fb12b70Safresh1 } 3816fb12b70Safresh1 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) { 3826fb12b70Safresh1 warnings::warnif "Can't cd to (" . 3836fb12b70Safresh1 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n"; 3846fb12b70Safresh1 next; 3856fb12b70Safresh1 } 3866fb12b70Safresh1 $CdLvl++; 3876fb12b70Safresh1 } 3886fb12b70Safresh1 3896fb12b70Safresh1 $dir= $dir_name; # $File::Find::dir 3906fb12b70Safresh1 3916fb12b70Safresh1 # Get the list of files in the current directory. 392eac174f2Safresh1 my $dh; 393eac174f2Safresh1 unless (opendir $dh, ($no_chdir ? $dir_name : $File::Find::current_dir)) { 3946fb12b70Safresh1 warnings::warnif "Can't opendir($dir_name): $!\n"; 3956fb12b70Safresh1 next; 3966fb12b70Safresh1 } 397eac174f2Safresh1 @filenames = readdir $dh; 398eac174f2Safresh1 closedir($dh); 3996fb12b70Safresh1 @filenames = $pre_process->(@filenames) if $pre_process; 4006fb12b70Safresh1 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process; 4016fb12b70Safresh1 4026fb12b70Safresh1 # default: use whatever was specified 4036fb12b70Safresh1 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back) 4046fb12b70Safresh1 $no_nlink = $avoid_nlink; 4056fb12b70Safresh1 # if dir has wrong nlink count, force switch to slower stat method 4066fb12b70Safresh1 $no_nlink = 1 if ($nlink < 2); 4076fb12b70Safresh1 4086fb12b70Safresh1 if ($nlink == 2 && !$no_nlink) { 4096fb12b70Safresh1 # This dir has no subdirectories. 4106fb12b70Safresh1 for my $FN (@filenames) { 4116fb12b70Safresh1 if ($Is_VMS) { 4126fb12b70Safresh1 # Big hammer here - Compensate for VMS trailing . and .dir 4136fb12b70Safresh1 # No win situation until this is changed, but this 4146fb12b70Safresh1 # will handle the majority of the cases with breaking the fewest 4156fb12b70Safresh1 4166fb12b70Safresh1 $FN =~ s/\.dir\z//i; 4176fb12b70Safresh1 $FN =~ s#\.$## if ($FN ne '.'); 4186fb12b70Safresh1 } 4196fb12b70Safresh1 next if $FN =~ $File::Find::skip_pattern; 4206fb12b70Safresh1 4216fb12b70Safresh1 $name = $dir_pref . $FN; # $File::Find::name 4226fb12b70Safresh1 $_ = ($no_chdir ? $name : $FN); # $_ 4236fb12b70Safresh1 { $wanted_callback->() }; # protect against wild "next" 4246fb12b70Safresh1 } 4256fb12b70Safresh1 4266fb12b70Safresh1 } 4276fb12b70Safresh1 else { 4286fb12b70Safresh1 # This dir has subdirectories. 4296fb12b70Safresh1 $subcount = $nlink - 2; 4306fb12b70Safresh1 431b8851fccSafresh1 # HACK: insert directories at this position, so as to preserve 432b8851fccSafresh1 # the user pre-processed ordering of files (thus ensuring 433b8851fccSafresh1 # directory traversal is in user sorted order, not at random). 4346fb12b70Safresh1 my $stack_top = @Stack; 4356fb12b70Safresh1 4366fb12b70Safresh1 for my $FN (@filenames) { 4376fb12b70Safresh1 next if $FN =~ $File::Find::skip_pattern; 4386fb12b70Safresh1 if ($subcount > 0 || $no_nlink) { 4396fb12b70Safresh1 # Seen all the subdirs? 4406fb12b70Safresh1 # check for directoriness. 4416fb12b70Safresh1 # stat is faster for a file in the current directory 4426fb12b70Safresh1 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3]; 4436fb12b70Safresh1 4446fb12b70Safresh1 if (-d _) { 4456fb12b70Safresh1 --$subcount; 4466fb12b70Safresh1 $FN =~ s/\.dir\z//i if $Is_VMS; 4476fb12b70Safresh1 # HACK: replace push to preserve dir traversal order 4486fb12b70Safresh1 #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink]; 4496fb12b70Safresh1 splice @Stack, $stack_top, 0, 4506fb12b70Safresh1 [$CdLvl,$dir_name,$FN,$sub_nlink]; 4516fb12b70Safresh1 } 4526fb12b70Safresh1 else { 4536fb12b70Safresh1 $name = $dir_pref . $FN; # $File::Find::name 4546fb12b70Safresh1 $_= ($no_chdir ? $name : $FN); # $_ 4556fb12b70Safresh1 { $wanted_callback->() }; # protect against wild "next" 4566fb12b70Safresh1 } 4576fb12b70Safresh1 } 4586fb12b70Safresh1 else { 4596fb12b70Safresh1 $name = $dir_pref . $FN; # $File::Find::name 4606fb12b70Safresh1 $_= ($no_chdir ? $name : $FN); # $_ 4616fb12b70Safresh1 { $wanted_callback->() }; # protect against wild "next" 4626fb12b70Safresh1 } 4636fb12b70Safresh1 } 4646fb12b70Safresh1 } 4656fb12b70Safresh1 } 4666fb12b70Safresh1 continue { 4676fb12b70Safresh1 while ( defined ($SE = pop @Stack) ) { 4686fb12b70Safresh1 ($Level, $p_dir, $dir_rel, $nlink) = @$SE; 4696fb12b70Safresh1 if ($CdLvl > $Level && !$no_chdir) { 4706fb12b70Safresh1 my $tmp; 4716fb12b70Safresh1 if ($Is_VMS) { 4726fb12b70Safresh1 $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']'; 4736fb12b70Safresh1 } 4746fb12b70Safresh1 else { 4756fb12b70Safresh1 $tmp = join('/',('..') x ($CdLvl-$Level)); 4766fb12b70Safresh1 } 4776fb12b70Safresh1 die "Can't cd to $tmp from $dir_name: $!" 4786fb12b70Safresh1 unless chdir ($tmp); 4796fb12b70Safresh1 $CdLvl = $Level; 4806fb12b70Safresh1 } 4816fb12b70Safresh1 482e0680481Safresh1 if ($^O eq 'VMS') { 4836fb12b70Safresh1 if ($p_dir =~ m/[\]>]+$/) { 4846fb12b70Safresh1 $dir_name = $p_dir; 4856fb12b70Safresh1 $dir_name =~ s/([\]>]+)$/.$dir_rel$1/; 4866fb12b70Safresh1 $dir_pref = $dir_name; 4876fb12b70Safresh1 } 4886fb12b70Safresh1 else { 4896fb12b70Safresh1 $dir_name = "$p_dir/$dir_rel"; 4906fb12b70Safresh1 $dir_pref = "$dir_name/"; 4916fb12b70Safresh1 } 4926fb12b70Safresh1 } 4936fb12b70Safresh1 else { 494e0680481Safresh1 $dir_name = _is_root($p_dir) ? "$p_dir$dir_rel" : "$p_dir/$dir_rel"; 4956fb12b70Safresh1 $dir_pref = "$dir_name/"; 4966fb12b70Safresh1 } 4976fb12b70Safresh1 4986fb12b70Safresh1 if ( $nlink == -2 ) { 4996fb12b70Safresh1 $name = $dir = $p_dir; # $File::Find::name / dir 5006fb12b70Safresh1 $_ = $File::Find::current_dir; 5016fb12b70Safresh1 $post_process->(); # End-of-directory processing 5026fb12b70Safresh1 } 5036fb12b70Safresh1 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now 5046fb12b70Safresh1 $name = $dir_name; 5056fb12b70Safresh1 if ( substr($name,-2) eq '/.' ) { 5066fb12b70Safresh1 substr($name, length($name) == 2 ? -1 : -2) = ''; 5076fb12b70Safresh1 } 5086fb12b70Safresh1 $dir = $p_dir; 5096fb12b70Safresh1 $_ = ($no_chdir ? $dir_name : $dir_rel ); 5106fb12b70Safresh1 if ( substr($_,-2) eq '/.' ) { 5116fb12b70Safresh1 substr($_, length($_) == 2 ? -1 : -2) = ''; 5126fb12b70Safresh1 } 5136fb12b70Safresh1 { $wanted_callback->() }; # protect against wild "next" 5146fb12b70Safresh1 } 5156fb12b70Safresh1 else { 5166fb12b70Safresh1 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth; 5176fb12b70Safresh1 last; 5186fb12b70Safresh1 } 5196fb12b70Safresh1 } 5206fb12b70Safresh1 } 5216fb12b70Safresh1} 5226fb12b70Safresh1 5236fb12b70Safresh1 5246fb12b70Safresh1# API: 5256fb12b70Safresh1# $wanted 5266fb12b70Safresh1# $dir_loc : absolute location of a dir 5276fb12b70Safresh1# $p_dir : "parent directory" 5286fb12b70Safresh1# preconditions: 5296fb12b70Safresh1# chdir (if not no_chdir) to dir 5306fb12b70Safresh1 5316fb12b70Safresh1sub _find_dir_symlnk($$$) { 5326fb12b70Safresh1 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory 5336fb12b70Safresh1 my @Stack; 5346fb12b70Safresh1 my @filenames; 5356fb12b70Safresh1 my $new_loc; 5366fb12b70Safresh1 my $updir_loc = $dir_loc; # untainted parent directory 5376fb12b70Safresh1 my $SE = []; 5386fb12b70Safresh1 my $dir_name = $p_dir; 5396fb12b70Safresh1 my $dir_pref; 5406fb12b70Safresh1 my $loc_pref; 5416fb12b70Safresh1 my $dir_rel = $File::Find::current_dir; 5426fb12b70Safresh1 my $byd_flag; # flag for pending stack entry if $bydepth 5436fb12b70Safresh1 my $tainted = 0; 5446fb12b70Safresh1 my $ok = 1; 5456fb12b70Safresh1 546e0680481Safresh1 $dir_pref = _is_root($p_dir) ? $p_dir : "$p_dir/"; 547e0680481Safresh1 $loc_pref = _is_root($dir_loc) ? $dir_loc : "$dir_loc/"; 5486fb12b70Safresh1 549eac174f2Safresh1 local ($dir, $name, $fullname, $prune); 5506fb12b70Safresh1 5516fb12b70Safresh1 unless ($no_chdir) { 5526fb12b70Safresh1 # untaint the topdir 5536fb12b70Safresh1 if (( $untaint ) && (is_tainted($dir_loc) )) { 5546fb12b70Safresh1 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted 5556fb12b70Safresh1 # once untainted, $updir_loc is pushed on the stack (as parent directory); 5566fb12b70Safresh1 # hence, we don't need to untaint the parent directory every time we chdir 5576fb12b70Safresh1 # to it later 5586fb12b70Safresh1 unless (defined $updir_loc) { 5596fb12b70Safresh1 if ($untaint_skip == 0) { 5606fb12b70Safresh1 die "directory $dir_loc is still tainted"; 5616fb12b70Safresh1 } 5626fb12b70Safresh1 else { 5636fb12b70Safresh1 return; 5646fb12b70Safresh1 } 5656fb12b70Safresh1 } 5666fb12b70Safresh1 } 5676fb12b70Safresh1 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir); 5686fb12b70Safresh1 unless ($ok) { 5696fb12b70Safresh1 warnings::warnif "Can't cd to $updir_loc: $!\n"; 5706fb12b70Safresh1 return; 5716fb12b70Safresh1 } 5726fb12b70Safresh1 } 5736fb12b70Safresh1 5746fb12b70Safresh1 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth; 5756fb12b70Safresh1 5766fb12b70Safresh1 while (defined $SE) { 5776fb12b70Safresh1 5786fb12b70Safresh1 unless ($bydepth) { 5796fb12b70Safresh1 # change (back) to parent directory (always untainted) 5806fb12b70Safresh1 unless ($no_chdir) { 5816fb12b70Safresh1 unless (chdir $updir_loc) { 5826fb12b70Safresh1 warnings::warnif "Can't cd to $updir_loc: $!\n"; 5836fb12b70Safresh1 next; 5846fb12b70Safresh1 } 5856fb12b70Safresh1 } 5866fb12b70Safresh1 $dir= $p_dir; # $File::Find::dir 5876fb12b70Safresh1 $name= $dir_name; # $File::Find::name 5886fb12b70Safresh1 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_ 5896fb12b70Safresh1 $fullname= $dir_loc; # $File::Find::fullname 5906fb12b70Safresh1 # prune may happen here 5916fb12b70Safresh1 $prune= 0; 5926fb12b70Safresh1 lstat($_); # make sure file tests with '_' work 5936fb12b70Safresh1 { $wanted_callback->() }; # protect against wild "next" 5946fb12b70Safresh1 next if $prune; 5956fb12b70Safresh1 } 5966fb12b70Safresh1 5976fb12b70Safresh1 # change to that directory 5986fb12b70Safresh1 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 5996fb12b70Safresh1 $updir_loc = $dir_loc; 6006fb12b70Safresh1 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) { 6016fb12b70Safresh1 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir 6026fb12b70Safresh1 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; 6036fb12b70Safresh1 unless (defined $updir_loc) { 6046fb12b70Safresh1 if ($untaint_skip == 0) { 6056fb12b70Safresh1 die "directory $dir_loc is still tainted"; 6066fb12b70Safresh1 } 6076fb12b70Safresh1 else { 6086fb12b70Safresh1 next; 6096fb12b70Safresh1 } 6106fb12b70Safresh1 } 6116fb12b70Safresh1 } 6126fb12b70Safresh1 unless (chdir $updir_loc) { 6136fb12b70Safresh1 warnings::warnif "Can't cd to $updir_loc: $!\n"; 6146fb12b70Safresh1 next; 6156fb12b70Safresh1 } 6166fb12b70Safresh1 } 6176fb12b70Safresh1 6186fb12b70Safresh1 $dir = $dir_name; # $File::Find::dir 6196fb12b70Safresh1 6206fb12b70Safresh1 # Get the list of files in the current directory. 621eac174f2Safresh1 my $dh; 622eac174f2Safresh1 unless (opendir $dh, ($no_chdir ? $dir_loc : $File::Find::current_dir)) { 6236fb12b70Safresh1 warnings::warnif "Can't opendir($dir_loc): $!\n"; 6246fb12b70Safresh1 next; 6256fb12b70Safresh1 } 626eac174f2Safresh1 @filenames = readdir $dh; 627eac174f2Safresh1 closedir($dh); 6286fb12b70Safresh1 6296fb12b70Safresh1 for my $FN (@filenames) { 6306fb12b70Safresh1 if ($Is_VMS) { 6316fb12b70Safresh1 # Big hammer here - Compensate for VMS trailing . and .dir 6326fb12b70Safresh1 # No win situation until this is changed, but this 6336fb12b70Safresh1 # will handle the majority of the cases with breaking the fewest. 6346fb12b70Safresh1 6356fb12b70Safresh1 $FN =~ s/\.dir\z//i; 6366fb12b70Safresh1 $FN =~ s#\.$## if ($FN ne '.'); 6376fb12b70Safresh1 } 6386fb12b70Safresh1 next if $FN =~ $File::Find::skip_pattern; 6396fb12b70Safresh1 6406fb12b70Safresh1 # follow symbolic links / do an lstat 6416fb12b70Safresh1 $new_loc = Follow_SymLink($loc_pref.$FN); 6426fb12b70Safresh1 6436fb12b70Safresh1 # ignore if invalid symlink 6446fb12b70Safresh1 unless (defined $new_loc) { 6456fb12b70Safresh1 if (!defined -l _ && $dangling_symlinks) { 6466fb12b70Safresh1 $fullname = undef; 6476fb12b70Safresh1 if (ref $dangling_symlinks eq 'CODE') { 6486fb12b70Safresh1 $dangling_symlinks->($FN, $dir_pref); 6496fb12b70Safresh1 } else { 6506fb12b70Safresh1 warnings::warnif "$dir_pref$FN is a dangling symbolic link\n"; 6516fb12b70Safresh1 } 6526fb12b70Safresh1 } 6536fb12b70Safresh1 else { 6546fb12b70Safresh1 $fullname = $loc_pref . $FN; 6556fb12b70Safresh1 } 6566fb12b70Safresh1 $name = $dir_pref . $FN; 6576fb12b70Safresh1 $_ = ($no_chdir ? $name : $FN); 6586fb12b70Safresh1 { $wanted_callback->() }; 6596fb12b70Safresh1 next; 6606fb12b70Safresh1 } 6616fb12b70Safresh1 6626fb12b70Safresh1 if (-d _) { 6636fb12b70Safresh1 if ($Is_VMS) { 6646fb12b70Safresh1 $FN =~ s/\.dir\z//i; 6656fb12b70Safresh1 $FN =~ s#\.$## if ($FN ne '.'); 6666fb12b70Safresh1 $new_loc =~ s/\.dir\z//i; 6676fb12b70Safresh1 $new_loc =~ s#\.$## if ($new_loc ne '.'); 6686fb12b70Safresh1 } 6696fb12b70Safresh1 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1]; 6706fb12b70Safresh1 } 6716fb12b70Safresh1 else { 6726fb12b70Safresh1 $fullname = $new_loc; # $File::Find::fullname 6736fb12b70Safresh1 $name = $dir_pref . $FN; # $File::Find::name 6746fb12b70Safresh1 $_ = ($no_chdir ? $name : $FN); # $_ 6756fb12b70Safresh1 { $wanted_callback->() }; # protect against wild "next" 6766fb12b70Safresh1 } 6776fb12b70Safresh1 } 6786fb12b70Safresh1 6796fb12b70Safresh1 } 6806fb12b70Safresh1 continue { 6816fb12b70Safresh1 while (defined($SE = pop @Stack)) { 6826fb12b70Safresh1 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE; 683e0680481Safresh1 $dir_name = _is_root($p_dir) ? "$p_dir$dir_rel" : "$p_dir/$dir_rel"; 6846fb12b70Safresh1 $dir_pref = "$dir_name/"; 6856fb12b70Safresh1 $loc_pref = "$dir_loc/"; 6866fb12b70Safresh1 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now 6876fb12b70Safresh1 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) { 6886fb12b70Safresh1 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted 6896fb12b70Safresh1 warnings::warnif "Can't cd to $updir_loc: $!\n"; 6906fb12b70Safresh1 next; 6916fb12b70Safresh1 } 6926fb12b70Safresh1 } 6936fb12b70Safresh1 $fullname = $dir_loc; # $File::Find::fullname 6946fb12b70Safresh1 $name = $dir_name; # $File::Find::name 6956fb12b70Safresh1 if ( substr($name,-2) eq '/.' ) { 6966fb12b70Safresh1 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name 6976fb12b70Safresh1 } 6986fb12b70Safresh1 $dir = $p_dir; # $File::Find::dir 6996fb12b70Safresh1 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_ 7006fb12b70Safresh1 if ( substr($_,-2) eq '/.' ) { 7016fb12b70Safresh1 substr($_, length($_) == 2 ? -1 : -2) = ''; 7026fb12b70Safresh1 } 7036fb12b70Safresh1 7046fb12b70Safresh1 lstat($_); # make sure file tests with '_' work 7056fb12b70Safresh1 { $wanted_callback->() }; # protect against wild "next" 7066fb12b70Safresh1 } 7076fb12b70Safresh1 else { 7086fb12b70Safresh1 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth; 7096fb12b70Safresh1 last; 7106fb12b70Safresh1 } 7116fb12b70Safresh1 } 7126fb12b70Safresh1 } 7136fb12b70Safresh1} 7146fb12b70Safresh1 7156fb12b70Safresh1 7166fb12b70Safresh1sub wrap_wanted { 7176fb12b70Safresh1 my $wanted = shift; 7186fb12b70Safresh1 if ( ref($wanted) eq 'HASH' ) { 719b8851fccSafresh1 # RT #122547 720b8851fccSafresh1 my %valid_options = map {$_ => 1} qw( 721b8851fccSafresh1 wanted 722b8851fccSafresh1 bydepth 723b8851fccSafresh1 preprocess 724b8851fccSafresh1 postprocess 725b8851fccSafresh1 follow 726b8851fccSafresh1 follow_fast 727b8851fccSafresh1 follow_skip 728b8851fccSafresh1 dangling_symlinks 729b8851fccSafresh1 no_chdir 730b8851fccSafresh1 untaint 731b8851fccSafresh1 untaint_pattern 732b8851fccSafresh1 untaint_skip 733b8851fccSafresh1 ); 734b8851fccSafresh1 my @invalid_options = (); 735b8851fccSafresh1 for my $v (keys %{$wanted}) { 736b8851fccSafresh1 push @invalid_options, $v unless exists $valid_options{$v}; 737b8851fccSafresh1 } 738b8851fccSafresh1 warn "Invalid option(s): @invalid_options" if @invalid_options; 739b8851fccSafresh1 7406fb12b70Safresh1 unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) { 7416fb12b70Safresh1 die 'no &wanted subroutine given'; 7426fb12b70Safresh1 } 7436fb12b70Safresh1 if ( $wanted->{follow} || $wanted->{follow_fast}) { 7446fb12b70Safresh1 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip}; 7456fb12b70Safresh1 } 7466fb12b70Safresh1 if ( $wanted->{untaint} ) { 7476fb12b70Safresh1 $wanted->{untaint_pattern} = $File::Find::untaint_pattern 7486fb12b70Safresh1 unless defined $wanted->{untaint_pattern}; 7496fb12b70Safresh1 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip}; 7506fb12b70Safresh1 } 7516fb12b70Safresh1 return $wanted; 7526fb12b70Safresh1 } 7536fb12b70Safresh1 elsif( ref( $wanted ) eq 'CODE' ) { 7546fb12b70Safresh1 return { wanted => $wanted }; 7556fb12b70Safresh1 } 7566fb12b70Safresh1 else { 7576fb12b70Safresh1 die 'no &wanted subroutine given'; 7586fb12b70Safresh1 } 7596fb12b70Safresh1} 7606fb12b70Safresh1 7616fb12b70Safresh1sub find { 7626fb12b70Safresh1 my $wanted = shift; 7636fb12b70Safresh1 _find_opt(wrap_wanted($wanted), @_); 7646fb12b70Safresh1} 7656fb12b70Safresh1 7666fb12b70Safresh1sub finddepth { 7676fb12b70Safresh1 my $wanted = wrap_wanted(shift); 7686fb12b70Safresh1 $wanted->{bydepth} = 1; 7696fb12b70Safresh1 _find_opt($wanted, @_); 7706fb12b70Safresh1} 7716fb12b70Safresh1 7726fb12b70Safresh1# default 7736fb12b70Safresh1$File::Find::skip_pattern = qr/^\.{1,2}\z/; 7746fb12b70Safresh1$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|; 7756fb12b70Safresh1 7766fb12b70Safresh1# this _should_ work properly on all platforms 7776fb12b70Safresh1# where File::Find can be expected to work 7786fb12b70Safresh1$File::Find::current_dir = File::Spec->curdir || '.'; 7796fb12b70Safresh1 780b46d8ef2Safresh1$File::Find::dont_use_nlink = 1; 7816fb12b70Safresh1 7826fb12b70Safresh1# We need a function that checks if a scalar is tainted. Either use the 7836fb12b70Safresh1# Scalar::Util module's tainted() function or our (slower) pure Perl 7846fb12b70Safresh1# fallback is_tainted_pp() 7856fb12b70Safresh1{ 7866fb12b70Safresh1 local $@; 7876fb12b70Safresh1 eval { require Scalar::Util }; 7886fb12b70Safresh1 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted; 7896fb12b70Safresh1} 7906fb12b70Safresh1 7916fb12b70Safresh11; 792b8851fccSafresh1 793b8851fccSafresh1__END__ 794b8851fccSafresh1 795b8851fccSafresh1=head1 NAME 796b8851fccSafresh1 797b8851fccSafresh1File::Find - Traverse a directory tree. 798b8851fccSafresh1 799b8851fccSafresh1=head1 SYNOPSIS 800b8851fccSafresh1 801b8851fccSafresh1 use File::Find; 802b8851fccSafresh1 find(\&wanted, @directories_to_search); 803b8851fccSafresh1 sub wanted { ... } 804b8851fccSafresh1 805b8851fccSafresh1 use File::Find; 806b8851fccSafresh1 finddepth(\&wanted, @directories_to_search); 807b8851fccSafresh1 sub wanted { ... } 808b8851fccSafresh1 809b8851fccSafresh1 use File::Find; 810b8851fccSafresh1 find({ wanted => \&process, follow => 1 }, '.'); 811b8851fccSafresh1 812b8851fccSafresh1=head1 DESCRIPTION 813b8851fccSafresh1 814b8851fccSafresh1These are functions for searching through directory trees doing work 815*3d61058aSafresh1on each file found similar to the Unix L<find(1)> command. C<File::Find> 816b8851fccSafresh1exports two functions, C<find> and C<finddepth>. They work similarly 817b8851fccSafresh1but have subtle differences. 818b8851fccSafresh1 819b8851fccSafresh1=over 4 820b8851fccSafresh1 821b8851fccSafresh1=item B<find> 822b8851fccSafresh1 823b8851fccSafresh1 find(\&wanted, @directories); 824b8851fccSafresh1 find(\%options, @directories); 825b8851fccSafresh1 826b8851fccSafresh1C<find()> does a depth-first search over the given C<@directories> in 827b8851fccSafresh1the order they are given. For each file or directory found, it calls 828b8851fccSafresh1the C<&wanted> subroutine. (See below for details on how to use the 829b8851fccSafresh1C<&wanted> function). Additionally, for each directory found, it will 830b8851fccSafresh1C<chdir()> into that directory and continue the search, invoking the 831b8851fccSafresh1C<&wanted> function on each file or subdirectory in the directory. 832b8851fccSafresh1 833b8851fccSafresh1=item B<finddepth> 834b8851fccSafresh1 835b8851fccSafresh1 finddepth(\&wanted, @directories); 836b8851fccSafresh1 finddepth(\%options, @directories); 837b8851fccSafresh1 838b8851fccSafresh1C<finddepth()> works just like C<find()> except that it invokes the 839b8851fccSafresh1C<&wanted> function for a directory I<after> invoking it for the 840b8851fccSafresh1directory's contents. It does a postorder traversal instead of a 841b8851fccSafresh1preorder traversal, working from the bottom of the directory tree up 842b8851fccSafresh1where C<find()> works from the top of the tree down. 843b8851fccSafresh1 844b8851fccSafresh1=back 845b8851fccSafresh1 846eac174f2Safresh1Despite the name of the C<finddepth()> function, both C<find()> and 847eac174f2Safresh1C<finddepth()> perform a depth-first search of the directory hierarchy. 848eac174f2Safresh1 849*3d61058aSafresh1=head2 C<%options> 850b8851fccSafresh1 851b8851fccSafresh1The first argument to C<find()> is either a code reference to your 852b8851fccSafresh1C<&wanted> function, or a hash reference describing the operations 853b8851fccSafresh1to be performed for each file. The 854b8851fccSafresh1code reference is described in L</The wanted function> below. 855b8851fccSafresh1 856*3d61058aSafresh1Here are the possible B<keys> for the hash: 857b8851fccSafresh1 858eac174f2Safresh1=over 4 859b8851fccSafresh1 860b8851fccSafresh1=item C<wanted> 861b8851fccSafresh1 862b8851fccSafresh1The value should be a code reference. This code reference is 863b8851fccSafresh1described in L</The wanted function> below. The C<&wanted> subroutine is 864b8851fccSafresh1mandatory. 865b8851fccSafresh1 866b8851fccSafresh1=item C<bydepth> 867b8851fccSafresh1 868b8851fccSafresh1Reports the name of a directory only AFTER all its entries 869b8851fccSafresh1have been reported. Entry point C<finddepth()> is a shortcut for 870b8851fccSafresh1specifying C<< { bydepth => 1 } >> in the first argument of C<find()>. 871b8851fccSafresh1 872b8851fccSafresh1=item C<preprocess> 873b8851fccSafresh1 874b8851fccSafresh1The value should be a code reference. This code reference is used to 875b8851fccSafresh1preprocess the current directory. The name of the currently processed 876b8851fccSafresh1directory is in C<$File::Find::dir>. Your preprocessing function is 877b8851fccSafresh1called after C<readdir()>, but before the loop that calls the C<wanted()> 878b8851fccSafresh1function. It is called with a list of strings (actually file/directory 879b8851fccSafresh1names) and is expected to return a list of strings. The code can be 880b8851fccSafresh1used to sort the file/directory names alphabetically, numerically, 881b8851fccSafresh1or to filter out directory entries based on their name alone. When 882*3d61058aSafresh1C<follow> or C<follow_fast> are in effect, C<preprocess> is a no-op. 883b8851fccSafresh1 884b8851fccSafresh1=item C<postprocess> 885b8851fccSafresh1 886b8851fccSafresh1The value should be a code reference. It is invoked just before leaving 887b8851fccSafresh1the currently processed directory. It is called in void context with no 888b8851fccSafresh1arguments. The name of the current directory is in C<$File::Find::dir>. This 889b8851fccSafresh1hook is handy for summarizing a directory, such as calculating its disk 890*3d61058aSafresh1usage. When C<follow> or C<follow_fast> are in effect, C<postprocess> is a 891b8851fccSafresh1no-op. 892b8851fccSafresh1 893b8851fccSafresh1=item C<follow> 894b8851fccSafresh1 895b8851fccSafresh1Causes symbolic links to be followed. Since directory trees with symbolic 896b8851fccSafresh1links (followed) may contain files more than once and may even have 897b8851fccSafresh1cycles, a hash has to be built up with an entry for each file. 898b8851fccSafresh1This might be expensive both in space and time for a large 899b8851fccSafresh1directory tree. See L</follow_fast> and L</follow_skip> below. 900*3d61058aSafresh1If either C<follow> or C<follow_fast> is in effect: 901b8851fccSafresh1 902eac174f2Safresh1=over 4 903b8851fccSafresh1 904b8851fccSafresh1=item * 905b8851fccSafresh1 906*3d61058aSafresh1It is guaranteed that an C<lstat> has been called before the user's 907b8851fccSafresh1C<wanted()> function is called. This enables fast file checks involving C<_>. 908*3d61058aSafresh1Note that this guarantee no longer holds if C<follow> or C<follow_fast> 909b8851fccSafresh1are not set. 910b8851fccSafresh1 911b8851fccSafresh1=item * 912b8851fccSafresh1 913b8851fccSafresh1There is a variable C<$File::Find::fullname> which holds the absolute 914b8851fccSafresh1pathname of the file with all symbolic links resolved. If the link is 915b8851fccSafresh1a dangling symbolic link, then fullname will be set to C<undef>. 916b8851fccSafresh1 917b8851fccSafresh1=back 918b8851fccSafresh1 919b8851fccSafresh1=item C<follow_fast> 920b8851fccSafresh1 921*3d61058aSafresh1This is similar to C<follow> except that it may report some files more 922b8851fccSafresh1than once. It does detect cycles, however. Since only symbolic links 923b8851fccSafresh1have to be hashed, this is much cheaper both in space and time. If 924b8851fccSafresh1processing a file more than once (by the user's C<wanted()> function) 925*3d61058aSafresh1is worse than just taking time, the option C<follow> should be used. 926b8851fccSafresh1 927b8851fccSafresh1=item C<follow_skip> 928b8851fccSafresh1 929b8851fccSafresh1C<follow_skip==1>, which is the default, causes all files which are 930b8851fccSafresh1neither directories nor symbolic links to be ignored if they are about 931b8851fccSafresh1to be processed a second time. If a directory or a symbolic link 932*3d61058aSafresh1are about to be processed a second time, C<File::Find> dies. 933b8851fccSafresh1 934*3d61058aSafresh1C<follow_skip==0> causes C<File::Find> to die if any file is about to be 935b8851fccSafresh1processed a second time. 936b8851fccSafresh1 937*3d61058aSafresh1C<follow_skip==2> causes C<File::Find> to ignore any duplicate files and 938b8851fccSafresh1directories but to proceed normally otherwise. 939b8851fccSafresh1 940b8851fccSafresh1=item C<dangling_symlinks> 941b8851fccSafresh1 942b8851fccSafresh1Specifies what to do with symbolic links whose target doesn't exist. 943b8851fccSafresh1If true and a code reference, will be called with the symbolic link 944b8851fccSafresh1name and the directory it lives in as arguments. Otherwise, if true 945b8851fccSafresh1and warnings are on, a warning of the form C<"symbolic_link_name is a dangling 946b8851fccSafresh1symbolic link\n"> will be issued. If false, the dangling symbolic link 947b8851fccSafresh1will be silently ignored. 948b8851fccSafresh1 949b8851fccSafresh1=item C<no_chdir> 950b8851fccSafresh1 951b8851fccSafresh1Does not C<chdir()> to each directory as it recurses. The C<wanted()> 952b8851fccSafresh1function will need to be aware of this, of course. In this case, 953b8851fccSafresh1C<$_> will be the same as C<$File::Find::name>. 954b8851fccSafresh1 955b8851fccSafresh1=item C<untaint> 956b8851fccSafresh1 957*3d61058aSafresh1If find is used in L<taint-mode|perlsec/Taint mode> (C<-T> command line 958*3d61058aSafresh1switch or C<if EUID != UID> or C<if EGID != GID>), then internally 959*3d61058aSafresh1directory names have to be untainted before they can be C<chdir>'d to. 960*3d61058aSafresh1Therefore they are checked against a regular expression C<untaint_pattern>. 961*3d61058aSafresh1Note that all names passed to the user's C<wanted()> function are still 962*3d61058aSafresh1tainted. If this option is used while not in taint-mode, C<untaint> 963*3d61058aSafresh1is a no-op. 964b8851fccSafresh1 965b8851fccSafresh1=item C<untaint_pattern> 966b8851fccSafresh1 967b8851fccSafresh1See above. This should be set using the C<qr> quoting operator. 968b8851fccSafresh1The default is set to C<qr|^([-+@\w./]+)$|>. 969b8851fccSafresh1Note that the parentheses are vital. 970b8851fccSafresh1 971b8851fccSafresh1=item C<untaint_skip> 972b8851fccSafresh1 973*3d61058aSafresh1If set, a directory which fails the C<untaint_pattern> is skipped, 974b8851fccSafresh1including all its sub-directories. The default is to C<die> in such a case. 975b8851fccSafresh1 976b8851fccSafresh1=back 977b8851fccSafresh1 978b8851fccSafresh1=head2 The wanted function 979b8851fccSafresh1 980b8851fccSafresh1The C<wanted()> function does whatever verifications you want on 981b8851fccSafresh1each file and directory. Note that despite its name, the C<wanted()> 982b8851fccSafresh1function is a generic callback function, and does B<not> tell 983*3d61058aSafresh1C<File::Find> if a file is "wanted" or not. In fact, its return value 984b8851fccSafresh1is ignored. 985b8851fccSafresh1 986*3d61058aSafresh1The C<wanted> function takes no arguments but rather does its work 987b8851fccSafresh1through a collection of variables. 988b8851fccSafresh1 989b8851fccSafresh1=over 4 990b8851fccSafresh1 991b8851fccSafresh1=item C<$File::Find::dir> is the current directory name, 992b8851fccSafresh1 993b8851fccSafresh1=item C<$_> is the current filename within that directory 994b8851fccSafresh1 995b8851fccSafresh1=item C<$File::Find::name> is the complete pathname to the file. 996b8851fccSafresh1 997b8851fccSafresh1=back 998b8851fccSafresh1 999b8851fccSafresh1The above variables have all been localized and may be changed without 1000b8851fccSafresh1affecting data outside of the wanted function. 1001b8851fccSafresh1 1002b8851fccSafresh1For example, when examining the file F</some/path/foo.ext> you will have: 1003b8851fccSafresh1 1004b8851fccSafresh1 $File::Find::dir = /some/path/ 1005b8851fccSafresh1 $_ = foo.ext 1006b8851fccSafresh1 $File::Find::name = /some/path/foo.ext 1007b8851fccSafresh1 1008*3d61058aSafresh1You are C<chdir()>'d to C<$File::Find::dir> when the function is called, 1009b8851fccSafresh1unless C<no_chdir> was specified. Note that when changing to 1010b8851fccSafresh1directories is in effect, the root directory (F</>) is a somewhat 1011b8851fccSafresh1special case inasmuch as the concatenation of C<$File::Find::dir>, 1012b8851fccSafresh1C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The 1013b8851fccSafresh1table below summarizes all variants: 1014b8851fccSafresh1 1015b8851fccSafresh1 $File::Find::name $File::Find::dir $_ 1016b8851fccSafresh1 default / / . 1017b8851fccSafresh1 no_chdir=>0 /etc / etc 1018b8851fccSafresh1 /etc/x /etc x 1019b8851fccSafresh1 1020b8851fccSafresh1 no_chdir=>1 / / / 1021b8851fccSafresh1 /etc / /etc 1022b8851fccSafresh1 /etc/x /etc /etc/x 1023b8851fccSafresh1 1024b8851fccSafresh1 1025b8851fccSafresh1When C<follow> or C<follow_fast> are in effect, there is 1026b8851fccSafresh1also a C<$File::Find::fullname>. The function may set 1027b8851fccSafresh1C<$File::Find::prune> to prune the tree unless C<bydepth> was 1028b8851fccSafresh1specified. Unless C<follow> or C<follow_fast> is specified, for 1029*3d61058aSafresh1compatibility reasons (C<find.pl>, L<find2perl>) there are 1030*3d61058aSafresh1in addition the following globals available: C<$File::Find::topdir>, 1031b8851fccSafresh1C<$File::Find::topdev>, C<$File::Find::topino>, 1032b8851fccSafresh1C<$File::Find::topmode> and C<$File::Find::topnlink>. 1033b8851fccSafresh1 1034*3d61058aSafresh1This library is useful for the C<find2perl> tool (distributed with the 1035*3d61058aSafresh1L<App::find2perl> CPAN module), which when fed: 1036b8851fccSafresh1 1037b8851fccSafresh1 find2perl / -name .nfs\* -mtime +7 \ 1038b8851fccSafresh1 -exec rm -f {} \; -o -fstype nfs -prune 1039b8851fccSafresh1 1040b8851fccSafresh1produces something like: 1041b8851fccSafresh1 1042b8851fccSafresh1 sub wanted { 1043b8851fccSafresh1 /^\.nfs.*\z/s && 1044b8851fccSafresh1 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) && 1045b8851fccSafresh1 int(-M _) > 7 && 1046b8851fccSafresh1 unlink($_) 1047b8851fccSafresh1 || 1048b8851fccSafresh1 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) && 1049b8851fccSafresh1 $dev < 0 && 1050b8851fccSafresh1 ($File::Find::prune = 1); 1051b8851fccSafresh1 } 1052b8851fccSafresh1 1053b8851fccSafresh1Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical 1054b8851fccSafresh1filehandle that caches the information from the preceding 1055b8851fccSafresh1C<stat()>, C<lstat()>, or filetest. 1056b8851fccSafresh1 1057b8851fccSafresh1Here's another interesting wanted function. It will find all symbolic 1058b8851fccSafresh1links that don't resolve: 1059b8851fccSafresh1 1060b8851fccSafresh1 sub wanted { 1061b8851fccSafresh1 -l && !-e && print "bogus link: $File::Find::name\n"; 1062b8851fccSafresh1 } 1063b8851fccSafresh1 1064b8851fccSafresh1Note that you may mix directories and (non-directory) files in the list of 1065b8851fccSafresh1directories to be searched by the C<wanted()> function. 1066b8851fccSafresh1 1067b8851fccSafresh1 find(\&wanted, "./foo", "./bar", "./baz/epsilon"); 1068b8851fccSafresh1 1069b8851fccSafresh1In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be 1070b8851fccSafresh1evaluated by C<wanted()>. 1071b8851fccSafresh1 1072b8851fccSafresh1See also the script C<pfind> on CPAN for a nice application of this 1073b8851fccSafresh1module. 1074b8851fccSafresh1 1075b8851fccSafresh1=head1 WARNINGS 1076b8851fccSafresh1 1077b8851fccSafresh1If you run your program with the C<-w> switch, or if you use the 1078b8851fccSafresh1C<warnings> pragma, File::Find will report warnings for several weird 1079b8851fccSafresh1situations. You can disable these warnings by putting the statement 1080b8851fccSafresh1 1081b8851fccSafresh1 no warnings 'File::Find'; 1082b8851fccSafresh1 1083b8851fccSafresh1in the appropriate scope. See L<warnings> for more info about lexical 1084b8851fccSafresh1warnings. 1085b8851fccSafresh1 1086eac174f2Safresh1=head1 BUGS AND CAVEATS 1087b8851fccSafresh1 1088eac174f2Safresh1=over 4 1089b8851fccSafresh1 1090*3d61058aSafresh1=item C<$dont_use_nlink> 1091b8851fccSafresh1 1092*3d61058aSafresh1You can set the variable C<$File::Find::dont_use_nlink> to C<0> if you 1093b46d8ef2Safresh1are sure the filesystem you are scanning reflects the number of 1094b46d8ef2Safresh1subdirectories in the parent directory's C<nlink> count. 1095b8851fccSafresh1 1096b46d8ef2Safresh1If you do set C<$File::Find::dont_use_nlink> to 0, you may notice an 1097b46d8ef2Safresh1improvement in speed at the risk of not recursing into subdirectories 1098b46d8ef2Safresh1if a filesystem doesn't populate C<nlink> as expected. 1099b8851fccSafresh1 1100b46d8ef2Safresh1C<$File::Find::dont_use_nlink> now defaults to 1 on all platforms. 1101b8851fccSafresh1 1102*3d61058aSafresh1=item Symlinks 1103b8851fccSafresh1 1104b8851fccSafresh1Be aware that the option to follow symbolic links can be dangerous. 1105b8851fccSafresh1Depending on the structure of the directory tree (including symbolic 1106b8851fccSafresh1links to directories) you might traverse a given (physical) directory 1107b8851fccSafresh1more than once (only if C<follow_fast> is in effect). 1108b8851fccSafresh1Furthermore, deleting or changing files in a symbolically linked directory 1109b8851fccSafresh1might cause very unpleasant surprises, since you delete or change files 1110b8851fccSafresh1in an unknown directory. 1111b8851fccSafresh1 1112b8851fccSafresh1=back 1113b8851fccSafresh1 1114b8851fccSafresh1=head1 HISTORY 1115b8851fccSafresh1 1116*3d61058aSafresh1C<File::Find> used to produce incorrect results if called recursively. 1117b8851fccSafresh1During the development of perl 5.8 this bug was fixed. 1118*3d61058aSafresh1The first fixed version of C<File::Find> was 1.01. 1119b8851fccSafresh1 1120b8851fccSafresh1=head1 SEE ALSO 1121b8851fccSafresh1 1122*3d61058aSafresh1L<find(1)>, L<find2perl> 1123b8851fccSafresh1 1124b8851fccSafresh1=cut 1125