xref: /openbsd-src/gnu/usr.bin/perl/ext/File-Find/lib/File/Find.pm (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
16fb12b70Safresh1package File::Find;
2*3d61058aSafresh1
36fb12b70Safresh1use 5.006;
46fb12b70Safresh1use strict;
56fb12b70Safresh1use warnings;
66fb12b70Safresh1use warnings::register;
7*3d61058aSafresh1
8eac174f2Safresh1use Exporter 'import';
96fb12b70Safresh1require Cwd;
10*3d61058aSafresh1require File::Basename;
11*3d61058aSafresh1require File::Spec;
126fb12b70Safresh1
13*3d61058aSafresh1our $VERSION = '1.44';
146fb12b70Safresh1our @EXPORT = qw(find finddepth);
156fb12b70Safresh1
16b46d8ef2Safresh1my $Is_VMS = $^O eq 'VMS';
17b46d8ef2Safresh1my $Is_Win32 = $^O eq 'MSWin32';
186fb12b70Safresh1
196fb12b70Safresh1
206fb12b70Safresh1# Should ideally be my() not our() but local() currently
216fb12b70Safresh1# refuses to operate on lexicals
226fb12b70Safresh1
236fb12b70Safresh1our %SLnkSeen;
246fb12b70Safresh1our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
256fb12b70Safresh1    $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
266fb12b70Safresh1    $pre_process, $post_process, $dangling_symlinks);
276fb12b70Safresh1
286fb12b70Safresh1sub contract_name {
296fb12b70Safresh1    my ($cdir,$fn) = @_;
306fb12b70Safresh1
316fb12b70Safresh1    return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
326fb12b70Safresh1
336fb12b70Safresh1    $cdir = substr($cdir,0,rindex($cdir,'/')+1);
346fb12b70Safresh1
356fb12b70Safresh1    $fn =~ s|^\./||;
366fb12b70Safresh1
376fb12b70Safresh1    my $abs_name= $cdir . $fn;
386fb12b70Safresh1
396fb12b70Safresh1    if (substr($fn,0,3) eq '../') {
406fb12b70Safresh1       1 while $abs_name =~ s!/[^/]*/\.\./+!/!;
416fb12b70Safresh1    }
426fb12b70Safresh1
436fb12b70Safresh1    return $abs_name;
446fb12b70Safresh1}
456fb12b70Safresh1
46e0680481Safresh1sub _is_absolute {
47e0680481Safresh1    return $_[0] =~ m|^(?:[A-Za-z]:)?/| if $Is_Win32;
48e0680481Safresh1    return substr($_[0], 0, 1) eq '/';
49e0680481Safresh1}
50e0680481Safresh1
51e0680481Safresh1sub _is_root {
52e0680481Safresh1    return $_[0] =~ m|^(?:[A-Za-z]:)?/\z| if $Is_Win32;
53e0680481Safresh1    return $_[0] eq '/';
54e0680481Safresh1}
55e0680481Safresh1
566fb12b70Safresh1sub PathCombine($$) {
576fb12b70Safresh1    my ($Base,$Name) = @_;
586fb12b70Safresh1    my $AbsName;
596fb12b70Safresh1
60e0680481Safresh1    if (_is_absolute($Name)) {
616fb12b70Safresh1        $AbsName= $Name;
626fb12b70Safresh1    }
636fb12b70Safresh1    else {
646fb12b70Safresh1        $AbsName= contract_name($Base,$Name);
656fb12b70Safresh1    }
666fb12b70Safresh1
676fb12b70Safresh1    # (simple) check for recursion
686fb12b70Safresh1    my $newlen= length($AbsName);
696fb12b70Safresh1    if ($newlen <= length($Base)) {
706fb12b70Safresh1        if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
716fb12b70Safresh1            && $AbsName eq substr($Base,0,$newlen))
726fb12b70Safresh1        {
736fb12b70Safresh1            return undef;
746fb12b70Safresh1        }
756fb12b70Safresh1    }
766fb12b70Safresh1    return $AbsName;
776fb12b70Safresh1}
786fb12b70Safresh1
796fb12b70Safresh1sub Follow_SymLink($) {
806fb12b70Safresh1    my ($AbsName) = @_;
816fb12b70Safresh1
826fb12b70Safresh1    my ($NewName,$DEV, $INO);
836fb12b70Safresh1    ($DEV, $INO)= lstat $AbsName;
846fb12b70Safresh1
856fb12b70Safresh1    while (-l _) {
866fb12b70Safresh1        if ($SLnkSeen{$DEV, $INO}++) {
876fb12b70Safresh1            if ($follow_skip < 2) {
886fb12b70Safresh1                die "$AbsName is encountered a second time";
896fb12b70Safresh1            }
906fb12b70Safresh1            else {
916fb12b70Safresh1                return undef;
926fb12b70Safresh1            }
936fb12b70Safresh1        }
94e0680481Safresh1        my $Link = readlink($AbsName);
95e0680481Safresh1        # canonicalize directory separators
96e0680481Safresh1        $Link =~ s|\\|/|g if $Is_Win32;
97e0680481Safresh1        $NewName= PathCombine($AbsName, $Link);
986fb12b70Safresh1        unless(defined $NewName) {
996fb12b70Safresh1            if ($follow_skip < 2) {
1006fb12b70Safresh1                die "$AbsName is a recursive symbolic link";
1016fb12b70Safresh1            }
1026fb12b70Safresh1            else {
1036fb12b70Safresh1                return undef;
1046fb12b70Safresh1            }
1056fb12b70Safresh1        }
1066fb12b70Safresh1        else {
1076fb12b70Safresh1            $AbsName= $NewName;
1086fb12b70Safresh1        }
1096fb12b70Safresh1        ($DEV, $INO) = lstat($AbsName);
1106fb12b70Safresh1        return undef unless defined $DEV;  #  dangling symbolic link
1116fb12b70Safresh1    }
1126fb12b70Safresh1
1136fb12b70Safresh1    if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
1146fb12b70Safresh1        if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
1156fb12b70Safresh1            die "$AbsName encountered a second time";
1166fb12b70Safresh1        }
1176fb12b70Safresh1        else {
1186fb12b70Safresh1            return undef;
1196fb12b70Safresh1        }
1206fb12b70Safresh1    }
1216fb12b70Safresh1
1226fb12b70Safresh1    return $AbsName;
1236fb12b70Safresh1}
1246fb12b70Safresh1
1256fb12b70Safresh1our($dir, $name, $fullname, $prune);
1266fb12b70Safresh1sub _find_dir_symlnk($$$);
1276fb12b70Safresh1sub _find_dir($$$);
1286fb12b70Safresh1
1296fb12b70Safresh1# check whether or not a scalar variable is tainted
1306fb12b70Safresh1# (code straight from the Camel, 3rd ed., page 561)
1316fb12b70Safresh1sub is_tainted_pp {
1326fb12b70Safresh1    my $arg = shift;
1336fb12b70Safresh1    my $nada = substr($arg, 0, 0); # zero-length
1346fb12b70Safresh1    local $@;
1356fb12b70Safresh1    eval { eval "# $nada" };
1366fb12b70Safresh1    return length($@) != 0;
1376fb12b70Safresh1}
1386fb12b70Safresh1
139e0680481Safresh1
1406fb12b70Safresh1sub _find_opt {
1416fb12b70Safresh1    my $wanted = shift;
142b8851fccSafresh1    return unless @_;
1436fb12b70Safresh1    die "invalid top directory" unless defined $_[0];
1446fb12b70Safresh1
1456fb12b70Safresh1    # This function must local()ize everything because callbacks may
1466fb12b70Safresh1    # call find() or finddepth()
1476fb12b70Safresh1
1486fb12b70Safresh1    local %SLnkSeen;
1496fb12b70Safresh1    local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
1506fb12b70Safresh1        $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
1516fb12b70Safresh1        $pre_process, $post_process, $dangling_symlinks);
1526fb12b70Safresh1    local($dir, $name, $fullname, $prune);
1536fb12b70Safresh1    local *_ = \my $a;
1546fb12b70Safresh1
1556fb12b70Safresh1    my $cwd            = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
1566fb12b70Safresh1    if ($Is_VMS) {
1576fb12b70Safresh1        # VMS returns this by default in VMS format which just doesn't
1586fb12b70Safresh1        # work for the rest of this module.
1596fb12b70Safresh1        $cwd = VMS::Filespec::unixpath($cwd);
1606fb12b70Safresh1
1616fb12b70Safresh1        # Apparently this is not expected to have a trailing space.
1626fb12b70Safresh1        # To attempt to make VMS/UNIX conversions mostly reversible,
1636fb12b70Safresh1        # a trailing slash is needed.  The run-time functions ignore the
1646fb12b70Safresh1        # resulting double slash, but it causes the perl tests to fail.
1656fb12b70Safresh1        $cwd =~ s#/\z##;
1666fb12b70Safresh1
1676fb12b70Safresh1        # This comes up in upper case now, but should be lower.
1686fb12b70Safresh1        # In the future this could be exact case, no need to change.
1696fb12b70Safresh1    }
1706fb12b70Safresh1    my $cwd_untainted  = $cwd;
1716fb12b70Safresh1    my $check_t_cwd    = 1;
1726fb12b70Safresh1    $wanted_callback   = $wanted->{wanted};
1736fb12b70Safresh1    $bydepth           = $wanted->{bydepth};
1746fb12b70Safresh1    $pre_process       = $wanted->{preprocess};
1756fb12b70Safresh1    $post_process      = $wanted->{postprocess};
1766fb12b70Safresh1    $no_chdir          = $wanted->{no_chdir};
177eac174f2Safresh1    $full_check        = $wanted->{follow};
178eac174f2Safresh1    $follow            = $full_check || $wanted->{follow_fast};
1796fb12b70Safresh1    $follow_skip       = $wanted->{follow_skip};
1806fb12b70Safresh1    $untaint           = $wanted->{untaint};
1816fb12b70Safresh1    $untaint_pat       = $wanted->{untaint_pattern};
1826fb12b70Safresh1    $untaint_skip      = $wanted->{untaint_skip};
1836fb12b70Safresh1    $dangling_symlinks = $wanted->{dangling_symlinks};
1846fb12b70Safresh1
1856fb12b70Safresh1    # for compatibility reasons (find.pl, find2perl)
1866fb12b70Safresh1    local our ($topdir, $topdev, $topino, $topmode, $topnlink);
1876fb12b70Safresh1
1886fb12b70Safresh1    # a symbolic link to a directory doesn't increase the link count
1896fb12b70Safresh1    $avoid_nlink      = $follow || $File::Find::dont_use_nlink;
1906fb12b70Safresh1
1916fb12b70Safresh1    my ($abs_dir, $Is_Dir);
1926fb12b70Safresh1
1936fb12b70Safresh1    Proc_Top_Item:
1946fb12b70Safresh1    foreach my $TOP (@_) {
1956fb12b70Safresh1        my $top_item = $TOP;
1966fb12b70Safresh1        $top_item = VMS::Filespec::unixify($top_item) if $Is_VMS;
1976fb12b70Safresh1
1986fb12b70Safresh1        ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
1996fb12b70Safresh1
200e0680481Safresh1        # canonicalize directory separators
201e0680481Safresh1        $top_item =~ s|[/\\]|/|g if $Is_Win32;
202e0680481Safresh1
203e0680481Safresh1        # no trailing / unless path is root
204e0680481Safresh1        $top_item =~ s|/\z|| unless _is_root($top_item);
2056fb12b70Safresh1
2066fb12b70Safresh1        $Is_Dir= 0;
2076fb12b70Safresh1
2086fb12b70Safresh1        if ($follow) {
2096fb12b70Safresh1
210e0680481Safresh1            if (_is_absolute($top_item)) {
2116fb12b70Safresh1                $abs_dir = $top_item;
2126fb12b70Safresh1            }
2136fb12b70Safresh1            elsif ($top_item eq $File::Find::current_dir) {
2146fb12b70Safresh1                $abs_dir = $cwd;
2156fb12b70Safresh1            }
2166fb12b70Safresh1            else {  # care about any  ../
2176fb12b70Safresh1                $top_item =~ s/\.dir\z//i if $Is_VMS;
2186fb12b70Safresh1                $abs_dir = contract_name("$cwd/",$top_item);
2196fb12b70Safresh1            }
2206fb12b70Safresh1            $abs_dir= Follow_SymLink($abs_dir);
2216fb12b70Safresh1            unless (defined $abs_dir) {
2226fb12b70Safresh1                if ($dangling_symlinks) {
2236fb12b70Safresh1                    if (ref $dangling_symlinks eq 'CODE') {
2246fb12b70Safresh1                        $dangling_symlinks->($top_item, $cwd);
2256fb12b70Safresh1                    } else {
2266fb12b70Safresh1                        warnings::warnif "$top_item is a dangling symbolic link\n";
2276fb12b70Safresh1                    }
2286fb12b70Safresh1                }
2296fb12b70Safresh1                next Proc_Top_Item;
2306fb12b70Safresh1            }
2316fb12b70Safresh1
2326fb12b70Safresh1            if (-d _) {
2336fb12b70Safresh1                $top_item =~ s/\.dir\z//i if $Is_VMS;
2346fb12b70Safresh1                _find_dir_symlnk($wanted, $abs_dir, $top_item);
2356fb12b70Safresh1                $Is_Dir= 1;
2366fb12b70Safresh1            }
2376fb12b70Safresh1        }
2386fb12b70Safresh1        else { # no follow
2396fb12b70Safresh1            $topdir = $top_item;
2406fb12b70Safresh1            unless (defined $topnlink) {
2416fb12b70Safresh1                warnings::warnif "Can't stat $top_item: $!\n";
2426fb12b70Safresh1                next Proc_Top_Item;
2436fb12b70Safresh1            }
2446fb12b70Safresh1            if (-d _) {
2456fb12b70Safresh1                $top_item =~ s/\.dir\z//i if $Is_VMS;
2466fb12b70Safresh1                _find_dir($wanted, $top_item, $topnlink);
2476fb12b70Safresh1                $Is_Dir= 1;
2486fb12b70Safresh1            }
2496fb12b70Safresh1            else {
2506fb12b70Safresh1                $abs_dir= $top_item;
2516fb12b70Safresh1            }
2526fb12b70Safresh1        }
2536fb12b70Safresh1
2546fb12b70Safresh1        unless ($Is_Dir) {
2556fb12b70Safresh1            unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
2566fb12b70Safresh1                ($dir,$_) = ('./', $top_item);
2576fb12b70Safresh1            }
2586fb12b70Safresh1
2596fb12b70Safresh1            $abs_dir = $dir;
2606fb12b70Safresh1            if (( $untaint ) && (is_tainted($dir) )) {
2616fb12b70Safresh1                ( $abs_dir ) = $dir =~ m|$untaint_pat|;
2626fb12b70Safresh1                unless (defined $abs_dir) {
2636fb12b70Safresh1                    if ($untaint_skip == 0) {
2646fb12b70Safresh1                        die "directory $dir is still tainted";
2656fb12b70Safresh1                    }
2666fb12b70Safresh1                    else {
2676fb12b70Safresh1                        next Proc_Top_Item;
2686fb12b70Safresh1                    }
2696fb12b70Safresh1                }
2706fb12b70Safresh1            }
2716fb12b70Safresh1
2726fb12b70Safresh1            unless ($no_chdir || chdir $abs_dir) {
2736fb12b70Safresh1                warnings::warnif "Couldn't chdir $abs_dir: $!\n";
2746fb12b70Safresh1                next Proc_Top_Item;
2756fb12b70Safresh1            }
2766fb12b70Safresh1
2776fb12b70Safresh1            $name = $abs_dir . $_; # $File::Find::name
2786fb12b70Safresh1            $_ = $name if $no_chdir;
2796fb12b70Safresh1
2806fb12b70Safresh1            { $wanted_callback->() }; # protect against wild "next"
2816fb12b70Safresh1
2826fb12b70Safresh1        }
2836fb12b70Safresh1
2846fb12b70Safresh1        unless ( $no_chdir ) {
2856fb12b70Safresh1            if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
2866fb12b70Safresh1                ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
2876fb12b70Safresh1                unless (defined $cwd_untainted) {
2886fb12b70Safresh1                    die "insecure cwd in find(depth)";
2896fb12b70Safresh1                }
2906fb12b70Safresh1                $check_t_cwd = 0;
2916fb12b70Safresh1            }
2926fb12b70Safresh1            unless (chdir $cwd_untainted) {
2936fb12b70Safresh1                die "Can't cd to $cwd: $!\n";
2946fb12b70Safresh1            }
2956fb12b70Safresh1        }
2966fb12b70Safresh1    }
2976fb12b70Safresh1}
2986fb12b70Safresh1
2996fb12b70Safresh1# API:
3006fb12b70Safresh1#  $wanted
3016fb12b70Safresh1#  $p_dir :  "parent directory"
3026fb12b70Safresh1#  $nlink :  what came back from the stat
3036fb12b70Safresh1# preconditions:
3046fb12b70Safresh1#  chdir (if not no_chdir) to dir
3056fb12b70Safresh1
3066fb12b70Safresh1sub _find_dir($$$) {
3076fb12b70Safresh1    my ($wanted, $p_dir, $nlink) = @_;
3086fb12b70Safresh1    my ($CdLvl,$Level) = (0,0);
3096fb12b70Safresh1    my @Stack;
3106fb12b70Safresh1    my @filenames;
3116fb12b70Safresh1    my ($subcount,$sub_nlink);
3126fb12b70Safresh1    my $SE= [];
3136fb12b70Safresh1    my $dir_name= $p_dir;
3146fb12b70Safresh1    my $dir_pref;
3156fb12b70Safresh1    my $dir_rel = $File::Find::current_dir;
3166fb12b70Safresh1    my $tainted = 0;
3176fb12b70Safresh1    my $no_nlink;
3186fb12b70Safresh1
319e0680481Safresh1    if ($Is_VMS) {
3206fb12b70Safresh1        # VMS is returning trailing .dir on directories
3216fb12b70Safresh1        # and trailing . on files and symbolic links
3226fb12b70Safresh1        # in UNIX syntax.
3236fb12b70Safresh1        #
3246fb12b70Safresh1
3256fb12b70Safresh1        $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.';
3266fb12b70Safresh1
3276fb12b70Safresh1        $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" );
3286fb12b70Safresh1    }
3296fb12b70Safresh1    else {
330e0680481Safresh1        $dir_pref = _is_root($p_dir) ? $p_dir : "$p_dir/";
3316fb12b70Safresh1    }
3326fb12b70Safresh1
333eac174f2Safresh1    local ($dir, $name, $prune);
3346fb12b70Safresh1
3356fb12b70Safresh1    unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
3366fb12b70Safresh1        my $udir = $p_dir;
3376fb12b70Safresh1        if (( $untaint ) && (is_tainted($p_dir) )) {
3386fb12b70Safresh1            ( $udir ) = $p_dir =~ m|$untaint_pat|;
3396fb12b70Safresh1            unless (defined $udir) {
3406fb12b70Safresh1                if ($untaint_skip == 0) {
3416fb12b70Safresh1                    die "directory $p_dir is still tainted";
3426fb12b70Safresh1                }
3436fb12b70Safresh1                else {
3446fb12b70Safresh1                    return;
3456fb12b70Safresh1                }
3466fb12b70Safresh1            }
3476fb12b70Safresh1        }
3486fb12b70Safresh1        unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
3496fb12b70Safresh1            warnings::warnif "Can't cd to $udir: $!\n";
3506fb12b70Safresh1            return;
3516fb12b70Safresh1        }
3526fb12b70Safresh1    }
3536fb12b70Safresh1
3546fb12b70Safresh1    # push the starting directory
3556fb12b70Safresh1    push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;
3566fb12b70Safresh1
3576fb12b70Safresh1    while (defined $SE) {
3586fb12b70Safresh1        unless ($bydepth) {
3596fb12b70Safresh1            $dir= $p_dir; # $File::Find::dir
3606fb12b70Safresh1            $name= $dir_name; # $File::Find::name
3616fb12b70Safresh1            $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
3626fb12b70Safresh1            # prune may happen here
3636fb12b70Safresh1            $prune= 0;
3646fb12b70Safresh1            { $wanted_callback->() };   # protect against wild "next"
3656fb12b70Safresh1            next if $prune;
3666fb12b70Safresh1        }
3676fb12b70Safresh1
3686fb12b70Safresh1        # change to that directory
3696fb12b70Safresh1        unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
3706fb12b70Safresh1            my $udir= $dir_rel;
3716fb12b70Safresh1            if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
3726fb12b70Safresh1                ( $udir ) = $dir_rel =~ m|$untaint_pat|;
3736fb12b70Safresh1                unless (defined $udir) {
3746fb12b70Safresh1                    if ($untaint_skip == 0) {
3756fb12b70Safresh1                        die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
3766fb12b70Safresh1                    } else { # $untaint_skip == 1
3776fb12b70Safresh1                        next;
3786fb12b70Safresh1                    }
3796fb12b70Safresh1                }
3806fb12b70Safresh1            }
3816fb12b70Safresh1            unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
3826fb12b70Safresh1                warnings::warnif "Can't cd to (" .
3836fb12b70Safresh1                    ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
3846fb12b70Safresh1                next;
3856fb12b70Safresh1            }
3866fb12b70Safresh1            $CdLvl++;
3876fb12b70Safresh1        }
3886fb12b70Safresh1
3896fb12b70Safresh1        $dir= $dir_name; # $File::Find::dir
3906fb12b70Safresh1
3916fb12b70Safresh1        # Get the list of files in the current directory.
392eac174f2Safresh1        my $dh;
393eac174f2Safresh1        unless (opendir $dh, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
3946fb12b70Safresh1            warnings::warnif "Can't opendir($dir_name): $!\n";
3956fb12b70Safresh1            next;
3966fb12b70Safresh1        }
397eac174f2Safresh1        @filenames = readdir $dh;
398eac174f2Safresh1        closedir($dh);
3996fb12b70Safresh1        @filenames = $pre_process->(@filenames) if $pre_process;
4006fb12b70Safresh1        push @Stack,[$CdLvl,$dir_name,"",-2]   if $post_process;
4016fb12b70Safresh1
4026fb12b70Safresh1        # default: use whatever was specified
4036fb12b70Safresh1        # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
4046fb12b70Safresh1        $no_nlink = $avoid_nlink;
4056fb12b70Safresh1        # if dir has wrong nlink count, force switch to slower stat method
4066fb12b70Safresh1        $no_nlink = 1 if ($nlink < 2);
4076fb12b70Safresh1
4086fb12b70Safresh1        if ($nlink == 2 && !$no_nlink) {
4096fb12b70Safresh1            # This dir has no subdirectories.
4106fb12b70Safresh1            for my $FN (@filenames) {
4116fb12b70Safresh1                if ($Is_VMS) {
4126fb12b70Safresh1                    # Big hammer here - Compensate for VMS trailing . and .dir
4136fb12b70Safresh1                    # No win situation until this is changed, but this
4146fb12b70Safresh1                    # will handle the majority of the cases with breaking the fewest
4156fb12b70Safresh1
4166fb12b70Safresh1                    $FN =~ s/\.dir\z//i;
4176fb12b70Safresh1                    $FN =~ s#\.$## if ($FN ne '.');
4186fb12b70Safresh1                }
4196fb12b70Safresh1                next if $FN =~ $File::Find::skip_pattern;
4206fb12b70Safresh1
4216fb12b70Safresh1                $name = $dir_pref . $FN; # $File::Find::name
4226fb12b70Safresh1                $_ = ($no_chdir ? $name : $FN); # $_
4236fb12b70Safresh1                { $wanted_callback->() }; # protect against wild "next"
4246fb12b70Safresh1            }
4256fb12b70Safresh1
4266fb12b70Safresh1        }
4276fb12b70Safresh1        else {
4286fb12b70Safresh1            # This dir has subdirectories.
4296fb12b70Safresh1            $subcount = $nlink - 2;
4306fb12b70Safresh1
431b8851fccSafresh1            # HACK: insert directories at this position, so as to preserve
432b8851fccSafresh1            # the user pre-processed ordering of files (thus ensuring
433b8851fccSafresh1            # directory traversal is in user sorted order, not at random).
4346fb12b70Safresh1            my $stack_top = @Stack;
4356fb12b70Safresh1
4366fb12b70Safresh1            for my $FN (@filenames) {
4376fb12b70Safresh1                next if $FN =~ $File::Find::skip_pattern;
4386fb12b70Safresh1                if ($subcount > 0 || $no_nlink) {
4396fb12b70Safresh1                    # Seen all the subdirs?
4406fb12b70Safresh1                    # check for directoriness.
4416fb12b70Safresh1                    # stat is faster for a file in the current directory
4426fb12b70Safresh1                    $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
4436fb12b70Safresh1
4446fb12b70Safresh1                    if (-d _) {
4456fb12b70Safresh1                        --$subcount;
4466fb12b70Safresh1                        $FN =~ s/\.dir\z//i if $Is_VMS;
4476fb12b70Safresh1                        # HACK: replace push to preserve dir traversal order
4486fb12b70Safresh1                        #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
4496fb12b70Safresh1                        splice @Stack, $stack_top, 0,
4506fb12b70Safresh1                                 [$CdLvl,$dir_name,$FN,$sub_nlink];
4516fb12b70Safresh1                    }
4526fb12b70Safresh1                    else {
4536fb12b70Safresh1                        $name = $dir_pref . $FN; # $File::Find::name
4546fb12b70Safresh1                        $_= ($no_chdir ? $name : $FN); # $_
4556fb12b70Safresh1                        { $wanted_callback->() }; # protect against wild "next"
4566fb12b70Safresh1                    }
4576fb12b70Safresh1                }
4586fb12b70Safresh1                else {
4596fb12b70Safresh1                    $name = $dir_pref . $FN; # $File::Find::name
4606fb12b70Safresh1                    $_= ($no_chdir ? $name : $FN); # $_
4616fb12b70Safresh1                    { $wanted_callback->() }; # protect against wild "next"
4626fb12b70Safresh1                }
4636fb12b70Safresh1            }
4646fb12b70Safresh1        }
4656fb12b70Safresh1    }
4666fb12b70Safresh1    continue {
4676fb12b70Safresh1        while ( defined ($SE = pop @Stack) ) {
4686fb12b70Safresh1            ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
4696fb12b70Safresh1            if ($CdLvl > $Level && !$no_chdir) {
4706fb12b70Safresh1                my $tmp;
4716fb12b70Safresh1                if ($Is_VMS) {
4726fb12b70Safresh1                    $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']';
4736fb12b70Safresh1                }
4746fb12b70Safresh1                else {
4756fb12b70Safresh1                    $tmp = join('/',('..') x ($CdLvl-$Level));
4766fb12b70Safresh1                }
4776fb12b70Safresh1                die "Can't cd to $tmp from $dir_name: $!"
4786fb12b70Safresh1                    unless chdir ($tmp);
4796fb12b70Safresh1                $CdLvl = $Level;
4806fb12b70Safresh1            }
4816fb12b70Safresh1
482e0680481Safresh1            if ($^O eq 'VMS') {
4836fb12b70Safresh1                if ($p_dir =~ m/[\]>]+$/) {
4846fb12b70Safresh1                    $dir_name = $p_dir;
4856fb12b70Safresh1                    $dir_name =~ s/([\]>]+)$/.$dir_rel$1/;
4866fb12b70Safresh1                    $dir_pref = $dir_name;
4876fb12b70Safresh1                }
4886fb12b70Safresh1                else {
4896fb12b70Safresh1                    $dir_name = "$p_dir/$dir_rel";
4906fb12b70Safresh1                    $dir_pref = "$dir_name/";
4916fb12b70Safresh1                }
4926fb12b70Safresh1            }
4936fb12b70Safresh1            else {
494e0680481Safresh1                $dir_name = _is_root($p_dir) ? "$p_dir$dir_rel" : "$p_dir/$dir_rel";
4956fb12b70Safresh1                $dir_pref = "$dir_name/";
4966fb12b70Safresh1            }
4976fb12b70Safresh1
4986fb12b70Safresh1            if ( $nlink == -2 ) {
4996fb12b70Safresh1                $name = $dir = $p_dir; # $File::Find::name / dir
5006fb12b70Safresh1                $_ = $File::Find::current_dir;
5016fb12b70Safresh1                $post_process->();              # End-of-directory processing
5026fb12b70Safresh1            }
5036fb12b70Safresh1            elsif ( $nlink < 0 ) {  # must be finddepth, report dirname now
5046fb12b70Safresh1                $name = $dir_name;
5056fb12b70Safresh1                if ( substr($name,-2) eq '/.' ) {
5066fb12b70Safresh1                    substr($name, length($name) == 2 ? -1 : -2) = '';
5076fb12b70Safresh1                }
5086fb12b70Safresh1                $dir = $p_dir;
5096fb12b70Safresh1                $_ = ($no_chdir ? $dir_name : $dir_rel );
5106fb12b70Safresh1                if ( substr($_,-2) eq '/.' ) {
5116fb12b70Safresh1                    substr($_, length($_) == 2 ? -1 : -2) = '';
5126fb12b70Safresh1                }
5136fb12b70Safresh1                { $wanted_callback->() }; # protect against wild "next"
5146fb12b70Safresh1             }
5156fb12b70Safresh1             else {
5166fb12b70Safresh1                push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;
5176fb12b70Safresh1                last;
5186fb12b70Safresh1            }
5196fb12b70Safresh1        }
5206fb12b70Safresh1    }
5216fb12b70Safresh1}
5226fb12b70Safresh1
5236fb12b70Safresh1
5246fb12b70Safresh1# API:
5256fb12b70Safresh1#  $wanted
5266fb12b70Safresh1#  $dir_loc : absolute location of a dir
5276fb12b70Safresh1#  $p_dir   : "parent directory"
5286fb12b70Safresh1# preconditions:
5296fb12b70Safresh1#  chdir (if not no_chdir) to dir
5306fb12b70Safresh1
5316fb12b70Safresh1sub _find_dir_symlnk($$$) {
5326fb12b70Safresh1    my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
5336fb12b70Safresh1    my @Stack;
5346fb12b70Safresh1    my @filenames;
5356fb12b70Safresh1    my $new_loc;
5366fb12b70Safresh1    my $updir_loc = $dir_loc; # untainted parent directory
5376fb12b70Safresh1    my $SE = [];
5386fb12b70Safresh1    my $dir_name = $p_dir;
5396fb12b70Safresh1    my $dir_pref;
5406fb12b70Safresh1    my $loc_pref;
5416fb12b70Safresh1    my $dir_rel = $File::Find::current_dir;
5426fb12b70Safresh1    my $byd_flag; # flag for pending stack entry if $bydepth
5436fb12b70Safresh1    my $tainted = 0;
5446fb12b70Safresh1    my $ok = 1;
5456fb12b70Safresh1
546e0680481Safresh1    $dir_pref = _is_root($p_dir) ? $p_dir : "$p_dir/";
547e0680481Safresh1    $loc_pref = _is_root($dir_loc) ? $dir_loc : "$dir_loc/";
5486fb12b70Safresh1
549eac174f2Safresh1    local ($dir, $name, $fullname, $prune);
5506fb12b70Safresh1
5516fb12b70Safresh1    unless ($no_chdir) {
5526fb12b70Safresh1        # untaint the topdir
5536fb12b70Safresh1        if (( $untaint ) && (is_tainted($dir_loc) )) {
5546fb12b70Safresh1            ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
5556fb12b70Safresh1            # once untainted, $updir_loc is pushed on the stack (as parent directory);
5566fb12b70Safresh1            # hence, we don't need to untaint the parent directory every time we chdir
5576fb12b70Safresh1            # to it later
5586fb12b70Safresh1            unless (defined $updir_loc) {
5596fb12b70Safresh1                if ($untaint_skip == 0) {
5606fb12b70Safresh1                    die "directory $dir_loc is still tainted";
5616fb12b70Safresh1                }
5626fb12b70Safresh1                else {
5636fb12b70Safresh1                    return;
5646fb12b70Safresh1                }
5656fb12b70Safresh1            }
5666fb12b70Safresh1        }
5676fb12b70Safresh1        $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
5686fb12b70Safresh1        unless ($ok) {
5696fb12b70Safresh1            warnings::warnif "Can't cd to $updir_loc: $!\n";
5706fb12b70Safresh1            return;
5716fb12b70Safresh1        }
5726fb12b70Safresh1    }
5736fb12b70Safresh1
5746fb12b70Safresh1    push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1]  if  $bydepth;
5756fb12b70Safresh1
5766fb12b70Safresh1    while (defined $SE) {
5776fb12b70Safresh1
5786fb12b70Safresh1        unless ($bydepth) {
5796fb12b70Safresh1            # change (back) to parent directory (always untainted)
5806fb12b70Safresh1            unless ($no_chdir) {
5816fb12b70Safresh1                unless (chdir $updir_loc) {
5826fb12b70Safresh1                    warnings::warnif "Can't cd to $updir_loc: $!\n";
5836fb12b70Safresh1                    next;
5846fb12b70Safresh1                }
5856fb12b70Safresh1            }
5866fb12b70Safresh1            $dir= $p_dir; # $File::Find::dir
5876fb12b70Safresh1            $name= $dir_name; # $File::Find::name
5886fb12b70Safresh1            $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
5896fb12b70Safresh1            $fullname= $dir_loc; # $File::Find::fullname
5906fb12b70Safresh1            # prune may happen here
5916fb12b70Safresh1            $prune= 0;
5926fb12b70Safresh1            lstat($_); # make sure  file tests with '_' work
5936fb12b70Safresh1            { $wanted_callback->() }; # protect against wild "next"
5946fb12b70Safresh1            next if $prune;
5956fb12b70Safresh1        }
5966fb12b70Safresh1
5976fb12b70Safresh1        # change to that directory
5986fb12b70Safresh1        unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
5996fb12b70Safresh1            $updir_loc = $dir_loc;
6006fb12b70Safresh1            if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
6016fb12b70Safresh1                # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
6026fb12b70Safresh1                ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
6036fb12b70Safresh1                unless (defined $updir_loc) {
6046fb12b70Safresh1                    if ($untaint_skip == 0) {
6056fb12b70Safresh1                        die "directory $dir_loc is still tainted";
6066fb12b70Safresh1                    }
6076fb12b70Safresh1                    else {
6086fb12b70Safresh1                        next;
6096fb12b70Safresh1                    }
6106fb12b70Safresh1                }
6116fb12b70Safresh1            }
6126fb12b70Safresh1            unless (chdir $updir_loc) {
6136fb12b70Safresh1                warnings::warnif "Can't cd to $updir_loc: $!\n";
6146fb12b70Safresh1                next;
6156fb12b70Safresh1            }
6166fb12b70Safresh1        }
6176fb12b70Safresh1
6186fb12b70Safresh1        $dir = $dir_name; # $File::Find::dir
6196fb12b70Safresh1
6206fb12b70Safresh1        # Get the list of files in the current directory.
621eac174f2Safresh1        my $dh;
622eac174f2Safresh1        unless (opendir $dh, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
6236fb12b70Safresh1            warnings::warnif "Can't opendir($dir_loc): $!\n";
6246fb12b70Safresh1            next;
6256fb12b70Safresh1        }
626eac174f2Safresh1        @filenames = readdir $dh;
627eac174f2Safresh1        closedir($dh);
6286fb12b70Safresh1
6296fb12b70Safresh1        for my $FN (@filenames) {
6306fb12b70Safresh1            if ($Is_VMS) {
6316fb12b70Safresh1                # Big hammer here - Compensate for VMS trailing . and .dir
6326fb12b70Safresh1                # No win situation until this is changed, but this
6336fb12b70Safresh1                # will handle the majority of the cases with breaking the fewest.
6346fb12b70Safresh1
6356fb12b70Safresh1                $FN =~ s/\.dir\z//i;
6366fb12b70Safresh1                $FN =~ s#\.$## if ($FN ne '.');
6376fb12b70Safresh1            }
6386fb12b70Safresh1            next if $FN =~ $File::Find::skip_pattern;
6396fb12b70Safresh1
6406fb12b70Safresh1            # follow symbolic links / do an lstat
6416fb12b70Safresh1            $new_loc = Follow_SymLink($loc_pref.$FN);
6426fb12b70Safresh1
6436fb12b70Safresh1            # ignore if invalid symlink
6446fb12b70Safresh1            unless (defined $new_loc) {
6456fb12b70Safresh1                if (!defined -l _ && $dangling_symlinks) {
6466fb12b70Safresh1                $fullname = undef;
6476fb12b70Safresh1                    if (ref $dangling_symlinks eq 'CODE') {
6486fb12b70Safresh1                        $dangling_symlinks->($FN, $dir_pref);
6496fb12b70Safresh1                    } else {
6506fb12b70Safresh1                        warnings::warnif "$dir_pref$FN is a dangling symbolic link\n";
6516fb12b70Safresh1                    }
6526fb12b70Safresh1                }
6536fb12b70Safresh1            else {
6546fb12b70Safresh1                $fullname = $loc_pref . $FN;
6556fb12b70Safresh1            }
6566fb12b70Safresh1                $name = $dir_pref . $FN;
6576fb12b70Safresh1                $_ = ($no_chdir ? $name : $FN);
6586fb12b70Safresh1                { $wanted_callback->() };
6596fb12b70Safresh1                next;
6606fb12b70Safresh1            }
6616fb12b70Safresh1
6626fb12b70Safresh1            if (-d _) {
6636fb12b70Safresh1                if ($Is_VMS) {
6646fb12b70Safresh1                    $FN =~ s/\.dir\z//i;
6656fb12b70Safresh1                    $FN =~ s#\.$## if ($FN ne '.');
6666fb12b70Safresh1                    $new_loc =~ s/\.dir\z//i;
6676fb12b70Safresh1                    $new_loc =~ s#\.$## if ($new_loc ne '.');
6686fb12b70Safresh1                }
6696fb12b70Safresh1                push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
6706fb12b70Safresh1            }
6716fb12b70Safresh1            else {
6726fb12b70Safresh1                $fullname = $new_loc; # $File::Find::fullname
6736fb12b70Safresh1                $name = $dir_pref . $FN; # $File::Find::name
6746fb12b70Safresh1                $_ = ($no_chdir ? $name : $FN); # $_
6756fb12b70Safresh1                { $wanted_callback->() }; # protect against wild "next"
6766fb12b70Safresh1            }
6776fb12b70Safresh1        }
6786fb12b70Safresh1
6796fb12b70Safresh1    }
6806fb12b70Safresh1    continue {
6816fb12b70Safresh1        while (defined($SE = pop @Stack)) {
6826fb12b70Safresh1            ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
683e0680481Safresh1            $dir_name = _is_root($p_dir) ? "$p_dir$dir_rel" : "$p_dir/$dir_rel";
6846fb12b70Safresh1            $dir_pref = "$dir_name/";
6856fb12b70Safresh1            $loc_pref = "$dir_loc/";
6866fb12b70Safresh1            if ( $byd_flag < 0 ) {  # must be finddepth, report dirname now
6876fb12b70Safresh1                unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
6886fb12b70Safresh1                    unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
6896fb12b70Safresh1                        warnings::warnif "Can't cd to $updir_loc: $!\n";
6906fb12b70Safresh1                        next;
6916fb12b70Safresh1                    }
6926fb12b70Safresh1                }
6936fb12b70Safresh1                $fullname = $dir_loc; # $File::Find::fullname
6946fb12b70Safresh1                $name = $dir_name; # $File::Find::name
6956fb12b70Safresh1                if ( substr($name,-2) eq '/.' ) {
6966fb12b70Safresh1                    substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
6976fb12b70Safresh1                }
6986fb12b70Safresh1                $dir = $p_dir; # $File::Find::dir
6996fb12b70Safresh1                $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
7006fb12b70Safresh1                if ( substr($_,-2) eq '/.' ) {
7016fb12b70Safresh1                    substr($_, length($_) == 2 ? -1 : -2) = '';
7026fb12b70Safresh1                }
7036fb12b70Safresh1
7046fb12b70Safresh1                lstat($_); # make sure file tests with '_' work
7056fb12b70Safresh1                { $wanted_callback->() }; # protect against wild "next"
7066fb12b70Safresh1            }
7076fb12b70Safresh1            else {
7086fb12b70Safresh1                push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1]  if  $bydepth;
7096fb12b70Safresh1                last;
7106fb12b70Safresh1            }
7116fb12b70Safresh1        }
7126fb12b70Safresh1    }
7136fb12b70Safresh1}
7146fb12b70Safresh1
7156fb12b70Safresh1
7166fb12b70Safresh1sub wrap_wanted {
7176fb12b70Safresh1    my $wanted = shift;
7186fb12b70Safresh1    if ( ref($wanted) eq 'HASH' ) {
719b8851fccSafresh1        # RT #122547
720b8851fccSafresh1        my %valid_options = map {$_ => 1} qw(
721b8851fccSafresh1            wanted
722b8851fccSafresh1            bydepth
723b8851fccSafresh1            preprocess
724b8851fccSafresh1            postprocess
725b8851fccSafresh1            follow
726b8851fccSafresh1            follow_fast
727b8851fccSafresh1            follow_skip
728b8851fccSafresh1            dangling_symlinks
729b8851fccSafresh1            no_chdir
730b8851fccSafresh1            untaint
731b8851fccSafresh1            untaint_pattern
732b8851fccSafresh1            untaint_skip
733b8851fccSafresh1        );
734b8851fccSafresh1        my @invalid_options = ();
735b8851fccSafresh1        for my $v (keys %{$wanted}) {
736b8851fccSafresh1            push @invalid_options, $v unless exists $valid_options{$v};
737b8851fccSafresh1        }
738b8851fccSafresh1        warn "Invalid option(s): @invalid_options" if @invalid_options;
739b8851fccSafresh1
7406fb12b70Safresh1        unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) {
7416fb12b70Safresh1            die 'no &wanted subroutine given';
7426fb12b70Safresh1        }
7436fb12b70Safresh1        if ( $wanted->{follow} || $wanted->{follow_fast}) {
7446fb12b70Safresh1            $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
7456fb12b70Safresh1        }
7466fb12b70Safresh1        if ( $wanted->{untaint} ) {
7476fb12b70Safresh1            $wanted->{untaint_pattern} = $File::Find::untaint_pattern
7486fb12b70Safresh1            unless defined $wanted->{untaint_pattern};
7496fb12b70Safresh1            $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
7506fb12b70Safresh1        }
7516fb12b70Safresh1        return $wanted;
7526fb12b70Safresh1    }
7536fb12b70Safresh1    elsif( ref( $wanted ) eq 'CODE' ) {
7546fb12b70Safresh1        return { wanted => $wanted };
7556fb12b70Safresh1    }
7566fb12b70Safresh1    else {
7576fb12b70Safresh1       die 'no &wanted subroutine given';
7586fb12b70Safresh1    }
7596fb12b70Safresh1}
7606fb12b70Safresh1
7616fb12b70Safresh1sub find {
7626fb12b70Safresh1    my $wanted = shift;
7636fb12b70Safresh1    _find_opt(wrap_wanted($wanted), @_);
7646fb12b70Safresh1}
7656fb12b70Safresh1
7666fb12b70Safresh1sub finddepth {
7676fb12b70Safresh1    my $wanted = wrap_wanted(shift);
7686fb12b70Safresh1    $wanted->{bydepth} = 1;
7696fb12b70Safresh1    _find_opt($wanted, @_);
7706fb12b70Safresh1}
7716fb12b70Safresh1
7726fb12b70Safresh1# default
7736fb12b70Safresh1$File::Find::skip_pattern    = qr/^\.{1,2}\z/;
7746fb12b70Safresh1$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
7756fb12b70Safresh1
7766fb12b70Safresh1# this _should_ work properly on all platforms
7776fb12b70Safresh1# where File::Find can be expected to work
7786fb12b70Safresh1$File::Find::current_dir = File::Spec->curdir || '.';
7796fb12b70Safresh1
780b46d8ef2Safresh1$File::Find::dont_use_nlink = 1;
7816fb12b70Safresh1
7826fb12b70Safresh1# We need a function that checks if a scalar is tainted. Either use the
7836fb12b70Safresh1# Scalar::Util module's tainted() function or our (slower) pure Perl
7846fb12b70Safresh1# fallback is_tainted_pp()
7856fb12b70Safresh1{
7866fb12b70Safresh1    local $@;
7876fb12b70Safresh1    eval { require Scalar::Util };
7886fb12b70Safresh1    *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
7896fb12b70Safresh1}
7906fb12b70Safresh1
7916fb12b70Safresh11;
792b8851fccSafresh1
793b8851fccSafresh1__END__
794b8851fccSafresh1
795b8851fccSafresh1=head1 NAME
796b8851fccSafresh1
797b8851fccSafresh1File::Find - Traverse a directory tree.
798b8851fccSafresh1
799b8851fccSafresh1=head1 SYNOPSIS
800b8851fccSafresh1
801b8851fccSafresh1    use File::Find;
802b8851fccSafresh1    find(\&wanted, @directories_to_search);
803b8851fccSafresh1    sub wanted { ... }
804b8851fccSafresh1
805b8851fccSafresh1    use File::Find;
806b8851fccSafresh1    finddepth(\&wanted, @directories_to_search);
807b8851fccSafresh1    sub wanted { ... }
808b8851fccSafresh1
809b8851fccSafresh1    use File::Find;
810b8851fccSafresh1    find({ wanted => \&process, follow => 1 }, '.');
811b8851fccSafresh1
812b8851fccSafresh1=head1 DESCRIPTION
813b8851fccSafresh1
814b8851fccSafresh1These are functions for searching through directory trees doing work
815*3d61058aSafresh1on each file found similar to the Unix L<find(1)> command.  C<File::Find>
816b8851fccSafresh1exports two functions, C<find> and C<finddepth>.  They work similarly
817b8851fccSafresh1but have subtle differences.
818b8851fccSafresh1
819b8851fccSafresh1=over 4
820b8851fccSafresh1
821b8851fccSafresh1=item B<find>
822b8851fccSafresh1
823b8851fccSafresh1  find(\&wanted,  @directories);
824b8851fccSafresh1  find(\%options, @directories);
825b8851fccSafresh1
826b8851fccSafresh1C<find()> does a depth-first search over the given C<@directories> in
827b8851fccSafresh1the order they are given.  For each file or directory found, it calls
828b8851fccSafresh1the C<&wanted> subroutine.  (See below for details on how to use the
829b8851fccSafresh1C<&wanted> function).  Additionally, for each directory found, it will
830b8851fccSafresh1C<chdir()> into that directory and continue the search, invoking the
831b8851fccSafresh1C<&wanted> function on each file or subdirectory in the directory.
832b8851fccSafresh1
833b8851fccSafresh1=item B<finddepth>
834b8851fccSafresh1
835b8851fccSafresh1  finddepth(\&wanted,  @directories);
836b8851fccSafresh1  finddepth(\%options, @directories);
837b8851fccSafresh1
838b8851fccSafresh1C<finddepth()> works just like C<find()> except that it invokes the
839b8851fccSafresh1C<&wanted> function for a directory I<after> invoking it for the
840b8851fccSafresh1directory's contents.  It does a postorder traversal instead of a
841b8851fccSafresh1preorder traversal, working from the bottom of the directory tree up
842b8851fccSafresh1where C<find()> works from the top of the tree down.
843b8851fccSafresh1
844b8851fccSafresh1=back
845b8851fccSafresh1
846eac174f2Safresh1Despite the name of the C<finddepth()> function, both C<find()> and
847eac174f2Safresh1C<finddepth()> perform a depth-first search of the directory hierarchy.
848eac174f2Safresh1
849*3d61058aSafresh1=head2 C<%options>
850b8851fccSafresh1
851b8851fccSafresh1The first argument to C<find()> is either a code reference to your
852b8851fccSafresh1C<&wanted> function, or a hash reference describing the operations
853b8851fccSafresh1to be performed for each file.  The
854b8851fccSafresh1code reference is described in L</The wanted function> below.
855b8851fccSafresh1
856*3d61058aSafresh1Here are the possible B<keys> for the hash:
857b8851fccSafresh1
858eac174f2Safresh1=over 4
859b8851fccSafresh1
860b8851fccSafresh1=item C<wanted>
861b8851fccSafresh1
862b8851fccSafresh1The value should be a code reference.  This code reference is
863b8851fccSafresh1described in L</The wanted function> below. The C<&wanted> subroutine is
864b8851fccSafresh1mandatory.
865b8851fccSafresh1
866b8851fccSafresh1=item C<bydepth>
867b8851fccSafresh1
868b8851fccSafresh1Reports the name of a directory only AFTER all its entries
869b8851fccSafresh1have been reported.  Entry point C<finddepth()> is a shortcut for
870b8851fccSafresh1specifying C<< { bydepth => 1 } >> in the first argument of C<find()>.
871b8851fccSafresh1
872b8851fccSafresh1=item C<preprocess>
873b8851fccSafresh1
874b8851fccSafresh1The value should be a code reference. This code reference is used to
875b8851fccSafresh1preprocess the current directory. The name of the currently processed
876b8851fccSafresh1directory is in C<$File::Find::dir>. Your preprocessing function is
877b8851fccSafresh1called after C<readdir()>, but before the loop that calls the C<wanted()>
878b8851fccSafresh1function. It is called with a list of strings (actually file/directory
879b8851fccSafresh1names) and is expected to return a list of strings. The code can be
880b8851fccSafresh1used to sort the file/directory names alphabetically, numerically,
881b8851fccSafresh1or to filter out directory entries based on their name alone. When
882*3d61058aSafresh1C<follow> or C<follow_fast> are in effect, C<preprocess> is a no-op.
883b8851fccSafresh1
884b8851fccSafresh1=item C<postprocess>
885b8851fccSafresh1
886b8851fccSafresh1The value should be a code reference. It is invoked just before leaving
887b8851fccSafresh1the currently processed directory. It is called in void context with no
888b8851fccSafresh1arguments. The name of the current directory is in C<$File::Find::dir>. This
889b8851fccSafresh1hook is handy for summarizing a directory, such as calculating its disk
890*3d61058aSafresh1usage. When C<follow> or C<follow_fast> are in effect, C<postprocess> is a
891b8851fccSafresh1no-op.
892b8851fccSafresh1
893b8851fccSafresh1=item C<follow>
894b8851fccSafresh1
895b8851fccSafresh1Causes symbolic links to be followed. Since directory trees with symbolic
896b8851fccSafresh1links (followed) may contain files more than once and may even have
897b8851fccSafresh1cycles, a hash has to be built up with an entry for each file.
898b8851fccSafresh1This might be expensive both in space and time for a large
899b8851fccSafresh1directory tree. See L</follow_fast> and L</follow_skip> below.
900*3d61058aSafresh1If either C<follow> or C<follow_fast> is in effect:
901b8851fccSafresh1
902eac174f2Safresh1=over 4
903b8851fccSafresh1
904b8851fccSafresh1=item *
905b8851fccSafresh1
906*3d61058aSafresh1It is guaranteed that an C<lstat> has been called before the user's
907b8851fccSafresh1C<wanted()> function is called. This enables fast file checks involving C<_>.
908*3d61058aSafresh1Note that this guarantee no longer holds if C<follow> or C<follow_fast>
909b8851fccSafresh1are not set.
910b8851fccSafresh1
911b8851fccSafresh1=item *
912b8851fccSafresh1
913b8851fccSafresh1There is a variable C<$File::Find::fullname> which holds the absolute
914b8851fccSafresh1pathname of the file with all symbolic links resolved.  If the link is
915b8851fccSafresh1a dangling symbolic link, then fullname will be set to C<undef>.
916b8851fccSafresh1
917b8851fccSafresh1=back
918b8851fccSafresh1
919b8851fccSafresh1=item C<follow_fast>
920b8851fccSafresh1
921*3d61058aSafresh1This is similar to C<follow> except that it may report some files more
922b8851fccSafresh1than once.  It does detect cycles, however.  Since only symbolic links
923b8851fccSafresh1have to be hashed, this is much cheaper both in space and time.  If
924b8851fccSafresh1processing a file more than once (by the user's C<wanted()> function)
925*3d61058aSafresh1is worse than just taking time, the option C<follow> should be used.
926b8851fccSafresh1
927b8851fccSafresh1=item C<follow_skip>
928b8851fccSafresh1
929b8851fccSafresh1C<follow_skip==1>, which is the default, causes all files which are
930b8851fccSafresh1neither directories nor symbolic links to be ignored if they are about
931b8851fccSafresh1to be processed a second time. If a directory or a symbolic link
932*3d61058aSafresh1are about to be processed a second time, C<File::Find> dies.
933b8851fccSafresh1
934*3d61058aSafresh1C<follow_skip==0> causes C<File::Find> to die if any file is about to be
935b8851fccSafresh1processed a second time.
936b8851fccSafresh1
937*3d61058aSafresh1C<follow_skip==2> causes C<File::Find> to ignore any duplicate files and
938b8851fccSafresh1directories but to proceed normally otherwise.
939b8851fccSafresh1
940b8851fccSafresh1=item C<dangling_symlinks>
941b8851fccSafresh1
942b8851fccSafresh1Specifies what to do with symbolic links whose target doesn't exist.
943b8851fccSafresh1If true and a code reference, will be called with the symbolic link
944b8851fccSafresh1name and the directory it lives in as arguments.  Otherwise, if true
945b8851fccSafresh1and warnings are on, a warning of the form C<"symbolic_link_name is a dangling
946b8851fccSafresh1symbolic link\n"> will be issued.  If false, the dangling symbolic link
947b8851fccSafresh1will be silently ignored.
948b8851fccSafresh1
949b8851fccSafresh1=item C<no_chdir>
950b8851fccSafresh1
951b8851fccSafresh1Does not C<chdir()> to each directory as it recurses. The C<wanted()>
952b8851fccSafresh1function will need to be aware of this, of course. In this case,
953b8851fccSafresh1C<$_> will be the same as C<$File::Find::name>.
954b8851fccSafresh1
955b8851fccSafresh1=item C<untaint>
956b8851fccSafresh1
957*3d61058aSafresh1If find is used in L<taint-mode|perlsec/Taint mode> (C<-T> command line
958*3d61058aSafresh1switch or C<if EUID != UID> or C<if EGID != GID>), then internally
959*3d61058aSafresh1directory names have to be untainted before they can be C<chdir>'d to.
960*3d61058aSafresh1Therefore they are checked against a regular expression C<untaint_pattern>.
961*3d61058aSafresh1Note that all names passed to the user's C<wanted()> function are still
962*3d61058aSafresh1tainted. If this option is used while not in taint-mode, C<untaint>
963*3d61058aSafresh1is a no-op.
964b8851fccSafresh1
965b8851fccSafresh1=item C<untaint_pattern>
966b8851fccSafresh1
967b8851fccSafresh1See above. This should be set using the C<qr> quoting operator.
968b8851fccSafresh1The default is set to C<qr|^([-+@\w./]+)$|>.
969b8851fccSafresh1Note that the parentheses are vital.
970b8851fccSafresh1
971b8851fccSafresh1=item C<untaint_skip>
972b8851fccSafresh1
973*3d61058aSafresh1If set, a directory which fails the C<untaint_pattern> is skipped,
974b8851fccSafresh1including all its sub-directories. The default is to C<die> in such a case.
975b8851fccSafresh1
976b8851fccSafresh1=back
977b8851fccSafresh1
978b8851fccSafresh1=head2 The wanted function
979b8851fccSafresh1
980b8851fccSafresh1The C<wanted()> function does whatever verifications you want on
981b8851fccSafresh1each file and directory.  Note that despite its name, the C<wanted()>
982b8851fccSafresh1function is a generic callback function, and does B<not> tell
983*3d61058aSafresh1C<File::Find> if a file is "wanted" or not.  In fact, its return value
984b8851fccSafresh1is ignored.
985b8851fccSafresh1
986*3d61058aSafresh1The C<wanted> function takes no arguments but rather does its work
987b8851fccSafresh1through a collection of variables.
988b8851fccSafresh1
989b8851fccSafresh1=over 4
990b8851fccSafresh1
991b8851fccSafresh1=item C<$File::Find::dir> is the current directory name,
992b8851fccSafresh1
993b8851fccSafresh1=item C<$_> is the current filename within that directory
994b8851fccSafresh1
995b8851fccSafresh1=item C<$File::Find::name> is the complete pathname to the file.
996b8851fccSafresh1
997b8851fccSafresh1=back
998b8851fccSafresh1
999b8851fccSafresh1The above variables have all been localized and may be changed without
1000b8851fccSafresh1affecting data outside of the wanted function.
1001b8851fccSafresh1
1002b8851fccSafresh1For example, when examining the file F</some/path/foo.ext> you will have:
1003b8851fccSafresh1
1004b8851fccSafresh1    $File::Find::dir  = /some/path/
1005b8851fccSafresh1    $_                = foo.ext
1006b8851fccSafresh1    $File::Find::name = /some/path/foo.ext
1007b8851fccSafresh1
1008*3d61058aSafresh1You are C<chdir()>'d to C<$File::Find::dir> when the function is called,
1009b8851fccSafresh1unless C<no_chdir> was specified. Note that when changing to
1010b8851fccSafresh1directories is in effect, the root directory (F</>) is a somewhat
1011b8851fccSafresh1special case inasmuch as the concatenation of C<$File::Find::dir>,
1012b8851fccSafresh1C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The
1013b8851fccSafresh1table below summarizes all variants:
1014b8851fccSafresh1
1015b8851fccSafresh1              $File::Find::name  $File::Find::dir  $_
1016b8851fccSafresh1 default      /                  /                 .
1017b8851fccSafresh1 no_chdir=>0  /etc               /                 etc
1018b8851fccSafresh1              /etc/x             /etc              x
1019b8851fccSafresh1
1020b8851fccSafresh1 no_chdir=>1  /                  /                 /
1021b8851fccSafresh1              /etc               /                 /etc
1022b8851fccSafresh1              /etc/x             /etc              /etc/x
1023b8851fccSafresh1
1024b8851fccSafresh1
1025b8851fccSafresh1When C<follow> or C<follow_fast> are in effect, there is
1026b8851fccSafresh1also a C<$File::Find::fullname>.  The function may set
1027b8851fccSafresh1C<$File::Find::prune> to prune the tree unless C<bydepth> was
1028b8851fccSafresh1specified.  Unless C<follow> or C<follow_fast> is specified, for
1029*3d61058aSafresh1compatibility reasons (C<find.pl>, L<find2perl>) there are
1030*3d61058aSafresh1in addition the following globals available: C<$File::Find::topdir>,
1031b8851fccSafresh1C<$File::Find::topdev>, C<$File::Find::topino>,
1032b8851fccSafresh1C<$File::Find::topmode> and C<$File::Find::topnlink>.
1033b8851fccSafresh1
1034*3d61058aSafresh1This library is useful for the C<find2perl> tool (distributed with the
1035*3d61058aSafresh1L<App::find2perl> CPAN module), which when fed:
1036b8851fccSafresh1
1037b8851fccSafresh1  find2perl / -name .nfs\* -mtime +7 \
1038b8851fccSafresh1    -exec rm -f {} \; -o -fstype nfs -prune
1039b8851fccSafresh1
1040b8851fccSafresh1produces something like:
1041b8851fccSafresh1
1042b8851fccSafresh1 sub wanted {
1043b8851fccSafresh1    /^\.nfs.*\z/s &&
1044b8851fccSafresh1    (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
1045b8851fccSafresh1    int(-M _) > 7 &&
1046b8851fccSafresh1    unlink($_)
1047b8851fccSafresh1    ||
1048b8851fccSafresh1    ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
1049b8851fccSafresh1    $dev < 0 &&
1050b8851fccSafresh1    ($File::Find::prune = 1);
1051b8851fccSafresh1 }
1052b8851fccSafresh1
1053b8851fccSafresh1Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
1054b8851fccSafresh1filehandle that caches the information from the preceding
1055b8851fccSafresh1C<stat()>, C<lstat()>, or filetest.
1056b8851fccSafresh1
1057b8851fccSafresh1Here's another interesting wanted function.  It will find all symbolic
1058b8851fccSafresh1links that don't resolve:
1059b8851fccSafresh1
1060b8851fccSafresh1    sub wanted {
1061b8851fccSafresh1         -l && !-e && print "bogus link: $File::Find::name\n";
1062b8851fccSafresh1    }
1063b8851fccSafresh1
1064b8851fccSafresh1Note that you may mix directories and (non-directory) files in the list of
1065b8851fccSafresh1directories to be searched by the C<wanted()> function.
1066b8851fccSafresh1
1067b8851fccSafresh1    find(\&wanted, "./foo", "./bar", "./baz/epsilon");
1068b8851fccSafresh1
1069b8851fccSafresh1In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be
1070b8851fccSafresh1evaluated by C<wanted()>.
1071b8851fccSafresh1
1072b8851fccSafresh1See also the script C<pfind> on CPAN for a nice application of this
1073b8851fccSafresh1module.
1074b8851fccSafresh1
1075b8851fccSafresh1=head1 WARNINGS
1076b8851fccSafresh1
1077b8851fccSafresh1If you run your program with the C<-w> switch, or if you use the
1078b8851fccSafresh1C<warnings> pragma, File::Find will report warnings for several weird
1079b8851fccSafresh1situations. You can disable these warnings by putting the statement
1080b8851fccSafresh1
1081b8851fccSafresh1    no warnings 'File::Find';
1082b8851fccSafresh1
1083b8851fccSafresh1in the appropriate scope. See L<warnings> for more info about lexical
1084b8851fccSafresh1warnings.
1085b8851fccSafresh1
1086eac174f2Safresh1=head1 BUGS AND CAVEATS
1087b8851fccSafresh1
1088eac174f2Safresh1=over 4
1089b8851fccSafresh1
1090*3d61058aSafresh1=item C<$dont_use_nlink>
1091b8851fccSafresh1
1092*3d61058aSafresh1You can set the variable C<$File::Find::dont_use_nlink> to C<0> if you
1093b46d8ef2Safresh1are sure the filesystem you are scanning reflects the number of
1094b46d8ef2Safresh1subdirectories in the parent directory's C<nlink> count.
1095b8851fccSafresh1
1096b46d8ef2Safresh1If you do set C<$File::Find::dont_use_nlink> to 0, you may notice an
1097b46d8ef2Safresh1improvement in speed at the risk of not recursing into subdirectories
1098b46d8ef2Safresh1if a filesystem doesn't populate C<nlink> as expected.
1099b8851fccSafresh1
1100b46d8ef2Safresh1C<$File::Find::dont_use_nlink> now defaults to 1 on all platforms.
1101b8851fccSafresh1
1102*3d61058aSafresh1=item Symlinks
1103b8851fccSafresh1
1104b8851fccSafresh1Be aware that the option to follow symbolic links can be dangerous.
1105b8851fccSafresh1Depending on the structure of the directory tree (including symbolic
1106b8851fccSafresh1links to directories) you might traverse a given (physical) directory
1107b8851fccSafresh1more than once (only if C<follow_fast> is in effect).
1108b8851fccSafresh1Furthermore, deleting or changing files in a symbolically linked directory
1109b8851fccSafresh1might cause very unpleasant surprises, since you delete or change files
1110b8851fccSafresh1in an unknown directory.
1111b8851fccSafresh1
1112b8851fccSafresh1=back
1113b8851fccSafresh1
1114b8851fccSafresh1=head1 HISTORY
1115b8851fccSafresh1
1116*3d61058aSafresh1C<File::Find> used to produce incorrect results if called recursively.
1117b8851fccSafresh1During the development of perl 5.8 this bug was fixed.
1118*3d61058aSafresh1The first fixed version of C<File::Find> was 1.01.
1119b8851fccSafresh1
1120b8851fccSafresh1=head1 SEE ALSO
1121b8851fccSafresh1
1122*3d61058aSafresh1L<find(1)>, L<find2perl>
1123b8851fccSafresh1
1124b8851fccSafresh1=cut
1125