xref: /openbsd-src/gnu/usr.bin/perl/ext/File-Find/lib/File/Find.pm (revision 897fc685943471cf985a0fe38ba076ea6fe74fa5)
1package File::Find;
2use 5.006;
3use strict;
4use warnings;
5use warnings::register;
6our $VERSION = '1.34';
7require Exporter;
8require Cwd;
9
10our @ISA = qw(Exporter);
11our @EXPORT = qw(find finddepth);
12
13
14use strict;
15my $Is_VMS;
16my $Is_Win32;
17
18require File::Basename;
19require File::Spec;
20
21# Should ideally be my() not our() but local() currently
22# refuses to operate on lexicals
23
24our %SLnkSeen;
25our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
26    $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
27    $pre_process, $post_process, $dangling_symlinks);
28
29sub contract_name {
30    my ($cdir,$fn) = @_;
31
32    return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
33
34    $cdir = substr($cdir,0,rindex($cdir,'/')+1);
35
36    $fn =~ s|^\./||;
37
38    my $abs_name= $cdir . $fn;
39
40    if (substr($fn,0,3) eq '../') {
41       1 while $abs_name =~ s!/[^/]*/\.\./+!/!;
42    }
43
44    return $abs_name;
45}
46
47sub PathCombine($$) {
48    my ($Base,$Name) = @_;
49    my $AbsName;
50
51    if (substr($Name,0,1) eq '/') {
52	$AbsName= $Name;
53    }
54    else {
55	$AbsName= contract_name($Base,$Name);
56    }
57
58    # (simple) check for recursion
59    my $newlen= length($AbsName);
60    if ($newlen <= length($Base)) {
61	if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
62	    && $AbsName eq substr($Base,0,$newlen))
63	{
64	    return undef;
65	}
66    }
67    return $AbsName;
68}
69
70sub Follow_SymLink($) {
71    my ($AbsName) = @_;
72
73    my ($NewName,$DEV, $INO);
74    ($DEV, $INO)= lstat $AbsName;
75
76    while (-l _) {
77	if ($SLnkSeen{$DEV, $INO}++) {
78	    if ($follow_skip < 2) {
79		die "$AbsName is encountered a second time";
80	    }
81	    else {
82		return undef;
83	    }
84	}
85	$NewName= PathCombine($AbsName, readlink($AbsName));
86	unless(defined $NewName) {
87	    if ($follow_skip < 2) {
88		die "$AbsName is a recursive symbolic link";
89	    }
90	    else {
91		return undef;
92	    }
93	}
94	else {
95	    $AbsName= $NewName;
96	}
97	($DEV, $INO) = lstat($AbsName);
98	return undef unless defined $DEV;  #  dangling symbolic link
99    }
100
101    if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
102	if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
103	    die "$AbsName encountered a second time";
104	}
105	else {
106	    return undef;
107	}
108    }
109
110    return $AbsName;
111}
112
113our($dir, $name, $fullname, $prune);
114sub _find_dir_symlnk($$$);
115sub _find_dir($$$);
116
117# check whether or not a scalar variable is tainted
118# (code straight from the Camel, 3rd ed., page 561)
119sub is_tainted_pp {
120    my $arg = shift;
121    my $nada = substr($arg, 0, 0); # zero-length
122    local $@;
123    eval { eval "# $nada" };
124    return length($@) != 0;
125}
126
127sub _find_opt {
128    my $wanted = shift;
129    return unless @_;
130    die "invalid top directory" unless defined $_[0];
131
132    # This function must local()ize everything because callbacks may
133    # call find() or finddepth()
134
135    local %SLnkSeen;
136    local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
137	$follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
138	$pre_process, $post_process, $dangling_symlinks);
139    local($dir, $name, $fullname, $prune);
140    local *_ = \my $a;
141
142    my $cwd            = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
143    if ($Is_VMS) {
144	# VMS returns this by default in VMS format which just doesn't
145	# work for the rest of this module.
146	$cwd = VMS::Filespec::unixpath($cwd);
147
148	# Apparently this is not expected to have a trailing space.
149	# To attempt to make VMS/UNIX conversions mostly reversible,
150	# a trailing slash is needed.  The run-time functions ignore the
151	# resulting double slash, but it causes the perl tests to fail.
152        $cwd =~ s#/\z##;
153
154	# This comes up in upper case now, but should be lower.
155	# In the future this could be exact case, no need to change.
156    }
157    my $cwd_untainted  = $cwd;
158    my $check_t_cwd    = 1;
159    $wanted_callback   = $wanted->{wanted};
160    $bydepth           = $wanted->{bydepth};
161    $pre_process       = $wanted->{preprocess};
162    $post_process      = $wanted->{postprocess};
163    $no_chdir          = $wanted->{no_chdir};
164    $full_check        = $Is_Win32 ? 0 : $wanted->{follow};
165    $follow            = $Is_Win32 ? 0 :
166                             $full_check || $wanted->{follow_fast};
167    $follow_skip       = $wanted->{follow_skip};
168    $untaint           = $wanted->{untaint};
169    $untaint_pat       = $wanted->{untaint_pattern};
170    $untaint_skip      = $wanted->{untaint_skip};
171    $dangling_symlinks = $wanted->{dangling_symlinks};
172
173    # for compatibility reasons (find.pl, find2perl)
174    local our ($topdir, $topdev, $topino, $topmode, $topnlink);
175
176    # a symbolic link to a directory doesn't increase the link count
177    $avoid_nlink      = $follow || $File::Find::dont_use_nlink;
178
179    my ($abs_dir, $Is_Dir);
180
181    Proc_Top_Item:
182    foreach my $TOP (@_) {
183	my $top_item = $TOP;
184	$top_item = VMS::Filespec::unixify($top_item) if $Is_VMS;
185
186	($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
187
188	if ($Is_Win32) {
189	    $top_item =~ s|[/\\]\z||
190	      unless $top_item =~ m{^(?:\w:)?[/\\]$};
191	}
192	else {
193	    $top_item =~ s|/\z|| unless $top_item eq '/';
194	}
195
196	$Is_Dir= 0;
197
198	if ($follow) {
199
200	    if (substr($top_item,0,1) eq '/') {
201		$abs_dir = $top_item;
202	    }
203	    elsif ($top_item eq $File::Find::current_dir) {
204		$abs_dir = $cwd;
205	    }
206	    else {  # care about any  ../
207		$top_item =~ s/\.dir\z//i if $Is_VMS;
208		$abs_dir = contract_name("$cwd/",$top_item);
209	    }
210	    $abs_dir= Follow_SymLink($abs_dir);
211	    unless (defined $abs_dir) {
212		if ($dangling_symlinks) {
213		    if (ref $dangling_symlinks eq 'CODE') {
214			$dangling_symlinks->($top_item, $cwd);
215		    } else {
216			warnings::warnif "$top_item is a dangling symbolic link\n";
217		    }
218		}
219		next Proc_Top_Item;
220	    }
221
222	    if (-d _) {
223		$top_item =~ s/\.dir\z//i if $Is_VMS;
224		_find_dir_symlnk($wanted, $abs_dir, $top_item);
225		$Is_Dir= 1;
226	    }
227	}
228	else { # no follow
229	    $topdir = $top_item;
230	    unless (defined $topnlink) {
231		warnings::warnif "Can't stat $top_item: $!\n";
232		next Proc_Top_Item;
233	    }
234	    if (-d _) {
235		$top_item =~ s/\.dir\z//i if $Is_VMS;
236		_find_dir($wanted, $top_item, $topnlink);
237		$Is_Dir= 1;
238	    }
239	    else {
240		$abs_dir= $top_item;
241	    }
242	}
243
244	unless ($Is_Dir) {
245	    unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
246		($dir,$_) = ('./', $top_item);
247	    }
248
249	    $abs_dir = $dir;
250	    if (( $untaint ) && (is_tainted($dir) )) {
251		( $abs_dir ) = $dir =~ m|$untaint_pat|;
252		unless (defined $abs_dir) {
253		    if ($untaint_skip == 0) {
254			die "directory $dir is still tainted";
255		    }
256		    else {
257			next Proc_Top_Item;
258		    }
259		}
260	    }
261
262	    unless ($no_chdir || chdir $abs_dir) {
263		warnings::warnif "Couldn't chdir $abs_dir: $!\n";
264		next Proc_Top_Item;
265	    }
266
267	    $name = $abs_dir . $_; # $File::Find::name
268	    $_ = $name if $no_chdir;
269
270	    { $wanted_callback->() }; # protect against wild "next"
271
272	}
273
274	unless ( $no_chdir ) {
275	    if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
276		( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
277		unless (defined $cwd_untainted) {
278		    die "insecure cwd in find(depth)";
279		}
280		$check_t_cwd = 0;
281	    }
282	    unless (chdir $cwd_untainted) {
283		die "Can't cd to $cwd: $!\n";
284	    }
285	}
286    }
287}
288
289# API:
290#  $wanted
291#  $p_dir :  "parent directory"
292#  $nlink :  what came back from the stat
293# preconditions:
294#  chdir (if not no_chdir) to dir
295
296sub _find_dir($$$) {
297    my ($wanted, $p_dir, $nlink) = @_;
298    my ($CdLvl,$Level) = (0,0);
299    my @Stack;
300    my @filenames;
301    my ($subcount,$sub_nlink);
302    my $SE= [];
303    my $dir_name= $p_dir;
304    my $dir_pref;
305    my $dir_rel = $File::Find::current_dir;
306    my $tainted = 0;
307    my $no_nlink;
308
309    if ($Is_Win32) {
310	$dir_pref
311	  = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" );
312    } elsif ($Is_VMS) {
313
314	#	VMS is returning trailing .dir on directories
315	#	and trailing . on files and symbolic links
316	#	in UNIX syntax.
317	#
318
319	$p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.';
320
321	$dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" );
322    }
323    else {
324	$dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
325    }
326
327    local ($dir, $name, $prune, *DIR);
328
329    unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
330	my $udir = $p_dir;
331	if (( $untaint ) && (is_tainted($p_dir) )) {
332	    ( $udir ) = $p_dir =~ m|$untaint_pat|;
333	    unless (defined $udir) {
334		if ($untaint_skip == 0) {
335		    die "directory $p_dir is still tainted";
336		}
337		else {
338		    return;
339		}
340	    }
341	}
342	unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
343	    warnings::warnif "Can't cd to $udir: $!\n";
344	    return;
345	}
346    }
347
348    # push the starting directory
349    push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;
350
351    while (defined $SE) {
352	unless ($bydepth) {
353	    $dir= $p_dir; # $File::Find::dir
354	    $name= $dir_name; # $File::Find::name
355	    $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
356	    # prune may happen here
357	    $prune= 0;
358	    { $wanted_callback->() };	# protect against wild "next"
359	    next if $prune;
360	}
361
362	# change to that directory
363	unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
364	    my $udir= $dir_rel;
365	    if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
366		( $udir ) = $dir_rel =~ m|$untaint_pat|;
367		unless (defined $udir) {
368		    if ($untaint_skip == 0) {
369			die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
370		    } else { # $untaint_skip == 1
371			next;
372		    }
373		}
374	    }
375	    unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
376		warnings::warnif "Can't cd to (" .
377		    ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
378		next;
379	    }
380	    $CdLvl++;
381	}
382
383	$dir= $dir_name; # $File::Find::dir
384
385	# Get the list of files in the current directory.
386	unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
387	    warnings::warnif "Can't opendir($dir_name): $!\n";
388	    next;
389	}
390	@filenames = readdir DIR;
391	closedir(DIR);
392	@filenames = $pre_process->(@filenames) if $pre_process;
393	push @Stack,[$CdLvl,$dir_name,"",-2]   if $post_process;
394
395	# default: use whatever was specified
396        # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
397        $no_nlink = $avoid_nlink;
398        # if dir has wrong nlink count, force switch to slower stat method
399        $no_nlink = 1 if ($nlink < 2);
400
401	if ($nlink == 2 && !$no_nlink) {
402	    # This dir has no subdirectories.
403	    for my $FN (@filenames) {
404		if ($Is_VMS) {
405		# Big hammer here - Compensate for VMS trailing . and .dir
406		# No win situation until this is changed, but this
407		# will handle the majority of the cases with breaking the fewest
408
409		    $FN =~ s/\.dir\z//i;
410		    $FN =~ s#\.$## if ($FN ne '.');
411		}
412		next if $FN =~ $File::Find::skip_pattern;
413
414		$name = $dir_pref . $FN; # $File::Find::name
415		$_ = ($no_chdir ? $name : $FN); # $_
416		{ $wanted_callback->() }; # protect against wild "next"
417	    }
418
419	}
420	else {
421	    # This dir has subdirectories.
422	    $subcount = $nlink - 2;
423
424	    # HACK: insert directories at this position, so as to preserve
425	    # the user pre-processed ordering of files (thus ensuring
426	    # directory traversal is in user sorted order, not at random).
427            my $stack_top = @Stack;
428
429	    for my $FN (@filenames) {
430		next if $FN =~ $File::Find::skip_pattern;
431		if ($subcount > 0 || $no_nlink) {
432		    # Seen all the subdirs?
433		    # check for directoriness.
434		    # stat is faster for a file in the current directory
435		    $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
436
437		    if (-d _) {
438			--$subcount;
439			$FN =~ s/\.dir\z//i if $Is_VMS;
440			# HACK: replace push to preserve dir traversal order
441			#push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
442			splice @Stack, $stack_top, 0,
443			         [$CdLvl,$dir_name,$FN,$sub_nlink];
444		    }
445		    else {
446			$name = $dir_pref . $FN; # $File::Find::name
447			$_= ($no_chdir ? $name : $FN); # $_
448			{ $wanted_callback->() }; # protect against wild "next"
449		    }
450		}
451		else {
452		    $name = $dir_pref . $FN; # $File::Find::name
453		    $_= ($no_chdir ? $name : $FN); # $_
454		    { $wanted_callback->() }; # protect against wild "next"
455		}
456	    }
457	}
458    }
459    continue {
460	while ( defined ($SE = pop @Stack) ) {
461	    ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
462	    if ($CdLvl > $Level && !$no_chdir) {
463		my $tmp;
464		if ($Is_VMS) {
465		    $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']';
466		}
467		else {
468		    $tmp = join('/',('..') x ($CdLvl-$Level));
469		}
470		die "Can't cd to $tmp from $dir_name: $!"
471		    unless chdir ($tmp);
472		$CdLvl = $Level;
473	    }
474
475	    if ($Is_Win32) {
476		$dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$}
477		    ? "$p_dir$dir_rel" : "$p_dir/$dir_rel");
478		$dir_pref = "$dir_name/";
479	    }
480	    elsif ($^O eq 'VMS') {
481                if ($p_dir =~ m/[\]>]+$/) {
482                    $dir_name = $p_dir;
483                    $dir_name =~ s/([\]>]+)$/.$dir_rel$1/;
484                    $dir_pref = $dir_name;
485                }
486                else {
487                    $dir_name = "$p_dir/$dir_rel";
488                    $dir_pref = "$dir_name/";
489                }
490	    }
491	    else {
492		$dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
493		$dir_pref = "$dir_name/";
494	    }
495
496	    if ( $nlink == -2 ) {
497		$name = $dir = $p_dir; # $File::Find::name / dir
498                $_ = $File::Find::current_dir;
499		$post_process->();		# End-of-directory processing
500	    }
501	    elsif ( $nlink < 0 ) {  # must be finddepth, report dirname now
502		$name = $dir_name;
503		if ( substr($name,-2) eq '/.' ) {
504		    substr($name, length($name) == 2 ? -1 : -2) = '';
505		}
506		$dir = $p_dir;
507		$_ = ($no_chdir ? $dir_name : $dir_rel );
508		if ( substr($_,-2) eq '/.' ) {
509		    substr($_, length($_) == 2 ? -1 : -2) = '';
510		}
511		{ $wanted_callback->() }; # protect against wild "next"
512	     }
513	     else {
514		push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;
515		last;
516	    }
517	}
518    }
519}
520
521
522# API:
523#  $wanted
524#  $dir_loc : absolute location of a dir
525#  $p_dir   : "parent directory"
526# preconditions:
527#  chdir (if not no_chdir) to dir
528
529sub _find_dir_symlnk($$$) {
530    my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
531    my @Stack;
532    my @filenames;
533    my $new_loc;
534    my $updir_loc = $dir_loc; # untainted parent directory
535    my $SE = [];
536    my $dir_name = $p_dir;
537    my $dir_pref;
538    my $loc_pref;
539    my $dir_rel = $File::Find::current_dir;
540    my $byd_flag; # flag for pending stack entry if $bydepth
541    my $tainted = 0;
542    my $ok = 1;
543
544    $dir_pref = ( $p_dir   eq '/' ? '/' : "$p_dir/" );
545    $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
546
547    local ($dir, $name, $fullname, $prune, *DIR);
548
549    unless ($no_chdir) {
550	# untaint the topdir
551	if (( $untaint ) && (is_tainted($dir_loc) )) {
552	    ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
553	     # once untainted, $updir_loc is pushed on the stack (as parent directory);
554	    # hence, we don't need to untaint the parent directory every time we chdir
555	    # to it later
556	    unless (defined $updir_loc) {
557		if ($untaint_skip == 0) {
558		    die "directory $dir_loc is still tainted";
559		}
560		else {
561		    return;
562		}
563	    }
564	}
565	$ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
566	unless ($ok) {
567	    warnings::warnif "Can't cd to $updir_loc: $!\n";
568	    return;
569	}
570    }
571
572    push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1]  if  $bydepth;
573
574    while (defined $SE) {
575
576	unless ($bydepth) {
577	    # change (back) to parent directory (always untainted)
578	    unless ($no_chdir) {
579		unless (chdir $updir_loc) {
580		    warnings::warnif "Can't cd to $updir_loc: $!\n";
581		    next;
582		}
583	    }
584	    $dir= $p_dir; # $File::Find::dir
585	    $name= $dir_name; # $File::Find::name
586	    $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
587	    $fullname= $dir_loc; # $File::Find::fullname
588	    # prune may happen here
589	    $prune= 0;
590	    lstat($_); # make sure  file tests with '_' work
591	    { $wanted_callback->() }; # protect against wild "next"
592	    next if $prune;
593	}
594
595	# change to that directory
596	unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
597	    $updir_loc = $dir_loc;
598	    if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
599		# untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
600		( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
601		unless (defined $updir_loc) {
602		    if ($untaint_skip == 0) {
603			die "directory $dir_loc is still tainted";
604		    }
605		    else {
606			next;
607		    }
608		}
609	    }
610	    unless (chdir $updir_loc) {
611		warnings::warnif "Can't cd to $updir_loc: $!\n";
612		next;
613	    }
614	}
615
616	$dir = $dir_name; # $File::Find::dir
617
618	# Get the list of files in the current directory.
619	unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
620	    warnings::warnif "Can't opendir($dir_loc): $!\n";
621	    next;
622	}
623	@filenames = readdir DIR;
624	closedir(DIR);
625
626	for my $FN (@filenames) {
627	    if ($Is_VMS) {
628	    # Big hammer here - Compensate for VMS trailing . and .dir
629	    # No win situation until this is changed, but this
630	    # will handle the majority of the cases with breaking the fewest.
631
632		$FN =~ s/\.dir\z//i;
633		$FN =~ s#\.$## if ($FN ne '.');
634	    }
635	    next if $FN =~ $File::Find::skip_pattern;
636
637	    # follow symbolic links / do an lstat
638	    $new_loc = Follow_SymLink($loc_pref.$FN);
639
640	    # ignore if invalid symlink
641	    unless (defined $new_loc) {
642	        if (!defined -l _ && $dangling_symlinks) {
643                $fullname = undef;
644	            if (ref $dangling_symlinks eq 'CODE') {
645	                $dangling_symlinks->($FN, $dir_pref);
646	            } else {
647	                warnings::warnif "$dir_pref$FN is a dangling symbolic link\n";
648	            }
649	        }
650            else {
651                $fullname = $loc_pref . $FN;
652            }
653	        $name = $dir_pref . $FN;
654	        $_ = ($no_chdir ? $name : $FN);
655	        { $wanted_callback->() };
656	        next;
657	    }
658
659	    if (-d _) {
660		if ($Is_VMS) {
661		    $FN =~ s/\.dir\z//i;
662		    $FN =~ s#\.$## if ($FN ne '.');
663		    $new_loc =~ s/\.dir\z//i;
664		    $new_loc =~ s#\.$## if ($new_loc ne '.');
665		}
666		push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
667	    }
668	    else {
669		$fullname = $new_loc; # $File::Find::fullname
670		$name = $dir_pref . $FN; # $File::Find::name
671		$_ = ($no_chdir ? $name : $FN); # $_
672		{ $wanted_callback->() }; # protect against wild "next"
673	    }
674	}
675
676    }
677    continue {
678	while (defined($SE = pop @Stack)) {
679	    ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
680	    $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
681	    $dir_pref = "$dir_name/";
682	    $loc_pref = "$dir_loc/";
683	    if ( $byd_flag < 0 ) {  # must be finddepth, report dirname now
684		unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
685		    unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
686			warnings::warnif "Can't cd to $updir_loc: $!\n";
687			next;
688		    }
689		}
690		$fullname = $dir_loc; # $File::Find::fullname
691		$name = $dir_name; # $File::Find::name
692		if ( substr($name,-2) eq '/.' ) {
693		    substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
694		}
695		$dir = $p_dir; # $File::Find::dir
696		$_ = ($no_chdir ? $dir_name : $dir_rel); # $_
697		if ( substr($_,-2) eq '/.' ) {
698		    substr($_, length($_) == 2 ? -1 : -2) = '';
699		}
700
701		lstat($_); # make sure file tests with '_' work
702		{ $wanted_callback->() }; # protect against wild "next"
703	    }
704	    else {
705		push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1]  if  $bydepth;
706		last;
707	    }
708	}
709    }
710}
711
712
713sub wrap_wanted {
714    my $wanted = shift;
715    if ( ref($wanted) eq 'HASH' ) {
716        # RT #122547
717        my %valid_options = map {$_ => 1} qw(
718            wanted
719            bydepth
720            preprocess
721            postprocess
722            follow
723            follow_fast
724            follow_skip
725            dangling_symlinks
726            no_chdir
727            untaint
728            untaint_pattern
729            untaint_skip
730        );
731        my @invalid_options = ();
732        for my $v (keys %{$wanted}) {
733            push @invalid_options, $v unless exists $valid_options{$v};
734        }
735        warn "Invalid option(s): @invalid_options" if @invalid_options;
736
737        unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) {
738            die 'no &wanted subroutine given';
739        }
740        if ( $wanted->{follow} || $wanted->{follow_fast}) {
741            $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
742        }
743        if ( $wanted->{untaint} ) {
744            $wanted->{untaint_pattern} = $File::Find::untaint_pattern
745            unless defined $wanted->{untaint_pattern};
746            $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
747        }
748        return $wanted;
749    }
750    elsif( ref( $wanted ) eq 'CODE' ) {
751        return { wanted => $wanted };
752    }
753    else {
754       die 'no &wanted subroutine given';
755    }
756}
757
758sub find {
759    my $wanted = shift;
760    _find_opt(wrap_wanted($wanted), @_);
761}
762
763sub finddepth {
764    my $wanted = wrap_wanted(shift);
765    $wanted->{bydepth} = 1;
766    _find_opt($wanted, @_);
767}
768
769# default
770$File::Find::skip_pattern    = qr/^\.{1,2}\z/;
771$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
772
773# These are hard-coded for now, but may move to hint files.
774if ($^O eq 'VMS') {
775    $Is_VMS = 1;
776    $File::Find::dont_use_nlink = 1;
777}
778elsif ($^O eq 'MSWin32') {
779    $Is_Win32 = 1;
780}
781
782# this _should_ work properly on all platforms
783# where File::Find can be expected to work
784$File::Find::current_dir = File::Spec->curdir || '.';
785
786$File::Find::dont_use_nlink = 1
787    if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $Is_Win32 ||
788       $^O eq 'interix' || $^O eq 'cygwin' || $^O eq 'qnx' || $^O eq 'nto';
789
790# Set dont_use_nlink in your hint file if your system's stat doesn't
791# report the number of links in a directory as an indication
792# of the number of files.
793# See e.g. hints/haiku.sh for Haiku.
794unless ($File::Find::dont_use_nlink) {
795    require Config;
796    $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
797}
798
799# We need a function that checks if a scalar is tainted. Either use the
800# Scalar::Util module's tainted() function or our (slower) pure Perl
801# fallback is_tainted_pp()
802{
803    local $@;
804    eval { require Scalar::Util };
805    *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
806}
807
8081;
809
810__END__
811
812=head1 NAME
813
814File::Find - Traverse a directory tree.
815
816=head1 SYNOPSIS
817
818    use File::Find;
819    find(\&wanted, @directories_to_search);
820    sub wanted { ... }
821
822    use File::Find;
823    finddepth(\&wanted, @directories_to_search);
824    sub wanted { ... }
825
826    use File::Find;
827    find({ wanted => \&process, follow => 1 }, '.');
828
829=head1 DESCRIPTION
830
831These are functions for searching through directory trees doing work
832on each file found similar to the Unix I<find> command.  File::Find
833exports two functions, C<find> and C<finddepth>.  They work similarly
834but have subtle differences.
835
836=over 4
837
838=item B<find>
839
840  find(\&wanted,  @directories);
841  find(\%options, @directories);
842
843C<find()> does a depth-first search over the given C<@directories> in
844the order they are given.  For each file or directory found, it calls
845the C<&wanted> subroutine.  (See below for details on how to use the
846C<&wanted> function).  Additionally, for each directory found, it will
847C<chdir()> into that directory and continue the search, invoking the
848C<&wanted> function on each file or subdirectory in the directory.
849
850=item B<finddepth>
851
852  finddepth(\&wanted,  @directories);
853  finddepth(\%options, @directories);
854
855C<finddepth()> works just like C<find()> except that it invokes the
856C<&wanted> function for a directory I<after> invoking it for the
857directory's contents.  It does a postorder traversal instead of a
858preorder traversal, working from the bottom of the directory tree up
859where C<find()> works from the top of the tree down.
860
861=back
862
863=head2 %options
864
865The first argument to C<find()> is either a code reference to your
866C<&wanted> function, or a hash reference describing the operations
867to be performed for each file.  The
868code reference is described in L</The wanted function> below.
869
870Here are the possible keys for the hash:
871
872=over 3
873
874=item C<wanted>
875
876The value should be a code reference.  This code reference is
877described in L</The wanted function> below. The C<&wanted> subroutine is
878mandatory.
879
880=item C<bydepth>
881
882Reports the name of a directory only AFTER all its entries
883have been reported.  Entry point C<finddepth()> is a shortcut for
884specifying C<< { bydepth => 1 } >> in the first argument of C<find()>.
885
886=item C<preprocess>
887
888The value should be a code reference. This code reference is used to
889preprocess the current directory. The name of the currently processed
890directory is in C<$File::Find::dir>. Your preprocessing function is
891called after C<readdir()>, but before the loop that calls the C<wanted()>
892function. It is called with a list of strings (actually file/directory
893names) and is expected to return a list of strings. The code can be
894used to sort the file/directory names alphabetically, numerically,
895or to filter out directory entries based on their name alone. When
896I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
897
898=item C<postprocess>
899
900The value should be a code reference. It is invoked just before leaving
901the currently processed directory. It is called in void context with no
902arguments. The name of the current directory is in C<$File::Find::dir>. This
903hook is handy for summarizing a directory, such as calculating its disk
904usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
905no-op.
906
907=item C<follow>
908
909Causes symbolic links to be followed. Since directory trees with symbolic
910links (followed) may contain files more than once and may even have
911cycles, a hash has to be built up with an entry for each file.
912This might be expensive both in space and time for a large
913directory tree. See L</follow_fast> and L</follow_skip> below.
914If either I<follow> or I<follow_fast> is in effect:
915
916=over 6
917
918=item *
919
920It is guaranteed that an I<lstat> has been called before the user's
921C<wanted()> function is called. This enables fast file checks involving C<_>.
922Note that this guarantee no longer holds if I<follow> or I<follow_fast>
923are not set.
924
925=item *
926
927There is a variable C<$File::Find::fullname> which holds the absolute
928pathname of the file with all symbolic links resolved.  If the link is
929a dangling symbolic link, then fullname will be set to C<undef>.
930
931=back
932
933This is a no-op on Win32.
934
935=item C<follow_fast>
936
937This is similar to I<follow> except that it may report some files more
938than once.  It does detect cycles, however.  Since only symbolic links
939have to be hashed, this is much cheaper both in space and time.  If
940processing a file more than once (by the user's C<wanted()> function)
941is worse than just taking time, the option I<follow> should be used.
942
943This is also a no-op on Win32.
944
945=item C<follow_skip>
946
947C<follow_skip==1>, which is the default, causes all files which are
948neither directories nor symbolic links to be ignored if they are about
949to be processed a second time. If a directory or a symbolic link
950are about to be processed a second time, File::Find dies.
951
952C<follow_skip==0> causes File::Find to die if any file is about to be
953processed a second time.
954
955C<follow_skip==2> causes File::Find to ignore any duplicate files and
956directories but to proceed normally otherwise.
957
958=item C<dangling_symlinks>
959
960Specifies what to do with symbolic links whose target doesn't exist.
961If true and a code reference, will be called with the symbolic link
962name and the directory it lives in as arguments.  Otherwise, if true
963and warnings are on, a warning of the form C<"symbolic_link_name is a dangling
964symbolic link\n"> will be issued.  If false, the dangling symbolic link
965will be silently ignored.
966
967=item C<no_chdir>
968
969Does not C<chdir()> to each directory as it recurses. The C<wanted()>
970function will need to be aware of this, of course. In this case,
971C<$_> will be the same as C<$File::Find::name>.
972
973=item C<untaint>
974
975If find is used in L<taint-mode|perlsec/Taint mode> (-T command line switch or
976if EUID != UID or if EGID != GID), then internally directory names have to be
977untainted before they can be C<chdir>'d to. Therefore they are checked against
978a regular expression I<untaint_pattern>.  Note that all names passed to the
979user's C<wanted()> function are still tainted. If this option is used while not
980in taint-mode, C<untaint> is a no-op.
981
982=item C<untaint_pattern>
983
984See above. This should be set using the C<qr> quoting operator.
985The default is set to C<qr|^([-+@\w./]+)$|>.
986Note that the parentheses are vital.
987
988=item C<untaint_skip>
989
990If set, a directory which fails the I<untaint_pattern> is skipped,
991including all its sub-directories. The default is to C<die> in such a case.
992
993=back
994
995=head2 The wanted function
996
997The C<wanted()> function does whatever verifications you want on
998each file and directory.  Note that despite its name, the C<wanted()>
999function is a generic callback function, and does B<not> tell
1000File::Find if a file is "wanted" or not.  In fact, its return value
1001is ignored.
1002
1003The wanted function takes no arguments but rather does its work
1004through a collection of variables.
1005
1006=over 4
1007
1008=item C<$File::Find::dir> is the current directory name,
1009
1010=item C<$_> is the current filename within that directory
1011
1012=item C<$File::Find::name> is the complete pathname to the file.
1013
1014=back
1015
1016The above variables have all been localized and may be changed without
1017affecting data outside of the wanted function.
1018
1019For example, when examining the file F</some/path/foo.ext> you will have:
1020
1021    $File::Find::dir  = /some/path/
1022    $_                = foo.ext
1023    $File::Find::name = /some/path/foo.ext
1024
1025You are chdir()'d to C<$File::Find::dir> when the function is called,
1026unless C<no_chdir> was specified. Note that when changing to
1027directories is in effect, the root directory (F</>) is a somewhat
1028special case inasmuch as the concatenation of C<$File::Find::dir>,
1029C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The
1030table below summarizes all variants:
1031
1032              $File::Find::name  $File::Find::dir  $_
1033 default      /                  /                 .
1034 no_chdir=>0  /etc               /                 etc
1035              /etc/x             /etc              x
1036
1037 no_chdir=>1  /                  /                 /
1038              /etc               /                 /etc
1039              /etc/x             /etc              /etc/x
1040
1041
1042When C<follow> or C<follow_fast> are in effect, there is
1043also a C<$File::Find::fullname>.  The function may set
1044C<$File::Find::prune> to prune the tree unless C<bydepth> was
1045specified.  Unless C<follow> or C<follow_fast> is specified, for
1046compatibility reasons (find.pl, find2perl) there are in addition the
1047following globals available: C<$File::Find::topdir>,
1048C<$File::Find::topdev>, C<$File::Find::topino>,
1049C<$File::Find::topmode> and C<$File::Find::topnlink>.
1050
1051This library is useful for the C<find2perl> tool (distributed as part of the
1052App-find2perl CPAN distribution), which when fed,
1053
1054  find2perl / -name .nfs\* -mtime +7 \
1055    -exec rm -f {} \; -o -fstype nfs -prune
1056
1057produces something like:
1058
1059 sub wanted {
1060    /^\.nfs.*\z/s &&
1061    (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
1062    int(-M _) > 7 &&
1063    unlink($_)
1064    ||
1065    ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
1066    $dev < 0 &&
1067    ($File::Find::prune = 1);
1068 }
1069
1070Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
1071filehandle that caches the information from the preceding
1072C<stat()>, C<lstat()>, or filetest.
1073
1074Here's another interesting wanted function.  It will find all symbolic
1075links that don't resolve:
1076
1077    sub wanted {
1078         -l && !-e && print "bogus link: $File::Find::name\n";
1079    }
1080
1081Note that you may mix directories and (non-directory) files in the list of
1082directories to be searched by the C<wanted()> function.
1083
1084    find(\&wanted, "./foo", "./bar", "./baz/epsilon");
1085
1086In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be
1087evaluated by C<wanted()>.
1088
1089See also the script C<pfind> on CPAN for a nice application of this
1090module.
1091
1092=head1 WARNINGS
1093
1094If you run your program with the C<-w> switch, or if you use the
1095C<warnings> pragma, File::Find will report warnings for several weird
1096situations. You can disable these warnings by putting the statement
1097
1098    no warnings 'File::Find';
1099
1100in the appropriate scope. See L<warnings> for more info about lexical
1101warnings.
1102
1103=head1 CAVEAT
1104
1105=over 2
1106
1107=item $dont_use_nlink
1108
1109You can set the variable C<$File::Find::dont_use_nlink> to 1 if you want to
1110force File::Find to always stat directories. This was used for file systems
1111that do not have an C<nlink> count matching the number of sub-directories.
1112Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
1113system) and a couple of others.
1114
1115You shouldn't need to set this variable, since File::Find should now detect
1116such file systems on-the-fly and switch itself to using stat. This works even
1117for parts of your file system, like a mounted CD-ROM.
1118
1119If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
1120
1121=item symlinks
1122
1123Be aware that the option to follow symbolic links can be dangerous.
1124Depending on the structure of the directory tree (including symbolic
1125links to directories) you might traverse a given (physical) directory
1126more than once (only if C<follow_fast> is in effect).
1127Furthermore, deleting or changing files in a symbolically linked directory
1128might cause very unpleasant surprises, since you delete or change files
1129in an unknown directory.
1130
1131=back
1132
1133=head1 BUGS AND CAVEATS
1134
1135Despite the name of the C<finddepth()> function, both C<find()> and
1136C<finddepth()> perform a depth-first search of the directory
1137hierarchy.
1138
1139=head1 HISTORY
1140
1141File::Find used to produce incorrect results if called recursively.
1142During the development of perl 5.8 this bug was fixed.
1143The first fixed version of File::Find was 1.01.
1144
1145=head1 SEE ALSO
1146
1147L<find(1)>, find2perl.
1148
1149=cut
1150