xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/lib/File/DosGlob.pm (revision 0:68f95e015346)
1#!perl -w
2
3# use strict fails
4#Can't use string ("main::glob") as a symbol ref while "strict refs" in use at /usr/lib/perl5/5.005/File/DosGlob.pm line 191.
5
6#
7# Documentation at the __END__
8#
9
10package File::DosGlob;
11
12our $VERSION = '1.00';
13use strict;
14use warnings;
15
16sub doglob {
17    my $cond = shift;
18    my @retval = ();
19    #print "doglob: ", join('|', @_), "\n";
20  OUTER:
21    for my $pat (@_) {
22	my @matched = ();
23	my @globdirs = ();
24	my $head = '.';
25	my $sepchr = '/';
26        my $tail;
27	next OUTER unless defined $pat and $pat ne '';
28	# if arg is within quotes strip em and do no globbing
29	if ($pat =~ /^"(.*)"\z/s) {
30	    $pat = $1;
31	    if ($cond eq 'd') { push(@retval, $pat) if -d $pat }
32	    else              { push(@retval, $pat) if -e $pat }
33	    next OUTER;
34	}
35	# wildcards with a drive prefix such as h:*.pm must be changed
36	# to h:./*.pm to expand correctly
37	if ($pat =~ m|^([A-Za-z]:)[^/\\]|s) {
38	    substr($_,0,2) = $1 . "./";
39	}
40	if ($pat =~ m|^(.*)([\\/])([^\\/]*)\z|s) {
41	    ($head, $sepchr, $tail) = ($1,$2,$3);
42	    #print "div: |$head|$sepchr|$tail|\n";
43	    push (@retval, $pat), next OUTER if $tail eq '';
44	    if ($head =~ /[*?]/) {
45		@globdirs = doglob('d', $head);
46		push(@retval, doglob($cond, map {"$_$sepchr$tail"} @globdirs)),
47		    next OUTER if @globdirs;
48	    }
49	    $head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:\z/s;
50	    $pat = $tail;
51	}
52	#
53	# If file component has no wildcards, we can avoid opendir
54	unless ($pat =~ /[*?]/) {
55	    $head = '' if $head eq '.';
56	    $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
57	    $head .= $pat;
58	    if ($cond eq 'd') { push(@retval,$head) if -d $head }
59	    else              { push(@retval,$head) if -e $head }
60	    next OUTER;
61	}
62	opendir(D, $head) or next OUTER;
63	my @leaves = readdir D;
64	closedir D;
65	$head = '' if $head eq '.';
66	$head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
67
68	# escape regex metachars but not glob chars
69        $pat =~ s:([].+^\-\${}[|]):\\$1:g;
70	# and convert DOS-style wildcards to regex
71	$pat =~ s/\*/.*/g;
72	$pat =~ s/\?/.?/g;
73
74	#print "regex: '$pat', head: '$head'\n";
75	my $matchsub = sub { $_[0] =~ m|^$pat\z|is };
76      INNER:
77	for my $e (@leaves) {
78	    next INNER if $e eq '.' or $e eq '..';
79	    next INNER if $cond eq 'd' and ! -d "$head$e";
80	    push(@matched, "$head$e"), next INNER if &$matchsub($e);
81	    #
82	    # [DOS compatibility special case]
83	    # Failed, add a trailing dot and try again, but only
84	    # if name does not have a dot in it *and* pattern
85	    # has a dot *and* name is shorter than 9 chars.
86	    #
87	    if (index($e,'.') == -1 and length($e) < 9
88	        and index($pat,'\\.') != -1) {
89		push(@matched, "$head$e"), next INNER if &$matchsub("$e.");
90	    }
91	}
92	push @retval, @matched if @matched;
93    }
94    return @retval;
95}
96
97
98#
99# Do DOS-like globbing on Mac OS
100#
101sub doglob_Mac {
102    my $cond = shift;
103    my @retval = ();
104
105	#print "doglob_Mac: ", join('|', @_), "\n";
106  OUTER:
107    for my $arg (@_) {
108        local $_ = $arg;
109	my @matched = ();
110	my @globdirs = ();
111	my $head = ':';
112	my $not_esc_head = $head;
113	my $sepchr = ':';
114	next OUTER unless defined $_ and $_ ne '';
115	# if arg is within quotes strip em and do no globbing
116	if (/^"(.*)"\z/s) {
117	    $_ = $1;
118		# $_ may contain escaped metachars '\*', '\?' and '\'
119	        my $not_esc_arg = $_;
120		$not_esc_arg =~ s/\\([*?\\])/$1/g;
121	    if ($cond eq 'd') { push(@retval, $not_esc_arg) if -d $not_esc_arg }
122	    else              { push(@retval, $not_esc_arg) if -e $not_esc_arg }
123	    next OUTER;
124	}
125
126	if (m|^(.*?)(:+)([^:]*)\z|s) { # note: $1 is not greedy
127	    my $tail;
128	    ($head, $sepchr, $tail) = ($1,$2,$3);
129	    #print "div: |$head|$sepchr|$tail|\n";
130	    push (@retval, $_), next OUTER if $tail eq '';
131		#
132		# $head may contain escaped metachars '\*' and '\?'
133
134		my $tmp_head = $head;
135		# if a '*' or '?' is preceded by an odd count of '\', temporary delete
136		# it (and its preceding backslashes), i.e. don't treat '\*' and '\?' as
137		# wildcards
138		$tmp_head =~ s/(\\*)([*?])/$2 x ((length($1) + 1) % 2)/eg;
139
140		if ($tmp_head =~ /[*?]/) { # if there are wildcards ...
141		@globdirs = doglob_Mac('d', $head);
142		push(@retval, doglob_Mac($cond, map {"$_$sepchr$tail"} @globdirs)),
143		    next OUTER if @globdirs;
144	    }
145
146		$head .= $sepchr;
147		$not_esc_head = $head;
148		# unescape $head for file operations
149		$not_esc_head =~ s/\\([*?\\])/$1/g;
150	    $_ = $tail;
151	}
152	#
153	# If file component has no wildcards, we can avoid opendir
154
155	my $tmp_tail = $_;
156	# if a '*' or '?' is preceded by an odd count of '\', temporary delete
157	# it (and its preceding backslashes), i.e. don't treat '\*' and '\?' as
158	# wildcards
159	$tmp_tail =~ s/(\\*)([*?])/$2 x ((length($1) + 1) % 2)/eg;
160
161	unless ($tmp_tail =~ /[*?]/) { # if there are wildcards ...
162	    $not_esc_head = $head = '' if $head eq ':';
163	    my $not_esc_tail = $_;
164	    # unescape $head and $tail for file operations
165	    $not_esc_tail =~ s/\\([*?\\])/$1/g;
166	    $head .= $_;
167		$not_esc_head .= $not_esc_tail;
168	    if ($cond eq 'd') { push(@retval,$head) if -d $not_esc_head }
169	    else              { push(@retval,$head) if -e $not_esc_head }
170	    next OUTER;
171	}
172	#print "opendir($not_esc_head)\n";
173	opendir(D, $not_esc_head) or next OUTER;
174	my @leaves = readdir D;
175	closedir D;
176
177	# escape regex metachars but not '\' and glob chars '*', '?'
178	$_ =~ s:([].+^\-\${}[|]):\\$1:g;
179	# and convert DOS-style wildcards to regex,
180	# but only if they are not escaped
181	$_ =~ s/(\\*)([*?])/$1 . ('.' x ((length($1) + 1) % 2)) . $2/eg;
182
183	#print "regex: '$_', head: '$head', unescaped head: '$not_esc_head'\n";
184	my $matchsub = eval 'sub { $_[0] =~ m|^' . $_ . '\\z|ios }';
185	warn($@), next OUTER if $@;
186      INNER:
187	for my $e (@leaves) {
188	    next INNER if $e eq '.' or $e eq '..';
189	    next INNER if $cond eq 'd' and ! -d "$not_esc_head$e";
190
191		if (&$matchsub($e)) {
192			my $leave = (($not_esc_head eq ':') && (-f "$not_esc_head$e")) ?
193		            	"$e" : "$not_esc_head$e";
194			#
195			# On Mac OS, the two glob metachars '*' and '?' and the escape
196			# char '\' are valid characters for file and directory names.
197			# We have to escape and treat them specially.
198			$leave =~ s|([*?\\])|\\$1|g;
199			push(@matched, $leave);
200			next INNER;
201		}
202	}
203	push @retval, @matched if @matched;
204    }
205    return @retval;
206}
207
208#
209# _expand_volume() will only be used on Mac OS (Classic):
210# Takes an array of original patterns as argument and returns an array of
211# possibly modified patterns. Each original pattern is processed like
212# that:
213# + If there's a volume name in the pattern, we push a separate pattern
214#   for each mounted volume that matches (with '*', '?' and '\' escaped).
215# + If there's no volume name in the original pattern, it is pushed
216#   unchanged.
217# Note that the returned array of patterns may be empty.
218#
219sub _expand_volume {
220
221	require MacPerl; # to be verbose
222
223	my @pat = @_;
224	my @new_pat = ();
225	my @FSSpec_Vols = MacPerl::Volumes();
226	my @mounted_volumes = ();
227
228	foreach my $spec_vol (@FSSpec_Vols) {
229		# push all mounted volumes into array
230     	push @mounted_volumes, MacPerl::MakePath($spec_vol);
231	}
232	#print "mounted volumes: |@mounted_volumes|\n";
233
234	while (@pat) {
235		my $pat = shift @pat;
236		if ($pat =~ /^([^:]+:)(.*)\z/) { # match a volume name?
237			my $vol_pat = $1;
238			my $tail = $2;
239			#
240			# escape regex metachars but not '\' and glob chars '*', '?'
241			$vol_pat =~ s:([].+^\-\${}[|]):\\$1:g;
242			# and convert DOS-style wildcards to regex,
243			# but only if they are not escaped
244			$vol_pat =~ s/(\\*)([*?])/$1 . ('.' x ((length($1) + 1) % 2)) . $2/eg;
245			#print "volume regex: '$vol_pat' \n";
246
247			foreach my $volume (@mounted_volumes) {
248				if ($volume =~ m|^$vol_pat\z|ios) {
249					#
250					# On Mac OS, the two glob metachars '*' and '?' and the
251					# escape char '\' are valid characters for volume names.
252					# We have to escape and treat them specially.
253					$volume =~ s|([*?\\])|\\$1|g;
254					push @new_pat, $volume . $tail;
255				}
256			}
257		} else { # no volume name in pattern, push original pattern
258			push @new_pat, $pat;
259		}
260	}
261	return @new_pat;
262}
263
264
265#
266# _preprocess_pattern() will only be used on Mac OS (Classic):
267# Resolves any updirs in the pattern. Removes a single trailing colon
268# from the pattern, unless it's a volume name pattern like "*HD:"
269#
270sub _preprocess_pattern {
271	my @pat = @_;
272
273	foreach my $p (@pat) {
274		my $proceed;
275		# resolve any updirs, e.g. "*HD:t?p::a*" -> "*HD:a*"
276		do {
277			$proceed = ($p =~ s/^(.*):[^:]+::(.*?)\z/$1:$2/);
278		} while ($proceed);
279		# remove a single trailing colon, e.g. ":*:" -> ":*"
280		$p =~ s/:([^:]+):\z/:$1/;
281	}
282	return @pat;
283}
284
285
286#
287# _un_escape() will only be used on Mac OS (Classic):
288# Unescapes a list of arguments which may contain escaped
289# metachars '*', '?' and '\'.
290#
291sub _un_escape {
292	foreach (@_) {
293		s/\\([*?\\])/$1/g;
294	}
295	return @_;
296}
297
298#
299# this can be used to override CORE::glob in a specific
300# package by saying C<use File::DosGlob 'glob';> in that
301# namespace.
302#
303
304# context (keyed by second cxix arg provided by core)
305my %iter;
306my %entries;
307
308sub glob {
309    my($pat,$cxix) = @_;
310    my @pat;
311
312    # glob without args defaults to $_
313    $pat = $_ unless defined $pat;
314
315    # extract patterns
316    if ($pat =~ /\s/) {
317	require Text::ParseWords;
318	@pat = Text::ParseWords::parse_line('\s+',0,$pat);
319    }
320    else {
321	push @pat, $pat;
322    }
323
324    # Mike Mestnik: made to do abc{1,2,3} == abc1 abc2 abc3.
325    #   abc3 will be the original {3} (and drop the {}).
326    #   abc1 abc2 will be put in @appendpat.
327    # This was just the esiest way, not nearly the best.
328    REHASH: {
329	my @appendpat = ();
330	for (@pat) {
331	    # There must be a "," I.E. abc{efg} is not what we want.
332	    while ( /^(.*)(?<!\\)\{(.*?)(?<!\\)\,.*?(?<!\\)\}(.*)$/ ) {
333		my ($start, $match, $end) = ($1, $2, $3);
334		#print "Got: \n\t$start\n\t$match\n\t$end\n";
335		my $tmp = "$start$match$end";
336		while ( $tmp =~ s/^(.*?)(?<!\\)\{(?:.*(?<!\\)\,)?(.*\Q$match\E.*?)(?:(?<!\\)\,.*)?(?<!\\)\}(.*)$/$1$2$3/ ) {
337		    #print "Striped: $tmp\n";
338		    #  these expanshions will be preformed by the original,
339		    #  when we call REHASH.
340		}
341		push @appendpat, ("$tmp");
342		s/^\Q$start\E(?<!\\)\{\Q$match\E(?<!\\)\,/$start\{/;
343		if ( /^\Q$start\E(?<!\\)\{(?!.*?(?<!\\)\,.*?\Q$end\E$)(.*)(?<!\\)\}\Q$end\E$/ ) {
344		    $match = $1;
345		    #print "GOT: \n\t$start\n\t$match\n\t$end\n\n";
346		    $_ = "$start$match$end";
347		}
348	    }
349	    #print "Sould have "GOT" vs "Got"!\n";
350		#FIXME: There should be checking for this.
351		#  How or what should be done about failure is beond me.
352	}
353	if ( $#appendpat != -1
354		) {
355	    #print "LOOP\n";
356	    #FIXME: Max loop, no way! :")
357	    for ( @appendpat ) {
358	        push @pat, $_;
359	    }
360	    goto REHASH;
361	}
362    }
363    for ( @pat ) {
364	s/\\{/{/g;
365	s/\\}/}/g;
366	s/\\,/,/g;
367    }
368    #print join ("\n", @pat). "\n";
369
370    # assume global context if not provided one
371    $cxix = '_G_' unless defined $cxix;
372    $iter{$cxix} = 0 unless exists $iter{$cxix};
373
374    # if we're just beginning, do it all first
375    if ($iter{$cxix} == 0) {
376	if ($^O eq 'MacOS') {
377		# first, take care of updirs and trailing colons
378		@pat = _preprocess_pattern(@pat);
379		# expand volume names
380		@pat = _expand_volume(@pat);
381		$entries{$cxix} = (@pat) ? [_un_escape( doglob_Mac(1,@pat) )] : [()];
382	} else {
383		$entries{$cxix} = [doglob(1,@pat)];
384    }
385	}
386
387    # chuck it all out, quick or slow
388    if (wantarray) {
389	delete $iter{$cxix};
390	return @{delete $entries{$cxix}};
391    }
392    else {
393	if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
394	    return shift @{$entries{$cxix}};
395	}
396	else {
397	    # return undef for EOL
398	    delete $iter{$cxix};
399	    delete $entries{$cxix};
400	    return undef;
401	}
402    }
403}
404
405{
406    no strict 'refs';
407
408    sub import {
409    my $pkg = shift;
410    return unless @_;
411    my $sym = shift;
412    my $callpkg = ($sym =~ s/^GLOBAL_//s ? 'CORE::GLOBAL' : caller(0));
413    *{$callpkg.'::'.$sym} = \&{$pkg.'::'.$sym} if $sym eq 'glob';
414    }
415}
4161;
417
418__END__
419
420=head1 NAME
421
422File::DosGlob - DOS like globbing and then some
423
424=head1 SYNOPSIS
425
426    require 5.004;
427
428    # override CORE::glob in current package
429    use File::DosGlob 'glob';
430
431    # override CORE::glob in ALL packages (use with extreme caution!)
432    use File::DosGlob 'GLOBAL_glob';
433
434    @perlfiles = glob  "..\\pe?l/*.p?";
435    print <..\\pe?l/*.p?>;
436
437    # from the command line (overrides only in main::)
438    > perl -MFile::DosGlob=glob -e "print <../pe*/*p?>"
439
440=head1 DESCRIPTION
441
442A module that implements DOS-like globbing with a few enhancements.
443It is largely compatible with perlglob.exe (the M$ setargv.obj
444version) in all but one respect--it understands wildcards in
445directory components.
446
447For example, C<<..\\l*b\\file/*glob.p?>> will work as expected (in
448that it will find something like '..\lib\File/DosGlob.pm' alright).
449Note that all path components are case-insensitive, and that
450backslashes and forward slashes are both accepted, and preserved.
451You may have to double the backslashes if you are putting them in
452literally, due to double-quotish parsing of the pattern by perl.
453
454Spaces in the argument delimit distinct patterns, so
455C<glob('*.exe *.dll')> globs all filenames that end in C<.exe>
456or C<.dll>.  If you want to put in literal spaces in the glob
457pattern, you can escape them with either double quotes, or backslashes.
458e.g. C<glob('c:/"Program Files"/*/*.dll')>, or
459C<glob('c:/Program\ Files/*/*.dll')>.  The argument is tokenized using
460C<Text::ParseWords::parse_line()>, so see L<Text::ParseWords> for details
461of the quoting rules used.
462
463Extending it to csh patterns is left as an exercise to the reader.
464
465=head1 NOTES
466
467=over 4
468
469=item *
470
471Mac OS (Classic) users should note a few differences. The specification
472of pathnames in glob patterns adheres to the usual Mac OS conventions:
473The path separator is a colon ':', not a slash '/' or backslash '\'. A
474full path always begins with a volume name. A relative pathname on Mac
475OS must always begin with a ':', except when specifying a file or
476directory name in the current working directory, where the leading colon
477is optional. If specifying a volume name only, a trailing ':' is
478required. Due to these rules, a glob like E<lt>*:E<gt> will find all
479mounted volumes, while a glob like E<lt>*E<gt> or E<lt>:*E<gt> will find
480all files and directories in the current directory.
481
482Note that updirs in the glob pattern are resolved before the matching begins,
483i.e. a pattern like "*HD:t?p::a*" will be matched as "*HD:a*". Note also,
484that a single trailing ':' in the pattern is ignored (unless it's a volume
485name pattern like "*HD:"), i.e. a glob like <:*:> will find both directories
486I<and> files (and not, as one might expect, only directories).
487
488The metachars '*', '?' and the escape char '\' are valid characters in
489volume, directory and file names on Mac OS. Hence, if you want to match
490a '*', '?' or '\' literally, you have to escape these characters. Due to
491perl's quoting rules, things may get a bit complicated, when you want to
492match a string like '\*' literally, or when you want to match '\' literally,
493but treat the immediately following character '*' as metachar. So, here's a
494rule of thumb (applies to both single- and double-quoted strings): escape
495each '*' or '?' or '\' with a backslash, if you want to treat them literally,
496and then double each backslash and your are done. E.g.
497
498- Match '\*' literally
499
500   escape both '\' and '*'  : '\\\*'
501   double the backslashes   : '\\\\\\*'
502
503(Internally, the glob routine sees a '\\\*', which means that both '\' and
504'*' are escaped.)
505
506
507- Match '\' literally, treat '*' as metachar
508
509   escape '\' but not '*'   : '\\*'
510   double the backslashes   : '\\\\*'
511
512(Internally, the glob routine sees a '\\*', which means that '\' is escaped and
513'*' is not.)
514
515Note that you also have to quote literal spaces in the glob pattern, as described
516above.
517
518=back
519
520=head1 EXPORTS (by request only)
521
522glob()
523
524=head1 BUGS
525
526Should probably be built into the core, and needs to stop
527pandering to DOS habits.  Needs a dose of optimizium too.
528
529=head1 AUTHOR
530
531Gurusamy Sarathy <gsar@activestate.com>
532
533=head1 HISTORY
534
535=over 4
536
537=item *
538
539Support for globally overriding glob() (GSAR 3-JUN-98)
540
541=item *
542
543Scalar context, independent iterator context fixes (GSAR 15-SEP-97)
544
545=item *
546
547A few dir-vs-file optimizations result in glob importation being
54810 times faster than using perlglob.exe, and using perlglob.bat is
549only twice as slow as perlglob.exe (GSAR 28-MAY-97)
550
551=item *
552
553Several cleanups prompted by lack of compatible perlglob.exe
554under Borland (GSAR 27-MAY-97)
555
556=item *
557
558Initial version (GSAR 20-FEB-97)
559
560=back
561
562=head1 SEE ALSO
563
564perl
565
566perlglob.bat
567
568Text::ParseWords
569
570=cut
571
572