xref: /openbsd-src/gnu/usr.bin/perl/ext/File-DosGlob/lib/File/DosGlob.pm (revision 6fb12b7054efc6b436584db6cef9c2f85c0d7e27)
191f110e0Safresh1#!perl -w
291f110e0Safresh1
391f110e0Safresh1#
491f110e0Safresh1# Documentation at the __END__
591f110e0Safresh1#
691f110e0Safresh1
791f110e0Safresh1package File::DosGlob;
891f110e0Safresh1
9*6fb12b70Safresh1our $VERSION = '1.12';
1091f110e0Safresh1use strict;
1191f110e0Safresh1use warnings;
1291f110e0Safresh1
1391f110e0Safresh1require XSLoader;
1491f110e0Safresh1XSLoader::load();
1591f110e0Safresh1
1691f110e0Safresh1sub doglob {
1791f110e0Safresh1    my $cond = shift;
1891f110e0Safresh1    my @retval = ();
1991f110e0Safresh1    my $fix_drive_relative_paths;
2091f110e0Safresh1  OUTER:
2191f110e0Safresh1    for my $pat (@_) {
2291f110e0Safresh1	my @matched = ();
2391f110e0Safresh1	my @globdirs = ();
2491f110e0Safresh1	my $head = '.';
2591f110e0Safresh1	my $sepchr = '/';
2691f110e0Safresh1        my $tail;
2791f110e0Safresh1	next OUTER unless defined $pat and $pat ne '';
2891f110e0Safresh1	# if arg is within quotes strip em and do no globbing
2991f110e0Safresh1	if ($pat =~ /^"(.*)"\z/s) {
3091f110e0Safresh1	    $pat = $1;
3191f110e0Safresh1	    if ($cond eq 'd') { push(@retval, $pat) if -d $pat }
3291f110e0Safresh1	    else              { push(@retval, $pat) if -e $pat }
3391f110e0Safresh1	    next OUTER;
3491f110e0Safresh1	}
3591f110e0Safresh1	# wildcards with a drive prefix such as h:*.pm must be changed
3691f110e0Safresh1	# to h:./*.pm to expand correctly
3791f110e0Safresh1	if ($pat =~ m|^([A-Za-z]:)[^/\\]|s) {
3891f110e0Safresh1	    substr($pat,0,2) = $1 . "./";
3991f110e0Safresh1	    $fix_drive_relative_paths = 1;
4091f110e0Safresh1	}
4191f110e0Safresh1	if ($pat =~ m|^(.*)([\\/])([^\\/]*)\z|s) {
4291f110e0Safresh1	    ($head, $sepchr, $tail) = ($1,$2,$3);
4391f110e0Safresh1	    push (@retval, $pat), next OUTER if $tail eq '';
4491f110e0Safresh1	    if ($head =~ /[*?]/) {
4591f110e0Safresh1		@globdirs = doglob('d', $head);
4691f110e0Safresh1		push(@retval, doglob($cond, map {"$_$sepchr$tail"} @globdirs)),
4791f110e0Safresh1		    next OUTER if @globdirs;
4891f110e0Safresh1	    }
4991f110e0Safresh1	    $head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:\z/s;
5091f110e0Safresh1	    $pat = $tail;
5191f110e0Safresh1	}
5291f110e0Safresh1	#
5391f110e0Safresh1	# If file component has no wildcards, we can avoid opendir
5491f110e0Safresh1	unless ($pat =~ /[*?]/) {
5591f110e0Safresh1	    $head = '' if $head eq '.';
5691f110e0Safresh1	    $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
5791f110e0Safresh1	    $head .= $pat;
5891f110e0Safresh1	    if ($cond eq 'd') { push(@retval,$head) if -d $head }
5991f110e0Safresh1	    else              { push(@retval,$head) if -e $head }
6091f110e0Safresh1	    next OUTER;
6191f110e0Safresh1	}
6291f110e0Safresh1	opendir(D, $head) or next OUTER;
6391f110e0Safresh1	my @leaves = readdir D;
6491f110e0Safresh1	closedir D;
6591f110e0Safresh1
6691f110e0Safresh1	# VMS-format filespecs, especially if they contain extended characters,
6791f110e0Safresh1	# are unlikely to match patterns correctly, so Unixify them.
6891f110e0Safresh1	if ($^O eq 'VMS') {
6991f110e0Safresh1	    require VMS::Filespec;
7091f110e0Safresh1	    @leaves = map {$_ =~ s/\.$//; VMS::Filespec::unixify($_)} @leaves;
7191f110e0Safresh1        }
7291f110e0Safresh1	$head = '' if $head eq '.';
7391f110e0Safresh1	$head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
7491f110e0Safresh1
7591f110e0Safresh1	# escape regex metachars but not glob chars
7691f110e0Safresh1	$pat =~ s:([].+^\-\${}()[|]):\\$1:g;
7791f110e0Safresh1	# and convert DOS-style wildcards to regex
7891f110e0Safresh1	$pat =~ s/\*/.*/g;
7991f110e0Safresh1	$pat =~ s/\?/.?/g;
8091f110e0Safresh1
8191f110e0Safresh1	my $matchsub = sub { $_[0] =~ m|^$pat\z|is };
8291f110e0Safresh1      INNER:
8391f110e0Safresh1	for my $e (@leaves) {
8491f110e0Safresh1	    next INNER if $e eq '.' or $e eq '..';
8591f110e0Safresh1	    next INNER if $cond eq 'd' and ! -d "$head$e";
8691f110e0Safresh1	    push(@matched, "$head$e"), next INNER if &$matchsub($e);
8791f110e0Safresh1	    #
8891f110e0Safresh1	    # [DOS compatibility special case]
8991f110e0Safresh1	    # Failed, add a trailing dot and try again, but only
9091f110e0Safresh1	    # if name does not have a dot in it *and* pattern
9191f110e0Safresh1	    # has a dot *and* name is shorter than 9 chars.
9291f110e0Safresh1	    #
9391f110e0Safresh1	    if (index($e,'.') == -1 and length($e) < 9
9491f110e0Safresh1	        and index($pat,'\\.') != -1) {
9591f110e0Safresh1		push(@matched, "$head$e"), next INNER if &$matchsub("$e.");
9691f110e0Safresh1	    }
9791f110e0Safresh1	}
9891f110e0Safresh1	push @retval, @matched if @matched;
9991f110e0Safresh1    }
10091f110e0Safresh1    if ($fix_drive_relative_paths) {
10191f110e0Safresh1	s|^([A-Za-z]:)\./|$1| for @retval;
10291f110e0Safresh1    }
10391f110e0Safresh1    return @retval;
10491f110e0Safresh1}
10591f110e0Safresh1
10691f110e0Safresh1#
10791f110e0Safresh1# this can be used to override CORE::glob in a specific
10891f110e0Safresh1# package by saying C<use File::DosGlob 'glob';> in that
10991f110e0Safresh1# namespace.
11091f110e0Safresh1#
11191f110e0Safresh1
11291f110e0Safresh1# context (keyed by second cxix arg provided by core)
11391f110e0Safresh1our %entries;
11491f110e0Safresh1
11591f110e0Safresh1sub glob {
11691f110e0Safresh1    my($pat,$cxix) = ($_[0], _callsite());
11791f110e0Safresh1    my @pat;
11891f110e0Safresh1
11991f110e0Safresh1    # glob without args defaults to $_
12091f110e0Safresh1    $pat = $_ unless defined $pat;
12191f110e0Safresh1
12291f110e0Safresh1    # if we're just beginning, do it all first
12391f110e0Safresh1    if (!$entries{$cxix}) {
12491f110e0Safresh1      # extract patterns
12591f110e0Safresh1      if ($pat =~ /\s/) {
12691f110e0Safresh1	require Text::ParseWords;
12791f110e0Safresh1	@pat = Text::ParseWords::parse_line('\s+',0,$pat);
12891f110e0Safresh1      }
12991f110e0Safresh1      else {
13091f110e0Safresh1	push @pat, $pat;
13191f110e0Safresh1      }
13291f110e0Safresh1
13391f110e0Safresh1      # Mike Mestnik: made to do abc{1,2,3} == abc1 abc2 abc3.
13491f110e0Safresh1      #   abc3 will be the original {3} (and drop the {}).
13591f110e0Safresh1      #   abc1 abc2 will be put in @appendpat.
13691f110e0Safresh1      # This was just the easiest way, not nearly the best.
13791f110e0Safresh1      REHASH: {
13891f110e0Safresh1	my @appendpat = ();
13991f110e0Safresh1	for (@pat) {
14091f110e0Safresh1	    # There must be a "," I.E. abc{efg} is not what we want.
14191f110e0Safresh1	    while ( /^(.*)(?<!\\)\{(.*?)(?<!\\)\,.*?(?<!\\)\}(.*)$/ ) {
14291f110e0Safresh1		my ($start, $match, $end) = ($1, $2, $3);
14391f110e0Safresh1		#print "Got: \n\t$start\n\t$match\n\t$end\n";
14491f110e0Safresh1		my $tmp = "$start$match$end";
14591f110e0Safresh1		while ( $tmp =~ s/^(.*?)(?<!\\)\{(?:.*(?<!\\)\,)?(.*\Q$match\E.*?)(?:(?<!\\)\,.*)?(?<!\\)\}(.*)$/$1$2$3/ ) {
14691f110e0Safresh1		    #  these expansions will be performed by the original,
14791f110e0Safresh1		    #  when we call REHASH.
14891f110e0Safresh1		}
14991f110e0Safresh1		push @appendpat, ("$tmp");
15091f110e0Safresh1		s/^\Q$start\E(?<!\\)\{\Q$match\E(?<!\\)\,/$start\{/;
15191f110e0Safresh1		if ( /^\Q$start\E(?<!\\)\{(?!.*?(?<!\\)\,.*?\Q$end\E$)(.*)(?<!\\)\}\Q$end\E$/ ) {
15291f110e0Safresh1		    $match = $1;
15391f110e0Safresh1		    #print "GOT: \n\t$start\n\t$match\n\t$end\n\n";
15491f110e0Safresh1		    $_ = "$start$match$end";
15591f110e0Safresh1		}
15691f110e0Safresh1	    }
15791f110e0Safresh1	    #print "Sould have "GOT" vs "Got"!\n";
15891f110e0Safresh1		#FIXME: There should be checking for this.
159*6fb12b70Safresh1		#  How or what should be done about failure is beyond me.
16091f110e0Safresh1	}
16191f110e0Safresh1	if ( $#appendpat != -1
16291f110e0Safresh1		) {
16391f110e0Safresh1	    #FIXME: Max loop, no way! :")
16491f110e0Safresh1	    for ( @appendpat ) {
16591f110e0Safresh1	        push @pat, $_;
16691f110e0Safresh1	    }
16791f110e0Safresh1	    goto REHASH;
16891f110e0Safresh1	}
16991f110e0Safresh1      }
17091f110e0Safresh1      for ( @pat ) {
17191f110e0Safresh1	s/\\([{},])/$1/g;
17291f110e0Safresh1      }
17391f110e0Safresh1
17491f110e0Safresh1      $entries{$cxix} = [doglob(1,@pat)];
17591f110e0Safresh1    }
17691f110e0Safresh1
17791f110e0Safresh1    # chuck it all out, quick or slow
17891f110e0Safresh1    if (wantarray) {
17991f110e0Safresh1	return @{delete $entries{$cxix}};
18091f110e0Safresh1    }
18191f110e0Safresh1    else {
18291f110e0Safresh1	if (scalar @{$entries{$cxix}}) {
18391f110e0Safresh1	    return shift @{$entries{$cxix}};
18491f110e0Safresh1	}
18591f110e0Safresh1	else {
18691f110e0Safresh1	    # return undef for EOL
18791f110e0Safresh1	    delete $entries{$cxix};
18891f110e0Safresh1	    return undef;
18991f110e0Safresh1	}
19091f110e0Safresh1    }
19191f110e0Safresh1}
19291f110e0Safresh1
19391f110e0Safresh1{
19491f110e0Safresh1    no strict 'refs';
19591f110e0Safresh1
19691f110e0Safresh1    sub import {
19791f110e0Safresh1    my $pkg = shift;
19891f110e0Safresh1    return unless @_;
19991f110e0Safresh1    my $sym = shift;
20091f110e0Safresh1    my $callpkg = ($sym =~ s/^GLOBAL_//s ? 'CORE::GLOBAL' : caller(0));
20191f110e0Safresh1    *{$callpkg.'::'.$sym} = \&{$pkg.'::'.$sym} if $sym eq 'glob';
20291f110e0Safresh1    }
20391f110e0Safresh1}
20491f110e0Safresh11;
20591f110e0Safresh1
20691f110e0Safresh1__END__
20791f110e0Safresh1
20891f110e0Safresh1=head1 NAME
20991f110e0Safresh1
21091f110e0Safresh1File::DosGlob - DOS like globbing and then some
21191f110e0Safresh1
21291f110e0Safresh1=head1 SYNOPSIS
21391f110e0Safresh1
21491f110e0Safresh1    require 5.004;
21591f110e0Safresh1
21691f110e0Safresh1    # override CORE::glob in current package
21791f110e0Safresh1    use File::DosGlob 'glob';
21891f110e0Safresh1
21991f110e0Safresh1    # override CORE::glob in ALL packages (use with extreme caution!)
22091f110e0Safresh1    use File::DosGlob 'GLOBAL_glob';
22191f110e0Safresh1
22291f110e0Safresh1    @perlfiles = glob  "..\\pe?l/*.p?";
22391f110e0Safresh1    print <..\\pe?l/*.p?>;
22491f110e0Safresh1
22591f110e0Safresh1    # from the command line (overrides only in main::)
22691f110e0Safresh1    > perl -MFile::DosGlob=glob -e "print <../pe*/*p?>"
22791f110e0Safresh1
22891f110e0Safresh1=head1 DESCRIPTION
22991f110e0Safresh1
23091f110e0Safresh1A module that implements DOS-like globbing with a few enhancements.
23191f110e0Safresh1It is largely compatible with perlglob.exe (the M$ setargv.obj
23291f110e0Safresh1version) in all but one respect--it understands wildcards in
23391f110e0Safresh1directory components.
23491f110e0Safresh1
23591f110e0Safresh1For example, C<< <..\\l*b\\file/*glob.p?> >> will work as expected (in
23691f110e0Safresh1that it will find something like '..\lib\File/DosGlob.pm' alright).
23791f110e0Safresh1Note that all path components are case-insensitive, and that
23891f110e0Safresh1backslashes and forward slashes are both accepted, and preserved.
23991f110e0Safresh1You may have to double the backslashes if you are putting them in
24091f110e0Safresh1literally, due to double-quotish parsing of the pattern by perl.
24191f110e0Safresh1
24291f110e0Safresh1Spaces in the argument delimit distinct patterns, so
24391f110e0Safresh1C<glob('*.exe *.dll')> globs all filenames that end in C<.exe>
24491f110e0Safresh1or C<.dll>.  If you want to put in literal spaces in the glob
24591f110e0Safresh1pattern, you can escape them with either double quotes, or backslashes.
24691f110e0Safresh1e.g. C<glob('c:/"Program Files"/*/*.dll')>, or
24791f110e0Safresh1C<glob('c:/Program\ Files/*/*.dll')>.  The argument is tokenized using
24891f110e0Safresh1C<Text::ParseWords::parse_line()>, so see L<Text::ParseWords> for details
24991f110e0Safresh1of the quoting rules used.
25091f110e0Safresh1
25191f110e0Safresh1Extending it to csh patterns is left as an exercise to the reader.
25291f110e0Safresh1
25391f110e0Safresh1=head1 EXPORTS (by request only)
25491f110e0Safresh1
25591f110e0Safresh1glob()
25691f110e0Safresh1
25791f110e0Safresh1=head1 BUGS
25891f110e0Safresh1
25991f110e0Safresh1Should probably be built into the core, and needs to stop
260*6fb12b70Safresh1pandering to DOS habits.  Needs a dose of optimization too.
26191f110e0Safresh1
26291f110e0Safresh1=head1 AUTHOR
26391f110e0Safresh1
26491f110e0Safresh1Gurusamy Sarathy <gsar@activestate.com>
26591f110e0Safresh1
26691f110e0Safresh1=head1 HISTORY
26791f110e0Safresh1
26891f110e0Safresh1=over 4
26991f110e0Safresh1
27091f110e0Safresh1=item *
27191f110e0Safresh1
27291f110e0Safresh1Support for globally overriding glob() (GSAR 3-JUN-98)
27391f110e0Safresh1
27491f110e0Safresh1=item *
27591f110e0Safresh1
27691f110e0Safresh1Scalar context, independent iterator context fixes (GSAR 15-SEP-97)
27791f110e0Safresh1
27891f110e0Safresh1=item *
27991f110e0Safresh1
28091f110e0Safresh1A few dir-vs-file optimizations result in glob importation being
28191f110e0Safresh110 times faster than using perlglob.exe, and using perlglob.bat is
28291f110e0Safresh1only twice as slow as perlglob.exe (GSAR 28-MAY-97)
28391f110e0Safresh1
28491f110e0Safresh1=item *
28591f110e0Safresh1
28691f110e0Safresh1Several cleanups prompted by lack of compatible perlglob.exe
28791f110e0Safresh1under Borland (GSAR 27-MAY-97)
28891f110e0Safresh1
28991f110e0Safresh1=item *
29091f110e0Safresh1
29191f110e0Safresh1Initial version (GSAR 20-FEB-97)
29291f110e0Safresh1
29391f110e0Safresh1=back
29491f110e0Safresh1
29591f110e0Safresh1=head1 SEE ALSO
29691f110e0Safresh1
29791f110e0Safresh1perl
29891f110e0Safresh1
29991f110e0Safresh1perlglob.bat
30091f110e0Safresh1
30191f110e0Safresh1Text::ParseWords
30291f110e0Safresh1
30391f110e0Safresh1=cut
30491f110e0Safresh1
305