191f110e0Safresh1#!perl -w 291f110e0Safresh1 391f110e0Safresh1# 491f110e0Safresh1# Documentation at the __END__ 591f110e0Safresh1# 691f110e0Safresh1 791f110e0Safresh1package File::DosGlob; 891f110e0Safresh1 9*6fb12b70Safresh1our $VERSION = '1.12'; 1091f110e0Safresh1use strict; 1191f110e0Safresh1use warnings; 1291f110e0Safresh1 1391f110e0Safresh1require XSLoader; 1491f110e0Safresh1XSLoader::load(); 1591f110e0Safresh1 1691f110e0Safresh1sub doglob { 1791f110e0Safresh1 my $cond = shift; 1891f110e0Safresh1 my @retval = (); 1991f110e0Safresh1 my $fix_drive_relative_paths; 2091f110e0Safresh1 OUTER: 2191f110e0Safresh1 for my $pat (@_) { 2291f110e0Safresh1 my @matched = (); 2391f110e0Safresh1 my @globdirs = (); 2491f110e0Safresh1 my $head = '.'; 2591f110e0Safresh1 my $sepchr = '/'; 2691f110e0Safresh1 my $tail; 2791f110e0Safresh1 next OUTER unless defined $pat and $pat ne ''; 2891f110e0Safresh1 # if arg is within quotes strip em and do no globbing 2991f110e0Safresh1 if ($pat =~ /^"(.*)"\z/s) { 3091f110e0Safresh1 $pat = $1; 3191f110e0Safresh1 if ($cond eq 'd') { push(@retval, $pat) if -d $pat } 3291f110e0Safresh1 else { push(@retval, $pat) if -e $pat } 3391f110e0Safresh1 next OUTER; 3491f110e0Safresh1 } 3591f110e0Safresh1 # wildcards with a drive prefix such as h:*.pm must be changed 3691f110e0Safresh1 # to h:./*.pm to expand correctly 3791f110e0Safresh1 if ($pat =~ m|^([A-Za-z]:)[^/\\]|s) { 3891f110e0Safresh1 substr($pat,0,2) = $1 . "./"; 3991f110e0Safresh1 $fix_drive_relative_paths = 1; 4091f110e0Safresh1 } 4191f110e0Safresh1 if ($pat =~ m|^(.*)([\\/])([^\\/]*)\z|s) { 4291f110e0Safresh1 ($head, $sepchr, $tail) = ($1,$2,$3); 4391f110e0Safresh1 push (@retval, $pat), next OUTER if $tail eq ''; 4491f110e0Safresh1 if ($head =~ /[*?]/) { 4591f110e0Safresh1 @globdirs = doglob('d', $head); 4691f110e0Safresh1 push(@retval, doglob($cond, map {"$_$sepchr$tail"} @globdirs)), 4791f110e0Safresh1 next OUTER if @globdirs; 4891f110e0Safresh1 } 4991f110e0Safresh1 $head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:\z/s; 5091f110e0Safresh1 $pat = $tail; 5191f110e0Safresh1 } 5291f110e0Safresh1 # 5391f110e0Safresh1 # If file component has no wildcards, we can avoid opendir 5491f110e0Safresh1 unless ($pat =~ /[*?]/) { 5591f110e0Safresh1 $head = '' if $head eq '.'; 5691f110e0Safresh1 $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr; 5791f110e0Safresh1 $head .= $pat; 5891f110e0Safresh1 if ($cond eq 'd') { push(@retval,$head) if -d $head } 5991f110e0Safresh1 else { push(@retval,$head) if -e $head } 6091f110e0Safresh1 next OUTER; 6191f110e0Safresh1 } 6291f110e0Safresh1 opendir(D, $head) or next OUTER; 6391f110e0Safresh1 my @leaves = readdir D; 6491f110e0Safresh1 closedir D; 6591f110e0Safresh1 6691f110e0Safresh1 # VMS-format filespecs, especially if they contain extended characters, 6791f110e0Safresh1 # are unlikely to match patterns correctly, so Unixify them. 6891f110e0Safresh1 if ($^O eq 'VMS') { 6991f110e0Safresh1 require VMS::Filespec; 7091f110e0Safresh1 @leaves = map {$_ =~ s/\.$//; VMS::Filespec::unixify($_)} @leaves; 7191f110e0Safresh1 } 7291f110e0Safresh1 $head = '' if $head eq '.'; 7391f110e0Safresh1 $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr; 7491f110e0Safresh1 7591f110e0Safresh1 # escape regex metachars but not glob chars 7691f110e0Safresh1 $pat =~ s:([].+^\-\${}()[|]):\\$1:g; 7791f110e0Safresh1 # and convert DOS-style wildcards to regex 7891f110e0Safresh1 $pat =~ s/\*/.*/g; 7991f110e0Safresh1 $pat =~ s/\?/.?/g; 8091f110e0Safresh1 8191f110e0Safresh1 my $matchsub = sub { $_[0] =~ m|^$pat\z|is }; 8291f110e0Safresh1 INNER: 8391f110e0Safresh1 for my $e (@leaves) { 8491f110e0Safresh1 next INNER if $e eq '.' or $e eq '..'; 8591f110e0Safresh1 next INNER if $cond eq 'd' and ! -d "$head$e"; 8691f110e0Safresh1 push(@matched, "$head$e"), next INNER if &$matchsub($e); 8791f110e0Safresh1 # 8891f110e0Safresh1 # [DOS compatibility special case] 8991f110e0Safresh1 # Failed, add a trailing dot and try again, but only 9091f110e0Safresh1 # if name does not have a dot in it *and* pattern 9191f110e0Safresh1 # has a dot *and* name is shorter than 9 chars. 9291f110e0Safresh1 # 9391f110e0Safresh1 if (index($e,'.') == -1 and length($e) < 9 9491f110e0Safresh1 and index($pat,'\\.') != -1) { 9591f110e0Safresh1 push(@matched, "$head$e"), next INNER if &$matchsub("$e."); 9691f110e0Safresh1 } 9791f110e0Safresh1 } 9891f110e0Safresh1 push @retval, @matched if @matched; 9991f110e0Safresh1 } 10091f110e0Safresh1 if ($fix_drive_relative_paths) { 10191f110e0Safresh1 s|^([A-Za-z]:)\./|$1| for @retval; 10291f110e0Safresh1 } 10391f110e0Safresh1 return @retval; 10491f110e0Safresh1} 10591f110e0Safresh1 10691f110e0Safresh1# 10791f110e0Safresh1# this can be used to override CORE::glob in a specific 10891f110e0Safresh1# package by saying C<use File::DosGlob 'glob';> in that 10991f110e0Safresh1# namespace. 11091f110e0Safresh1# 11191f110e0Safresh1 11291f110e0Safresh1# context (keyed by second cxix arg provided by core) 11391f110e0Safresh1our %entries; 11491f110e0Safresh1 11591f110e0Safresh1sub glob { 11691f110e0Safresh1 my($pat,$cxix) = ($_[0], _callsite()); 11791f110e0Safresh1 my @pat; 11891f110e0Safresh1 11991f110e0Safresh1 # glob without args defaults to $_ 12091f110e0Safresh1 $pat = $_ unless defined $pat; 12191f110e0Safresh1 12291f110e0Safresh1 # if we're just beginning, do it all first 12391f110e0Safresh1 if (!$entries{$cxix}) { 12491f110e0Safresh1 # extract patterns 12591f110e0Safresh1 if ($pat =~ /\s/) { 12691f110e0Safresh1 require Text::ParseWords; 12791f110e0Safresh1 @pat = Text::ParseWords::parse_line('\s+',0,$pat); 12891f110e0Safresh1 } 12991f110e0Safresh1 else { 13091f110e0Safresh1 push @pat, $pat; 13191f110e0Safresh1 } 13291f110e0Safresh1 13391f110e0Safresh1 # Mike Mestnik: made to do abc{1,2,3} == abc1 abc2 abc3. 13491f110e0Safresh1 # abc3 will be the original {3} (and drop the {}). 13591f110e0Safresh1 # abc1 abc2 will be put in @appendpat. 13691f110e0Safresh1 # This was just the easiest way, not nearly the best. 13791f110e0Safresh1 REHASH: { 13891f110e0Safresh1 my @appendpat = (); 13991f110e0Safresh1 for (@pat) { 14091f110e0Safresh1 # There must be a "," I.E. abc{efg} is not what we want. 14191f110e0Safresh1 while ( /^(.*)(?<!\\)\{(.*?)(?<!\\)\,.*?(?<!\\)\}(.*)$/ ) { 14291f110e0Safresh1 my ($start, $match, $end) = ($1, $2, $3); 14391f110e0Safresh1 #print "Got: \n\t$start\n\t$match\n\t$end\n"; 14491f110e0Safresh1 my $tmp = "$start$match$end"; 14591f110e0Safresh1 while ( $tmp =~ s/^(.*?)(?<!\\)\{(?:.*(?<!\\)\,)?(.*\Q$match\E.*?)(?:(?<!\\)\,.*)?(?<!\\)\}(.*)$/$1$2$3/ ) { 14691f110e0Safresh1 # these expansions will be performed by the original, 14791f110e0Safresh1 # when we call REHASH. 14891f110e0Safresh1 } 14991f110e0Safresh1 push @appendpat, ("$tmp"); 15091f110e0Safresh1 s/^\Q$start\E(?<!\\)\{\Q$match\E(?<!\\)\,/$start\{/; 15191f110e0Safresh1 if ( /^\Q$start\E(?<!\\)\{(?!.*?(?<!\\)\,.*?\Q$end\E$)(.*)(?<!\\)\}\Q$end\E$/ ) { 15291f110e0Safresh1 $match = $1; 15391f110e0Safresh1 #print "GOT: \n\t$start\n\t$match\n\t$end\n\n"; 15491f110e0Safresh1 $_ = "$start$match$end"; 15591f110e0Safresh1 } 15691f110e0Safresh1 } 15791f110e0Safresh1 #print "Sould have "GOT" vs "Got"!\n"; 15891f110e0Safresh1 #FIXME: There should be checking for this. 159*6fb12b70Safresh1 # How or what should be done about failure is beyond me. 16091f110e0Safresh1 } 16191f110e0Safresh1 if ( $#appendpat != -1 16291f110e0Safresh1 ) { 16391f110e0Safresh1 #FIXME: Max loop, no way! :") 16491f110e0Safresh1 for ( @appendpat ) { 16591f110e0Safresh1 push @pat, $_; 16691f110e0Safresh1 } 16791f110e0Safresh1 goto REHASH; 16891f110e0Safresh1 } 16991f110e0Safresh1 } 17091f110e0Safresh1 for ( @pat ) { 17191f110e0Safresh1 s/\\([{},])/$1/g; 17291f110e0Safresh1 } 17391f110e0Safresh1 17491f110e0Safresh1 $entries{$cxix} = [doglob(1,@pat)]; 17591f110e0Safresh1 } 17691f110e0Safresh1 17791f110e0Safresh1 # chuck it all out, quick or slow 17891f110e0Safresh1 if (wantarray) { 17991f110e0Safresh1 return @{delete $entries{$cxix}}; 18091f110e0Safresh1 } 18191f110e0Safresh1 else { 18291f110e0Safresh1 if (scalar @{$entries{$cxix}}) { 18391f110e0Safresh1 return shift @{$entries{$cxix}}; 18491f110e0Safresh1 } 18591f110e0Safresh1 else { 18691f110e0Safresh1 # return undef for EOL 18791f110e0Safresh1 delete $entries{$cxix}; 18891f110e0Safresh1 return undef; 18991f110e0Safresh1 } 19091f110e0Safresh1 } 19191f110e0Safresh1} 19291f110e0Safresh1 19391f110e0Safresh1{ 19491f110e0Safresh1 no strict 'refs'; 19591f110e0Safresh1 19691f110e0Safresh1 sub import { 19791f110e0Safresh1 my $pkg = shift; 19891f110e0Safresh1 return unless @_; 19991f110e0Safresh1 my $sym = shift; 20091f110e0Safresh1 my $callpkg = ($sym =~ s/^GLOBAL_//s ? 'CORE::GLOBAL' : caller(0)); 20191f110e0Safresh1 *{$callpkg.'::'.$sym} = \&{$pkg.'::'.$sym} if $sym eq 'glob'; 20291f110e0Safresh1 } 20391f110e0Safresh1} 20491f110e0Safresh11; 20591f110e0Safresh1 20691f110e0Safresh1__END__ 20791f110e0Safresh1 20891f110e0Safresh1=head1 NAME 20991f110e0Safresh1 21091f110e0Safresh1File::DosGlob - DOS like globbing and then some 21191f110e0Safresh1 21291f110e0Safresh1=head1 SYNOPSIS 21391f110e0Safresh1 21491f110e0Safresh1 require 5.004; 21591f110e0Safresh1 21691f110e0Safresh1 # override CORE::glob in current package 21791f110e0Safresh1 use File::DosGlob 'glob'; 21891f110e0Safresh1 21991f110e0Safresh1 # override CORE::glob in ALL packages (use with extreme caution!) 22091f110e0Safresh1 use File::DosGlob 'GLOBAL_glob'; 22191f110e0Safresh1 22291f110e0Safresh1 @perlfiles = glob "..\\pe?l/*.p?"; 22391f110e0Safresh1 print <..\\pe?l/*.p?>; 22491f110e0Safresh1 22591f110e0Safresh1 # from the command line (overrides only in main::) 22691f110e0Safresh1 > perl -MFile::DosGlob=glob -e "print <../pe*/*p?>" 22791f110e0Safresh1 22891f110e0Safresh1=head1 DESCRIPTION 22991f110e0Safresh1 23091f110e0Safresh1A module that implements DOS-like globbing with a few enhancements. 23191f110e0Safresh1It is largely compatible with perlglob.exe (the M$ setargv.obj 23291f110e0Safresh1version) in all but one respect--it understands wildcards in 23391f110e0Safresh1directory components. 23491f110e0Safresh1 23591f110e0Safresh1For example, C<< <..\\l*b\\file/*glob.p?> >> will work as expected (in 23691f110e0Safresh1that it will find something like '..\lib\File/DosGlob.pm' alright). 23791f110e0Safresh1Note that all path components are case-insensitive, and that 23891f110e0Safresh1backslashes and forward slashes are both accepted, and preserved. 23991f110e0Safresh1You may have to double the backslashes if you are putting them in 24091f110e0Safresh1literally, due to double-quotish parsing of the pattern by perl. 24191f110e0Safresh1 24291f110e0Safresh1Spaces in the argument delimit distinct patterns, so 24391f110e0Safresh1C<glob('*.exe *.dll')> globs all filenames that end in C<.exe> 24491f110e0Safresh1or C<.dll>. If you want to put in literal spaces in the glob 24591f110e0Safresh1pattern, you can escape them with either double quotes, or backslashes. 24691f110e0Safresh1e.g. C<glob('c:/"Program Files"/*/*.dll')>, or 24791f110e0Safresh1C<glob('c:/Program\ Files/*/*.dll')>. The argument is tokenized using 24891f110e0Safresh1C<Text::ParseWords::parse_line()>, so see L<Text::ParseWords> for details 24991f110e0Safresh1of the quoting rules used. 25091f110e0Safresh1 25191f110e0Safresh1Extending it to csh patterns is left as an exercise to the reader. 25291f110e0Safresh1 25391f110e0Safresh1=head1 EXPORTS (by request only) 25491f110e0Safresh1 25591f110e0Safresh1glob() 25691f110e0Safresh1 25791f110e0Safresh1=head1 BUGS 25891f110e0Safresh1 25991f110e0Safresh1Should probably be built into the core, and needs to stop 260*6fb12b70Safresh1pandering to DOS habits. Needs a dose of optimization too. 26191f110e0Safresh1 26291f110e0Safresh1=head1 AUTHOR 26391f110e0Safresh1 26491f110e0Safresh1Gurusamy Sarathy <gsar@activestate.com> 26591f110e0Safresh1 26691f110e0Safresh1=head1 HISTORY 26791f110e0Safresh1 26891f110e0Safresh1=over 4 26991f110e0Safresh1 27091f110e0Safresh1=item * 27191f110e0Safresh1 27291f110e0Safresh1Support for globally overriding glob() (GSAR 3-JUN-98) 27391f110e0Safresh1 27491f110e0Safresh1=item * 27591f110e0Safresh1 27691f110e0Safresh1Scalar context, independent iterator context fixes (GSAR 15-SEP-97) 27791f110e0Safresh1 27891f110e0Safresh1=item * 27991f110e0Safresh1 28091f110e0Safresh1A few dir-vs-file optimizations result in glob importation being 28191f110e0Safresh110 times faster than using perlglob.exe, and using perlglob.bat is 28291f110e0Safresh1only twice as slow as perlglob.exe (GSAR 28-MAY-97) 28391f110e0Safresh1 28491f110e0Safresh1=item * 28591f110e0Safresh1 28691f110e0Safresh1Several cleanups prompted by lack of compatible perlglob.exe 28791f110e0Safresh1under Borland (GSAR 27-MAY-97) 28891f110e0Safresh1 28991f110e0Safresh1=item * 29091f110e0Safresh1 29191f110e0Safresh1Initial version (GSAR 20-FEB-97) 29291f110e0Safresh1 29391f110e0Safresh1=back 29491f110e0Safresh1 29591f110e0Safresh1=head1 SEE ALSO 29691f110e0Safresh1 29791f110e0Safresh1perl 29891f110e0Safresh1 29991f110e0Safresh1perlglob.bat 30091f110e0Safresh1 30191f110e0Safresh1Text::ParseWords 30291f110e0Safresh1 30391f110e0Safresh1=cut 30491f110e0Safresh1 305