1#!perl -w 2 3# use strict fails 4#Can't use string ("main::glob") as a symbol ref while "strict refs" in use at /usr/lib/perl5/5.005/File/DosGlob.pm line 191. 5 6# 7# Documentation at the __END__ 8# 9 10package File::DosGlob; 11 12our $VERSION = '1.00'; 13use strict; 14use warnings; 15 16sub doglob { 17 my $cond = shift; 18 my @retval = (); 19 #print "doglob: ", join('|', @_), "\n"; 20 OUTER: 21 for my $pat (@_) { 22 my @matched = (); 23 my @globdirs = (); 24 my $head = '.'; 25 my $sepchr = '/'; 26 my $tail; 27 next OUTER unless defined $pat and $pat ne ''; 28 # if arg is within quotes strip em and do no globbing 29 if ($pat =~ /^"(.*)"\z/s) { 30 $pat = $1; 31 if ($cond eq 'd') { push(@retval, $pat) if -d $pat } 32 else { push(@retval, $pat) if -e $pat } 33 next OUTER; 34 } 35 # wildcards with a drive prefix such as h:*.pm must be changed 36 # to h:./*.pm to expand correctly 37 if ($pat =~ m|^([A-Za-z]:)[^/\\]|s) { 38 substr($_,0,2) = $1 . "./"; 39 } 40 if ($pat =~ m|^(.*)([\\/])([^\\/]*)\z|s) { 41 ($head, $sepchr, $tail) = ($1,$2,$3); 42 #print "div: |$head|$sepchr|$tail|\n"; 43 push (@retval, $pat), next OUTER if $tail eq ''; 44 if ($head =~ /[*?]/) { 45 @globdirs = doglob('d', $head); 46 push(@retval, doglob($cond, map {"$_$sepchr$tail"} @globdirs)), 47 next OUTER if @globdirs; 48 } 49 $head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:\z/s; 50 $pat = $tail; 51 } 52 # 53 # If file component has no wildcards, we can avoid opendir 54 unless ($pat =~ /[*?]/) { 55 $head = '' if $head eq '.'; 56 $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr; 57 $head .= $pat; 58 if ($cond eq 'd') { push(@retval,$head) if -d $head } 59 else { push(@retval,$head) if -e $head } 60 next OUTER; 61 } 62 opendir(D, $head) or next OUTER; 63 my @leaves = readdir D; 64 closedir D; 65 $head = '' if $head eq '.'; 66 $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr; 67 68 # escape regex metachars but not glob chars 69 $pat =~ s:([].+^\-\${}[|]):\\$1:g; 70 # and convert DOS-style wildcards to regex 71 $pat =~ s/\*/.*/g; 72 $pat =~ s/\?/.?/g; 73 74 #print "regex: '$pat', head: '$head'\n"; 75 my $matchsub = sub { $_[0] =~ m|^$pat\z|is }; 76 INNER: 77 for my $e (@leaves) { 78 next INNER if $e eq '.' or $e eq '..'; 79 next INNER if $cond eq 'd' and ! -d "$head$e"; 80 push(@matched, "$head$e"), next INNER if &$matchsub($e); 81 # 82 # [DOS compatibility special case] 83 # Failed, add a trailing dot and try again, but only 84 # if name does not have a dot in it *and* pattern 85 # has a dot *and* name is shorter than 9 chars. 86 # 87 if (index($e,'.') == -1 and length($e) < 9 88 and index($pat,'\\.') != -1) { 89 push(@matched, "$head$e"), next INNER if &$matchsub("$e."); 90 } 91 } 92 push @retval, @matched if @matched; 93 } 94 return @retval; 95} 96 97 98# 99# Do DOS-like globbing on Mac OS 100# 101sub doglob_Mac { 102 my $cond = shift; 103 my @retval = (); 104 105 #print "doglob_Mac: ", join('|', @_), "\n"; 106 OUTER: 107 for my $arg (@_) { 108 local $_ = $arg; 109 my @matched = (); 110 my @globdirs = (); 111 my $head = ':'; 112 my $not_esc_head = $head; 113 my $sepchr = ':'; 114 next OUTER unless defined $_ and $_ ne ''; 115 # if arg is within quotes strip em and do no globbing 116 if (/^"(.*)"\z/s) { 117 $_ = $1; 118 # $_ may contain escaped metachars '\*', '\?' and '\' 119 my $not_esc_arg = $_; 120 $not_esc_arg =~ s/\\([*?\\])/$1/g; 121 if ($cond eq 'd') { push(@retval, $not_esc_arg) if -d $not_esc_arg } 122 else { push(@retval, $not_esc_arg) if -e $not_esc_arg } 123 next OUTER; 124 } 125 126 if (m|^(.*?)(:+)([^:]*)\z|s) { # note: $1 is not greedy 127 my $tail; 128 ($head, $sepchr, $tail) = ($1,$2,$3); 129 #print "div: |$head|$sepchr|$tail|\n"; 130 push (@retval, $_), next OUTER if $tail eq ''; 131 # 132 # $head may contain escaped metachars '\*' and '\?' 133 134 my $tmp_head = $head; 135 # if a '*' or '?' is preceded by an odd count of '\', temporary delete 136 # it (and its preceding backslashes), i.e. don't treat '\*' and '\?' as 137 # wildcards 138 $tmp_head =~ s/(\\*)([*?])/$2 x ((length($1) + 1) % 2)/eg; 139 140 if ($tmp_head =~ /[*?]/) { # if there are wildcards ... 141 @globdirs = doglob_Mac('d', $head); 142 push(@retval, doglob_Mac($cond, map {"$_$sepchr$tail"} @globdirs)), 143 next OUTER if @globdirs; 144 } 145 146 $head .= $sepchr; 147 $not_esc_head = $head; 148 # unescape $head for file operations 149 $not_esc_head =~ s/\\([*?\\])/$1/g; 150 $_ = $tail; 151 } 152 # 153 # If file component has no wildcards, we can avoid opendir 154 155 my $tmp_tail = $_; 156 # if a '*' or '?' is preceded by an odd count of '\', temporary delete 157 # it (and its preceding backslashes), i.e. don't treat '\*' and '\?' as 158 # wildcards 159 $tmp_tail =~ s/(\\*)([*?])/$2 x ((length($1) + 1) % 2)/eg; 160 161 unless ($tmp_tail =~ /[*?]/) { # if there are wildcards ... 162 $not_esc_head = $head = '' if $head eq ':'; 163 my $not_esc_tail = $_; 164 # unescape $head and $tail for file operations 165 $not_esc_tail =~ s/\\([*?\\])/$1/g; 166 $head .= $_; 167 $not_esc_head .= $not_esc_tail; 168 if ($cond eq 'd') { push(@retval,$head) if -d $not_esc_head } 169 else { push(@retval,$head) if -e $not_esc_head } 170 next OUTER; 171 } 172 #print "opendir($not_esc_head)\n"; 173 opendir(D, $not_esc_head) or next OUTER; 174 my @leaves = readdir D; 175 closedir D; 176 177 # escape regex metachars but not '\' and glob chars '*', '?' 178 $_ =~ s:([].+^\-\${}[|]):\\$1:g; 179 # and convert DOS-style wildcards to regex, 180 # but only if they are not escaped 181 $_ =~ s/(\\*)([*?])/$1 . ('.' x ((length($1) + 1) % 2)) . $2/eg; 182 183 #print "regex: '$_', head: '$head', unescaped head: '$not_esc_head'\n"; 184 my $matchsub = eval 'sub { $_[0] =~ m|^' . $_ . '\\z|ios }'; 185 warn($@), next OUTER if $@; 186 INNER: 187 for my $e (@leaves) { 188 next INNER if $e eq '.' or $e eq '..'; 189 next INNER if $cond eq 'd' and ! -d "$not_esc_head$e"; 190 191 if (&$matchsub($e)) { 192 my $leave = (($not_esc_head eq ':') && (-f "$not_esc_head$e")) ? 193 "$e" : "$not_esc_head$e"; 194 # 195 # On Mac OS, the two glob metachars '*' and '?' and the escape 196 # char '\' are valid characters for file and directory names. 197 # We have to escape and treat them specially. 198 $leave =~ s|([*?\\])|\\$1|g; 199 push(@matched, $leave); 200 next INNER; 201 } 202 } 203 push @retval, @matched if @matched; 204 } 205 return @retval; 206} 207 208# 209# _expand_volume() will only be used on Mac OS (Classic): 210# Takes an array of original patterns as argument and returns an array of 211# possibly modified patterns. Each original pattern is processed like 212# that: 213# + If there's a volume name in the pattern, we push a separate pattern 214# for each mounted volume that matches (with '*', '?' and '\' escaped). 215# + If there's no volume name in the original pattern, it is pushed 216# unchanged. 217# Note that the returned array of patterns may be empty. 218# 219sub _expand_volume { 220 221 require MacPerl; # to be verbose 222 223 my @pat = @_; 224 my @new_pat = (); 225 my @FSSpec_Vols = MacPerl::Volumes(); 226 my @mounted_volumes = (); 227 228 foreach my $spec_vol (@FSSpec_Vols) { 229 # push all mounted volumes into array 230 push @mounted_volumes, MacPerl::MakePath($spec_vol); 231 } 232 #print "mounted volumes: |@mounted_volumes|\n"; 233 234 while (@pat) { 235 my $pat = shift @pat; 236 if ($pat =~ /^([^:]+:)(.*)\z/) { # match a volume name? 237 my $vol_pat = $1; 238 my $tail = $2; 239 # 240 # escape regex metachars but not '\' and glob chars '*', '?' 241 $vol_pat =~ s:([].+^\-\${}[|]):\\$1:g; 242 # and convert DOS-style wildcards to regex, 243 # but only if they are not escaped 244 $vol_pat =~ s/(\\*)([*?])/$1 . ('.' x ((length($1) + 1) % 2)) . $2/eg; 245 #print "volume regex: '$vol_pat' \n"; 246 247 foreach my $volume (@mounted_volumes) { 248 if ($volume =~ m|^$vol_pat\z|ios) { 249 # 250 # On Mac OS, the two glob metachars '*' and '?' and the 251 # escape char '\' are valid characters for volume names. 252 # We have to escape and treat them specially. 253 $volume =~ s|([*?\\])|\\$1|g; 254 push @new_pat, $volume . $tail; 255 } 256 } 257 } else { # no volume name in pattern, push original pattern 258 push @new_pat, $pat; 259 } 260 } 261 return @new_pat; 262} 263 264 265# 266# _preprocess_pattern() will only be used on Mac OS (Classic): 267# Resolves any updirs in the pattern. Removes a single trailing colon 268# from the pattern, unless it's a volume name pattern like "*HD:" 269# 270sub _preprocess_pattern { 271 my @pat = @_; 272 273 foreach my $p (@pat) { 274 my $proceed; 275 # resolve any updirs, e.g. "*HD:t?p::a*" -> "*HD:a*" 276 do { 277 $proceed = ($p =~ s/^(.*):[^:]+::(.*?)\z/$1:$2/); 278 } while ($proceed); 279 # remove a single trailing colon, e.g. ":*:" -> ":*" 280 $p =~ s/:([^:]+):\z/:$1/; 281 } 282 return @pat; 283} 284 285 286# 287# _un_escape() will only be used on Mac OS (Classic): 288# Unescapes a list of arguments which may contain escaped 289# metachars '*', '?' and '\'. 290# 291sub _un_escape { 292 foreach (@_) { 293 s/\\([*?\\])/$1/g; 294 } 295 return @_; 296} 297 298# 299# this can be used to override CORE::glob in a specific 300# package by saying C<use File::DosGlob 'glob';> in that 301# namespace. 302# 303 304# context (keyed by second cxix arg provided by core) 305my %iter; 306my %entries; 307 308sub glob { 309 my($pat,$cxix) = @_; 310 my @pat; 311 312 # glob without args defaults to $_ 313 $pat = $_ unless defined $pat; 314 315 # extract patterns 316 if ($pat =~ /\s/) { 317 require Text::ParseWords; 318 @pat = Text::ParseWords::parse_line('\s+',0,$pat); 319 } 320 else { 321 push @pat, $pat; 322 } 323 324 # Mike Mestnik: made to do abc{1,2,3} == abc1 abc2 abc3. 325 # abc3 will be the original {3} (and drop the {}). 326 # abc1 abc2 will be put in @appendpat. 327 # This was just the esiest way, not nearly the best. 328 REHASH: { 329 my @appendpat = (); 330 for (@pat) { 331 # There must be a "," I.E. abc{efg} is not what we want. 332 while ( /^(.*)(?<!\\)\{(.*?)(?<!\\)\,.*?(?<!\\)\}(.*)$/ ) { 333 my ($start, $match, $end) = ($1, $2, $3); 334 #print "Got: \n\t$start\n\t$match\n\t$end\n"; 335 my $tmp = "$start$match$end"; 336 while ( $tmp =~ s/^(.*?)(?<!\\)\{(?:.*(?<!\\)\,)?(.*\Q$match\E.*?)(?:(?<!\\)\,.*)?(?<!\\)\}(.*)$/$1$2$3/ ) { 337 #print "Striped: $tmp\n"; 338 # these expanshions will be preformed by the original, 339 # when we call REHASH. 340 } 341 push @appendpat, ("$tmp"); 342 s/^\Q$start\E(?<!\\)\{\Q$match\E(?<!\\)\,/$start\{/; 343 if ( /^\Q$start\E(?<!\\)\{(?!.*?(?<!\\)\,.*?\Q$end\E$)(.*)(?<!\\)\}\Q$end\E$/ ) { 344 $match = $1; 345 #print "GOT: \n\t$start\n\t$match\n\t$end\n\n"; 346 $_ = "$start$match$end"; 347 } 348 } 349 #print "Sould have "GOT" vs "Got"!\n"; 350 #FIXME: There should be checking for this. 351 # How or what should be done about failure is beond me. 352 } 353 if ( $#appendpat != -1 354 ) { 355 #print "LOOP\n"; 356 #FIXME: Max loop, no way! :") 357 for ( @appendpat ) { 358 push @pat, $_; 359 } 360 goto REHASH; 361 } 362 } 363 for ( @pat ) { 364 s/\\{/{/g; 365 s/\\}/}/g; 366 s/\\,/,/g; 367 } 368 #print join ("\n", @pat). "\n"; 369 370 # assume global context if not provided one 371 $cxix = '_G_' unless defined $cxix; 372 $iter{$cxix} = 0 unless exists $iter{$cxix}; 373 374 # if we're just beginning, do it all first 375 if ($iter{$cxix} == 0) { 376 if ($^O eq 'MacOS') { 377 # first, take care of updirs and trailing colons 378 @pat = _preprocess_pattern(@pat); 379 # expand volume names 380 @pat = _expand_volume(@pat); 381 $entries{$cxix} = (@pat) ? [_un_escape( doglob_Mac(1,@pat) )] : [()]; 382 } else { 383 $entries{$cxix} = [doglob(1,@pat)]; 384 } 385 } 386 387 # chuck it all out, quick or slow 388 if (wantarray) { 389 delete $iter{$cxix}; 390 return @{delete $entries{$cxix}}; 391 } 392 else { 393 if ($iter{$cxix} = scalar @{$entries{$cxix}}) { 394 return shift @{$entries{$cxix}}; 395 } 396 else { 397 # return undef for EOL 398 delete $iter{$cxix}; 399 delete $entries{$cxix}; 400 return undef; 401 } 402 } 403} 404 405{ 406 no strict 'refs'; 407 408 sub import { 409 my $pkg = shift; 410 return unless @_; 411 my $sym = shift; 412 my $callpkg = ($sym =~ s/^GLOBAL_//s ? 'CORE::GLOBAL' : caller(0)); 413 *{$callpkg.'::'.$sym} = \&{$pkg.'::'.$sym} if $sym eq 'glob'; 414 } 415} 4161; 417 418__END__ 419 420=head1 NAME 421 422File::DosGlob - DOS like globbing and then some 423 424=head1 SYNOPSIS 425 426 require 5.004; 427 428 # override CORE::glob in current package 429 use File::DosGlob 'glob'; 430 431 # override CORE::glob in ALL packages (use with extreme caution!) 432 use File::DosGlob 'GLOBAL_glob'; 433 434 @perlfiles = glob "..\\pe?l/*.p?"; 435 print <..\\pe?l/*.p?>; 436 437 # from the command line (overrides only in main::) 438 > perl -MFile::DosGlob=glob -e "print <../pe*/*p?>" 439 440=head1 DESCRIPTION 441 442A module that implements DOS-like globbing with a few enhancements. 443It is largely compatible with perlglob.exe (the M$ setargv.obj 444version) in all but one respect--it understands wildcards in 445directory components. 446 447For example, C<<..\\l*b\\file/*glob.p?>> will work as expected (in 448that it will find something like '..\lib\File/DosGlob.pm' alright). 449Note that all path components are case-insensitive, and that 450backslashes and forward slashes are both accepted, and preserved. 451You may have to double the backslashes if you are putting them in 452literally, due to double-quotish parsing of the pattern by perl. 453 454Spaces in the argument delimit distinct patterns, so 455C<glob('*.exe *.dll')> globs all filenames that end in C<.exe> 456or C<.dll>. If you want to put in literal spaces in the glob 457pattern, you can escape them with either double quotes, or backslashes. 458e.g. C<glob('c:/"Program Files"/*/*.dll')>, or 459C<glob('c:/Program\ Files/*/*.dll')>. The argument is tokenized using 460C<Text::ParseWords::parse_line()>, so see L<Text::ParseWords> for details 461of the quoting rules used. 462 463Extending it to csh patterns is left as an exercise to the reader. 464 465=head1 NOTES 466 467=over 4 468 469=item * 470 471Mac OS (Classic) users should note a few differences. The specification 472of pathnames in glob patterns adheres to the usual Mac OS conventions: 473The path separator is a colon ':', not a slash '/' or backslash '\'. A 474full path always begins with a volume name. A relative pathname on Mac 475OS must always begin with a ':', except when specifying a file or 476directory name in the current working directory, where the leading colon 477is optional. If specifying a volume name only, a trailing ':' is 478required. Due to these rules, a glob like E<lt>*:E<gt> will find all 479mounted volumes, while a glob like E<lt>*E<gt> or E<lt>:*E<gt> will find 480all files and directories in the current directory. 481 482Note that updirs in the glob pattern are resolved before the matching begins, 483i.e. a pattern like "*HD:t?p::a*" will be matched as "*HD:a*". Note also, 484that a single trailing ':' in the pattern is ignored (unless it's a volume 485name pattern like "*HD:"), i.e. a glob like <:*:> will find both directories 486I<and> files (and not, as one might expect, only directories). 487 488The metachars '*', '?' and the escape char '\' are valid characters in 489volume, directory and file names on Mac OS. Hence, if you want to match 490a '*', '?' or '\' literally, you have to escape these characters. Due to 491perl's quoting rules, things may get a bit complicated, when you want to 492match a string like '\*' literally, or when you want to match '\' literally, 493but treat the immediately following character '*' as metachar. So, here's a 494rule of thumb (applies to both single- and double-quoted strings): escape 495each '*' or '?' or '\' with a backslash, if you want to treat them literally, 496and then double each backslash and your are done. E.g. 497 498- Match '\*' literally 499 500 escape both '\' and '*' : '\\\*' 501 double the backslashes : '\\\\\\*' 502 503(Internally, the glob routine sees a '\\\*', which means that both '\' and 504'*' are escaped.) 505 506 507- Match '\' literally, treat '*' as metachar 508 509 escape '\' but not '*' : '\\*' 510 double the backslashes : '\\\\*' 511 512(Internally, the glob routine sees a '\\*', which means that '\' is escaped and 513'*' is not.) 514 515Note that you also have to quote literal spaces in the glob pattern, as described 516above. 517 518=back 519 520=head1 EXPORTS (by request only) 521 522glob() 523 524=head1 BUGS 525 526Should probably be built into the core, and needs to stop 527pandering to DOS habits. Needs a dose of optimizium too. 528 529=head1 AUTHOR 530 531Gurusamy Sarathy <gsar@activestate.com> 532 533=head1 HISTORY 534 535=over 4 536 537=item * 538 539Support for globally overriding glob() (GSAR 3-JUN-98) 540 541=item * 542 543Scalar context, independent iterator context fixes (GSAR 15-SEP-97) 544 545=item * 546 547A few dir-vs-file optimizations result in glob importation being 54810 times faster than using perlglob.exe, and using perlglob.bat is 549only twice as slow as perlglob.exe (GSAR 28-MAY-97) 550 551=item * 552 553Several cleanups prompted by lack of compatible perlglob.exe 554under Borland (GSAR 27-MAY-97) 555 556=item * 557 558Initial version (GSAR 20-FEB-97) 559 560=back 561 562=head1 SEE ALSO 563 564perl 565 566perlglob.bat 567 568Text::ParseWords 569 570=cut 571 572