xref: /openbsd-src/gnu/usr.bin/perl/ext/File-Glob/Glob.pm (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1package File::Glob;
2
3use strict;
4our($VERSION, @ISA, @EXPORT_OK, @EXPORT_FAIL, %EXPORT_TAGS,
5    $AUTOLOAD, $DEFAULT_FLAGS);
6
7use XSLoader ();
8
9@ISA = qw(Exporter);
10
11# NOTE: The glob() export is only here for compatibility with 5.6.0.
12# csh_glob() should not be used directly, unless you know what you're doing.
13
# Every GLOB_* flag / status constant that can be imported, listed once so
# that @EXPORT_OK and the ':glob' tag cannot drift apart.
my @flag_constants = qw(
    GLOB_ABEND
    GLOB_ALPHASORT
    GLOB_ALTDIRFUNC
    GLOB_BRACE
    GLOB_CSH
    GLOB_ERR
    GLOB_ERROR
    GLOB_LIMIT
    GLOB_MARK
    GLOB_NOCASE
    GLOB_NOCHECK
    GLOB_NOMAGIC
    GLOB_NOSORT
    GLOB_NOSPACE
    GLOB_QUOTE
    GLOB_TILDE
);

# Symbols available on request: the three glob functions, then the constants.
@EXPORT_OK = (qw(csh_glob bsd_glob glob), @flag_constants);

# ':glob' pulls in all constants plus glob() and bsd_glob() (not csh_glob()).
%EXPORT_TAGS = (
    'glob' => [ @flag_constants, qw(glob bsd_glob) ],
);

$VERSION = '1.07';
60
sub import {
    # Handles the module-specific pseudo-tags (:case, :nocase, :globally),
    # strips them from the argument list, and hands whatever is left to
    # Exporter::import.
    require Exporter;

    my $idx = 1;    # $_[0] is the class name; options start at index 1
    while ($idx < @_) {
        my ($option) = $_[$idx] =~ /^:(case|nocase|globally)$/;

        # Not one of ours: leave it in place for Exporter.
        unless (defined $option) {
            ++$idx;
            next;
        }

        # Remove the pseudo-tag; the next argument slides into slot $idx,
        # so the index is deliberately not advanced.
        splice(@_, $idx, 1);

        if ($option eq 'case') {
            $DEFAULT_FLAGS &= ~GLOB_NOCASE();
        }
        elsif ($option eq 'nocase') {
            $DEFAULT_FLAGS |= GLOB_NOCASE();
        }
        else {
            # ':globally' -- replace the core glob with csh_glob().
            local $^W;
            *CORE::GLOBAL::glob = \&File::Glob::csh_glob;
        }
    }

    # Tail-call so Exporter sees our caller as its caller.
    goto &Exporter::import;
}
79
sub AUTOLOAD {
    # Lazily defines constants on first use.  The unqualified name of the
    # missing sub is passed to constant(), which returns an (error, value)
    # pair.  On error we croak from the caller's perspective; otherwise a
    # real sub returning the value is installed under the requested name,
    # so later calls bypass AUTOLOAD entirely.

    my $constname;
    ($constname = $AUTOLOAD) =~ s/.*:://;
    my ($error, $val) = constant($constname);
    if ($error) {
        require Carp;
        Carp::croak($error);
    }

    # Install a closure rather than compiling generated source with a
    # string eval: a string eval would reset the caller's $@ every time a
    # constant is first touched, and needlessly runs the parser.
    my $const_sub = sub { $val };
    {
        no strict 'refs';
        *{$AUTOLOAD} = $const_sub;
    }

    # Re-dispatch the original call to the freshly installed sub.
    goto &$const_sub;
}
95
96XSLoader::load 'File::Glob', $VERSION;
97
98# Preloaded methods go here.
99
sub GLOB_ERROR {
    # constant() yields an (error, value) pair; only the value slot is of
    # interest here.  It is looked up on every call rather than cached.
    my @status = constant('GLOB_ERROR');
    return $status[1];
}
103
sub GLOB_CSH () {
    # Composite flag: brace expansion, NOMAGIC, backslash quoting, tilde
    # expansion and alphabetical sorting, OR-ed together.
    my $flags = GLOB_BRACE();
    $flags |= GLOB_NOMAGIC();
    $flags |= GLOB_QUOTE();
    $flags |= GLOB_TILDE();
    $flags |= GLOB_ALPHASORT();
    return $flags;
}
111
# Default to the csh-compatible flag set; on the platforms matched below,
# case-insensitive matching is switched on as well.
$DEFAULT_FLAGS = GLOB_CSH();
$DEFAULT_FLAGS |= GLOB_NOCASE()
    if $^O =~ /^(?:MSWin32|VMS|os2|dos|riscos|MacOS)$/;
116
117# Autoload methods go after =cut, and are processed by the autosplit program.
118
sub bsd_glob {
    # bsd_glob(PATTERN [, FLAGS])
    # Expands PATTERN through doglob().  $DEFAULT_FLAGS applies only when
    # the flags argument is omitted altogether; an explicitly passed value
    # (even undef) is forwarded untouched.
    my $pattern = $_[0];
    my $flags   = @_ < 2 ? $DEFAULT_FLAGS : $_[1];
    return doglob($pattern, $flags);
}
124
125# File::Glob::glob() is deprecated because its prototype is different from
126# CORE::glob() (use bsd_glob() instead)
sub glob {
    # Keep only the pattern: anything after it (e.g. PL_glob_index) must
    # not reach bsd_glob(), where it would be taken for flags.
    $#_ = 0 if @_ > 1;
    goto &bsd_glob;
}
131
## borrowed heavily from gsar's File::DosGlob
# Iteration state, keyed by context index:
#   %iter    - 0 when a fresh glob must be performed, otherwise the number
#              of entries that were pending at the previous call
#   %entries - array ref of results not yet handed out
my %iter;
my %entries;

# csh_glob(PATTERN, CXIX)
#
# PATTERN defaults to $_ and, when it contains whitespace, is split into
# several patterns.  CXIX identifies the calling context so that separate
# iterations do not interfere (presumably supplied by the core glob
# operator -- confirm against the caller); it defaults to '_G_'.
#
# List context: returns all (remaining) matches and drops the state.
# Scalar context: returns one match per call, then undef once exhausted,
# after which the next call starts a new glob.
sub csh_glob {
    my $pat = shift;
    my $cxix = shift;

    # assume global context if not provided one
    $cxix = '_G_' unless defined $cxix;
    $iter{$cxix} = 0 unless exists $iter{$cxix};

    # if we're just beginning, do it all first
    #
    # The pattern is only examined here.  While an iteration is already in
    # progress the results come from %entries, so defaulting, trimming and
    # splitting the pattern on every call would be wasted work.
    if ($iter{$cxix} == 0) {
        # glob without args defaults to $_
        $pat = $_ unless defined $pat;

        # extract patterns
        $pat =~ s/^\s+//;   # Protect against empty elements in
        $pat =~ s/\s+$//;   # things like < *.c> and <*.c >.
                            # These alone shouldn't trigger ParseWords.
        my @pat;
        if ($pat =~ /\s/) {
            # XXX this is needed for compatibility with the csh
            # implementation in Perl.  Need to support a flag
            # to disable this behavior.
            require Text::ParseWords;
            @pat = Text::ParseWords::parse_line('\s+',0,$pat);
        }

        if (@pat) {
            $entries{$cxix} = [ map { doglob($_, $DEFAULT_FLAGS) } @pat ];
        }
        else {
            # No whitespace, or ParseWords produced nothing: glob the
            # trimmed string as a single pattern.
            $entries{$cxix} = [ doglob($pat, $DEFAULT_FLAGS) ];
        }
    }

    # chuck it all out, quick or slow
    if (wantarray) {
        delete $iter{$cxix};
        return @{delete $entries{$cxix}};
    }
    else {
        # Record how many entries are pending; zero means exhausted.
        if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
            return shift @{$entries{$cxix}};
        }
        else {
            # return undef for EOL (this branch never runs in list context)
            delete $iter{$cxix};
            delete $entries{$cxix};
            return;
        }
    }
}
187
1881;
189__END__
190
191=head1 NAME
192
193File::Glob - Perl extension for BSD glob routine
194
195=head1 SYNOPSIS
196
197  use File::Glob ':glob';
198
199  @list = bsd_glob('*.[ch]');
200  $homedir = bsd_glob('~gnat', GLOB_TILDE | GLOB_ERR);
201
202  if (GLOB_ERROR) {
203    # an error occurred reading $homedir
204  }
205
206  ## override the core glob (CORE::glob() does this automatically
207  ## by default anyway, since v5.6.0)
208  use File::Glob ':globally';
209  my @sources = <*.{c,h,y}>;
210
211  ## override the core glob, forcing case sensitivity
212  use File::Glob qw(:globally :case);
213  my @sources = <*.{c,h,y}>;
214
215  ## override the core glob forcing case insensitivity
216  use File::Glob qw(:globally :nocase);
217  my @sources = <*.{c,h,y}>;
218
219  ## glob on all files in home directory
220  use File::Glob ':globally';
221  my @sources = <~gnat/*>;
222
223=head1 DESCRIPTION
224
225The glob angle-bracket operator C<< <> >> is a pathname generator that
226implements the rules for file name pattern matching used by Unix-like shells
227such as the Bourne shell or C shell.
228
229File::Glob::bsd_glob() implements the FreeBSD glob(3) routine, which is
230a superset of the POSIX glob() (described in IEEE Std 1003.2 "POSIX.2").
231bsd_glob() takes a mandatory C<pattern> argument, and an optional
232C<flags> argument, and returns a list of filenames matching the
233pattern, with interpretation of the pattern modified by the C<flags>
234variable.
235
236Since v5.6.0, Perl's CORE::glob() is implemented in terms of bsd_glob().
237Note that they don't share the same prototype--CORE::glob() only accepts
238a single argument.  Due to historical reasons, CORE::glob() will also
239split its argument on whitespace, treating it as multiple patterns,
240whereas bsd_glob() considers them as one pattern.
241
242=head2 META CHARACTERS
243
244  \       Quote the next metacharacter
245  []      Character class
246  {}      Multiple pattern
247  *       Match any string of characters
248  ?       Match any single character
249  ~       User name home directory
250
251The metanotation C<a{b,c,d}e> is a shorthand for C<abe ace ade>.  Left to
252right order is preserved, with results of matches being sorted separately
253at a low level to preserve this order. As a special case C<{>, C<}>, and
254C<{}> are passed undisturbed.
255
256=head2 POSIX FLAGS
257
258The POSIX defined flags for bsd_glob() are:
259
260=over 4
261
262=item C<GLOB_ERR>
263
264Force bsd_glob() to return an error when it encounters a directory it
265cannot open or read.  Ordinarily bsd_glob() continues to find matches.
266
267=item C<GLOB_LIMIT>
268
269Make bsd_glob() return an error (GLOB_NOSPACE) when the pattern expands
270to a size bigger than the system constant C<ARG_MAX> (usually found in
271limits.h).  If your system does not define this constant, bsd_glob() uses
272C<sysconf(_SC_ARG_MAX)> or C<_POSIX_ARG_MAX> where available (in that
273order).  You can inspect these values using the standard C<POSIX>
274extension.
275
276=item C<GLOB_MARK>
277
278Each pathname that is a directory that matches the pattern has a slash
279appended.
280
281=item C<GLOB_NOCASE>
282
283By default, file names are assumed to be case sensitive; this flag
284makes bsd_glob() treat case differences as not significant.
285
286=item C<GLOB_NOCHECK>
287
288If the pattern does not match any pathname, then bsd_glob() returns a list
289consisting of only the pattern.  If C<GLOB_QUOTE> is set, its effect
290is present in the pattern returned.
291
292=item C<GLOB_NOSORT>
293
294By default, the pathnames are sorted in ascending ASCII order; this
295flag prevents that sorting (speeding up bsd_glob()).
296
297=back
298
299The FreeBSD extensions to the POSIX standard are the following flags:
300
301=over 4
302
303=item C<GLOB_BRACE>
304
305Pre-process the string to expand C<{pat,pat,...}> strings like csh(1).
306The pattern '{}' is left unexpanded for historical reasons (and csh(1)
307does the same thing to ease typing of find(1) patterns).
308
309=item C<GLOB_NOMAGIC>
310
311Same as C<GLOB_NOCHECK> but it only returns the pattern if it does not
312contain any of the special characters "*", "?" or "[".  C<NOMAGIC> is
313provided to simplify implementing the historic csh(1) globbing
314behaviour and should probably not be used anywhere else.
315
316=item C<GLOB_QUOTE>
317
318Use the backslash ('\') character for quoting: every occurrence of a
319backslash followed by a character in the pattern is replaced by that
320character, avoiding any special interpretation of the character.
321(But see below for exceptions on DOSISH systems).
322
323=item C<GLOB_TILDE>
324
325Expand patterns that start with '~' to user name home directories.
326
327=item C<GLOB_CSH>
328
329For convenience, C<GLOB_CSH> is a synonym for
330C<GLOB_BRACE | GLOB_NOMAGIC | GLOB_QUOTE | GLOB_TILDE | GLOB_ALPHASORT>.
331
332=back
333
The POSIX-provided C<GLOB_APPEND> and C<GLOB_DOOFFS> flags, and the FreeBSD
extensions C<GLOB_ALTDIRFUNC> and C<GLOB_MAGCHAR>, have not been
implemented in the Perl version because they involve more complex
interaction with the underlying C structures.
338
339The following flag has been added in the Perl implementation for
340csh compatibility:
341
342=over 4
343
344=item C<GLOB_ALPHASORT>
345
If C<GLOB_NOSORT> is not in effect, sort filenames in alphabetical
order (case does not matter) rather than in ASCII order.
348
349=back
350
351=head1 DIAGNOSTICS
352
353bsd_glob() returns a list of matching paths, possibly zero length.  If an
354error occurred, &File::Glob::GLOB_ERROR will be non-zero and C<$!> will be
355set.  &File::Glob::GLOB_ERROR is guaranteed to be zero if no error occurred,
356or one of the following values otherwise:
357
358=over 4
359
360=item C<GLOB_NOSPACE>
361
362An attempt to allocate memory failed.
363
364=item C<GLOB_ABEND>
365
366The glob was stopped because an error was encountered.
367
368=back
369
370In the case where bsd_glob() has found some matching paths, but is
371interrupted by an error, it will return a list of filenames B<and>
set &File::Glob::GLOB_ERROR.
373
374Note that bsd_glob() deviates from POSIX and FreeBSD glob(3) behaviour
375by not considering C<ENOENT> and C<ENOTDIR> as errors - bsd_glob() will
376continue processing despite those errors, unless the C<GLOB_ERR> flag is
377set.
378
379Be aware that all filenames returned from File::Glob are tainted.
380
381=head1 NOTES
382
383=over 4
384
385=item *
386
387If you want to use multiple patterns, e.g. C<bsd_glob("a* b*")>, you should
388probably throw them in a set as in C<bsd_glob("{a*,b*}")>.  This is because
389the argument to bsd_glob() isn't subjected to parsing by the C shell.
390Remember that you can use a backslash to escape things.
391
392=item *
393
394On DOSISH systems, backslash is a valid directory separator character.
395In this case, use of backslash as a quoting character (via GLOB_QUOTE)
396interferes with the use of backslash as a directory separator. The
397best (simplest, most portable) solution is to use forward slashes for
398directory separators, and backslashes for quoting. However, this does
399not match "normal practice" on these systems. As a concession to user
400expectation, therefore, backslashes (under GLOB_QUOTE) only quote the
401glob metacharacters '[', ']', '{', '}', '-', '~', and backslash itself.
402All other backslashes are passed through unchanged.
403
404=item *
405
406Win32 users should use the real slash.  If you really want to use
407backslashes, consider using Sarathy's File::DosGlob, which comes with
408the standard Perl distribution.
409
410=item *
411
412Mac OS (Classic) users should note a few differences. Since
413Mac OS is not Unix, when the glob code encounters a tilde glob (e.g.
414~user) and the C<GLOB_TILDE> flag is used, it simply returns that
415pattern without doing any expansion.
416
417Glob on Mac OS is case-insensitive by default (if you don't use any
418flags). If you specify any flags at all and still want glob
419to be case-insensitive, you must include C<GLOB_NOCASE> in the flags.
420
421The path separator is ':' (aka colon), not '/' (aka slash). Mac OS users
422should be careful about specifying relative pathnames. While a full path
423always begins with a volume name, a relative pathname should always
424begin with a ':'.  If specifying a volume name only, a trailing ':' is
425required.
426
427The specification of pathnames in glob patterns adheres to the usual Mac
428OS conventions: The path separator is a colon ':', not a slash '/'. A
429full path always begins with a volume name. A relative pathname on Mac
430OS must always begin with a ':', except when specifying a file or
431directory name in the current working directory, where the leading colon
432is optional. If specifying a volume name only, a trailing ':' is
433required. Due to these rules, a glob like E<lt>*:E<gt> will find all
434mounted volumes, while a glob like E<lt>*E<gt> or E<lt>:*E<gt> will find
435all files and directories in the current directory.
436
437Note that updirs in the glob pattern are resolved before the matching begins,
438i.e. a pattern like "*HD:t?p::a*" will be matched as "*HD:a*". Note also,
439that a single trailing ':' in the pattern is ignored (unless it's a volume
440name pattern like "*HD:"), i.e. a glob like E<lt>:*:E<gt> will find both
441directories I<and> files (and not, as one might expect, only directories).
442You can, however, use the C<GLOB_MARK> flag to distinguish (without a file
443test) directory names from file names.
444
445If the C<GLOB_MARK> flag is set, all directory paths will have a ':' appended.
446Since a directory like 'lib:' is I<not> a valid I<relative> path on Mac OS,
447both a leading and a trailing colon will be added, when the directory name in
448question doesn't contain any colons (e.g. 'lib' becomes ':lib:').
449
450=back
451
452=head1 SEE ALSO
453
454L<perlfunc/glob>, glob(3)
455
456=head1 AUTHOR
457
458The Perl interface was written by Nathan Torkington E<lt>gnat@frii.comE<gt>,
459and is released under the artistic license.  Further modifications were
460made by Greg Bacon E<lt>gbacon@cs.uah.eduE<gt>, Gurusamy Sarathy
461E<lt>gsar@activestate.comE<gt>, and Thomas Wegner
462E<lt>wegner_thomas@yahoo.comE<gt>.  The C glob code has the
463following copyright:
464
465    Copyright (c) 1989, 1993 The Regents of the University of California.
466    All rights reserved.
467
468    This code is derived from software contributed to Berkeley by
469    Guido van Rossum.
470
471    Redistribution and use in source and binary forms, with or without
472    modification, are permitted provided that the following conditions
473    are met:
474
475    1. Redistributions of source code must retain the above copyright
476       notice, this list of conditions and the following disclaimer.
477    2. Redistributions in binary form must reproduce the above copyright
478       notice, this list of conditions and the following disclaimer in the
479       documentation and/or other materials provided with the distribution.
480    3. Neither the name of the University nor the names of its contributors
481       may be used to endorse or promote products derived from this software
482       without specific prior written permission.
483
484    THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
485    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
486    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
487    ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
488    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
489    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
490    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
491    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
492    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
493    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
494    SUCH DAMAGE.
495
496=cut
497