xref: /onnv-gate/usr/src/lib/libparted/common/lib/regex.h (revision 9663:ace9a2ac3683)
1*9663SMark.Logan@Sun.COM /* Definitions for data structures and routines for the regular
2*9663SMark.Logan@Sun.COM    expression library.
3*9663SMark.Logan@Sun.COM    Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006
4*9663SMark.Logan@Sun.COM    Free Software Foundation, Inc.
5*9663SMark.Logan@Sun.COM    This file is part of the GNU C Library.
6*9663SMark.Logan@Sun.COM 
7*9663SMark.Logan@Sun.COM    This program is free software; you can redistribute it and/or modify
8*9663SMark.Logan@Sun.COM    it under the terms of the GNU General Public License as published by
9*9663SMark.Logan@Sun.COM    the Free Software Foundation; either version 2, or (at your option)
10*9663SMark.Logan@Sun.COM    any later version.
11*9663SMark.Logan@Sun.COM 
12*9663SMark.Logan@Sun.COM    This program is distributed in the hope that it will be useful,
13*9663SMark.Logan@Sun.COM    but WITHOUT ANY WARRANTY; without even the implied warranty of
14*9663SMark.Logan@Sun.COM    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15*9663SMark.Logan@Sun.COM    GNU General Public License for more details.
16*9663SMark.Logan@Sun.COM 
17*9663SMark.Logan@Sun.COM    You should have received a copy of the GNU General Public License along
18*9663SMark.Logan@Sun.COM    with this program; if not, write to the Free Software Foundation,
19*9663SMark.Logan@Sun.COM    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20*9663SMark.Logan@Sun.COM 
21*9663SMark.Logan@Sun.COM #ifndef _REGEX_H
22*9663SMark.Logan@Sun.COM #define _REGEX_H 1
23*9663SMark.Logan@Sun.COM 
24*9663SMark.Logan@Sun.COM #include <sys/types.h>
25*9663SMark.Logan@Sun.COM 
26*9663SMark.Logan@Sun.COM /* Allow the use in C++ code.  */
27*9663SMark.Logan@Sun.COM #ifdef __cplusplus
28*9663SMark.Logan@Sun.COM extern "C" {
29*9663SMark.Logan@Sun.COM #endif
30*9663SMark.Logan@Sun.COM 
31*9663SMark.Logan@Sun.COM /* Define __USE_GNU_REGEX to declare GNU extensions that violate the
32*9663SMark.Logan@Sun.COM    POSIX name space rules.  */
33*9663SMark.Logan@Sun.COM #undef __USE_GNU_REGEX
34*9663SMark.Logan@Sun.COM #if (defined _GNU_SOURCE					\
35*9663SMark.Logan@Sun.COM      || (!defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE	\
36*9663SMark.Logan@Sun.COM 	 && !defined _XOPEN_SOURCE))
37*9663SMark.Logan@Sun.COM # define __USE_GNU_REGEX 1
38*9663SMark.Logan@Sun.COM #endif
39*9663SMark.Logan@Sun.COM 
40*9663SMark.Logan@Sun.COM #ifdef _REGEX_LARGE_OFFSETS
41*9663SMark.Logan@Sun.COM 
42*9663SMark.Logan@Sun.COM /* Use types and values that are wide enough to represent signed and
43*9663SMark.Logan@Sun.COM    unsigned byte offsets in memory.  This currently works only when
44*9663SMark.Logan@Sun.COM    the regex code is used outside of the GNU C library; it is not yet
45*9663SMark.Logan@Sun.COM    supported within glibc itself, and glibc users should not define
46*9663SMark.Logan@Sun.COM    _REGEX_LARGE_OFFSETS.  */
47*9663SMark.Logan@Sun.COM 
48*9663SMark.Logan@Sun.COM /* The type of the offset of a byte within a string.
49*9663SMark.Logan@Sun.COM    For historical reasons POSIX 1003.1-2004 requires that regoff_t be
50*9663SMark.Logan@Sun.COM    at least as wide as off_t.  However, many common POSIX platforms set
51*9663SMark.Logan@Sun.COM    regoff_t to the more-sensible ssize_t and the Open Group has
52*9663SMark.Logan@Sun.COM    signalled its intention to change the requirement to be that
53*9663SMark.Logan@Sun.COM    regoff_t be at least as wide as ptrdiff_t and ssize_t; see XBD ERN
54*9663SMark.Logan@Sun.COM    60 (2005-08-25).  We don't know of any hosts where ptrdiff_t is
55*9663SMark.Logan@Sun.COM    wider than ssize_t, so ssize_t is safe.  */
56*9663SMark.Logan@Sun.COM typedef ssize_t regoff_t;
57*9663SMark.Logan@Sun.COM 
58*9663SMark.Logan@Sun.COM /* The type of nonnegative object indexes.  Traditionally, GNU regex
59*9663SMark.Logan@Sun.COM    uses 'int' for these.  Code that uses __re_idx_t should work
60*9663SMark.Logan@Sun.COM    regardless of whether the type is signed.  */
61*9663SMark.Logan@Sun.COM typedef size_t __re_idx_t;
62*9663SMark.Logan@Sun.COM 
63*9663SMark.Logan@Sun.COM /* The type of object sizes.  */
64*9663SMark.Logan@Sun.COM typedef size_t __re_size_t;
65*9663SMark.Logan@Sun.COM 
66*9663SMark.Logan@Sun.COM /* The type of object sizes, in places where the traditional code
67*9663SMark.Logan@Sun.COM    uses unsigned long int.  */
68*9663SMark.Logan@Sun.COM typedef size_t __re_long_size_t;
69*9663SMark.Logan@Sun.COM 
70*9663SMark.Logan@Sun.COM #else
71*9663SMark.Logan@Sun.COM 
72*9663SMark.Logan@Sun.COM /* Use types that are binary-compatible with the traditional GNU regex
73*9663SMark.Logan@Sun.COM    implementation, which mishandles strings longer than INT_MAX.  */
74*9663SMark.Logan@Sun.COM 
75*9663SMark.Logan@Sun.COM typedef int regoff_t;
76*9663SMark.Logan@Sun.COM typedef int __re_idx_t;
77*9663SMark.Logan@Sun.COM typedef unsigned int __re_size_t;
78*9663SMark.Logan@Sun.COM typedef unsigned long int __re_long_size_t;
79*9663SMark.Logan@Sun.COM 
80*9663SMark.Logan@Sun.COM #endif
81*9663SMark.Logan@Sun.COM 
82*9663SMark.Logan@Sun.COM /* The following two types have to be signed and unsigned integer types
83*9663SMark.Logan@Sun.COM    wide enough to hold the value of a pointer.  For most ANSI compilers
84*9663SMark.Logan@Sun.COM    ptrdiff_t and size_t would likely be OK.  Still, the size of those two
85*9663SMark.Logan@Sun.COM    types is 2 for Microsoft C.  Ugh... */
86*9663SMark.Logan@Sun.COM typedef long int s_reg_t;
87*9663SMark.Logan@Sun.COM typedef unsigned long int active_reg_t;
88*9663SMark.Logan@Sun.COM 
89*9663SMark.Logan@Sun.COM /* The following bits are used to determine the regexp syntax we
90*9663SMark.Logan@Sun.COM    recognize.  The set/not-set meanings are chosen so that Emacs syntax
91*9663SMark.Logan@Sun.COM    remains the value 0.  The bits are given in alphabetical order, and
92*9663SMark.Logan@Sun.COM    the definitions shifted by one from the previous bit; thus, when we
93*9663SMark.Logan@Sun.COM    add or remove a bit, only one other definition need change.  */
94*9663SMark.Logan@Sun.COM typedef unsigned long int reg_syntax_t;
95*9663SMark.Logan@Sun.COM 
96*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX
97*9663SMark.Logan@Sun.COM 
98*9663SMark.Logan@Sun.COM /* If this bit is not set, then \ inside a bracket expression is literal.
99*9663SMark.Logan@Sun.COM    If set, then such a \ quotes the following character.  */
100*9663SMark.Logan@Sun.COM # define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
101*9663SMark.Logan@Sun.COM 
102*9663SMark.Logan@Sun.COM /* If this bit is not set, then + and ? are operators, and \+ and \? are
103*9663SMark.Logan@Sun.COM      literals.
104*9663SMark.Logan@Sun.COM    If set, then \+ and \? are operators and + and ? are literals.  */
105*9663SMark.Logan@Sun.COM # define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
106*9663SMark.Logan@Sun.COM 
107*9663SMark.Logan@Sun.COM /* If this bit is set, then character classes are supported.  They are:
108*9663SMark.Logan@Sun.COM      [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
109*9663SMark.Logan@Sun.COM      [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
110*9663SMark.Logan@Sun.COM    If not set, then character classes are not supported.  */
111*9663SMark.Logan@Sun.COM # define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
112*9663SMark.Logan@Sun.COM 
113*9663SMark.Logan@Sun.COM /* If this bit is set, then ^ and $ are always anchors (outside bracket
114*9663SMark.Logan@Sun.COM      expressions, of course).
115*9663SMark.Logan@Sun.COM    If this bit is not set, then it depends:
116*9663SMark.Logan@Sun.COM         ^  is an anchor if it is at the beginning of a regular
117*9663SMark.Logan@Sun.COM            expression or after an open-group or an alternation operator;
118*9663SMark.Logan@Sun.COM         $  is an anchor if it is at the end of a regular expression, or
119*9663SMark.Logan@Sun.COM            before a close-group or an alternation operator.
120*9663SMark.Logan@Sun.COM 
121*9663SMark.Logan@Sun.COM    This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
122*9663SMark.Logan@Sun.COM    POSIX draft 11.2 says that * etc. in leading positions is undefined.
123*9663SMark.Logan@Sun.COM    We already implemented a previous draft which made those constructs
124*9663SMark.Logan@Sun.COM    invalid, though, so we haven't changed the code back.  */
125*9663SMark.Logan@Sun.COM # define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
126*9663SMark.Logan@Sun.COM 
127*9663SMark.Logan@Sun.COM /* If this bit is set, then special characters are always special
128*9663SMark.Logan@Sun.COM      regardless of where they are in the pattern.
129*9663SMark.Logan@Sun.COM    If this bit is not set, then special characters are special only in
130*9663SMark.Logan@Sun.COM      some contexts; otherwise they are ordinary.  Specifically,
131*9663SMark.Logan@Sun.COM      * + ? and intervals are only special when not after the beginning,
132*9663SMark.Logan@Sun.COM      open-group, or alternation operator.  */
133*9663SMark.Logan@Sun.COM # define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
134*9663SMark.Logan@Sun.COM 
135*9663SMark.Logan@Sun.COM /* If this bit is set, then *, +, ?, and { cannot be first in an re or
136*9663SMark.Logan@Sun.COM      immediately after an alternation or begin-group operator.  */
137*9663SMark.Logan@Sun.COM # define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
138*9663SMark.Logan@Sun.COM 
139*9663SMark.Logan@Sun.COM /* If this bit is set, then . matches newline.
140*9663SMark.Logan@Sun.COM    If not set, then it doesn't.  */
141*9663SMark.Logan@Sun.COM # define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
142*9663SMark.Logan@Sun.COM 
143*9663SMark.Logan@Sun.COM /* If this bit is set, then . doesn't match NUL.
144*9663SMark.Logan@Sun.COM    If not set, then it does.  */
145*9663SMark.Logan@Sun.COM # define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
146*9663SMark.Logan@Sun.COM 
147*9663SMark.Logan@Sun.COM /* If this bit is set, nonmatching lists [^...] do not match newline.
148*9663SMark.Logan@Sun.COM    If not set, they do.  */
149*9663SMark.Logan@Sun.COM # define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
150*9663SMark.Logan@Sun.COM 
151*9663SMark.Logan@Sun.COM /* If this bit is set, either \{...\} or {...} defines an
152*9663SMark.Logan@Sun.COM      interval, depending on RE_NO_BK_BRACES.
153*9663SMark.Logan@Sun.COM    If not set, \{, \}, {, and } are literals.  */
154*9663SMark.Logan@Sun.COM # define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
155*9663SMark.Logan@Sun.COM 
156*9663SMark.Logan@Sun.COM /* If this bit is set, +, ? and | aren't recognized as operators.
157*9663SMark.Logan@Sun.COM    If not set, they are.  */
158*9663SMark.Logan@Sun.COM # define RE_LIMITED_OPS (RE_INTERVALS << 1)
159*9663SMark.Logan@Sun.COM 
160*9663SMark.Logan@Sun.COM /* If this bit is set, newline is an alternation operator.
161*9663SMark.Logan@Sun.COM    If not set, newline is literal.  */
162*9663SMark.Logan@Sun.COM # define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
163*9663SMark.Logan@Sun.COM 
164*9663SMark.Logan@Sun.COM /* If this bit is set, then `{...}' defines an interval, and \{ and \}
165*9663SMark.Logan@Sun.COM      are literals.
166*9663SMark.Logan@Sun.COM   If not set, then `\{...\}' defines an interval.  */
167*9663SMark.Logan@Sun.COM # define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
168*9663SMark.Logan@Sun.COM 
169*9663SMark.Logan@Sun.COM /* If this bit is set, (...) defines a group, and \( and \) are literals.
170*9663SMark.Logan@Sun.COM    If not set, \(...\) defines a group, and ( and ) are literals.  */
171*9663SMark.Logan@Sun.COM # define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
172*9663SMark.Logan@Sun.COM 
173*9663SMark.Logan@Sun.COM /* If this bit is set, then \<digit> matches <digit>.
174*9663SMark.Logan@Sun.COM    If not set, then \<digit> is a back-reference.  */
175*9663SMark.Logan@Sun.COM # define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
176*9663SMark.Logan@Sun.COM 
177*9663SMark.Logan@Sun.COM /* If this bit is set, then | is an alternation operator, and \| is literal.
178*9663SMark.Logan@Sun.COM    If not set, then \| is an alternation operator, and | is literal.  */
179*9663SMark.Logan@Sun.COM # define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
180*9663SMark.Logan@Sun.COM 
181*9663SMark.Logan@Sun.COM /* If this bit is set, then an ending range point collating lower
182*9663SMark.Logan@Sun.COM      than the starting range point, as in [z-a], is invalid.
183*9663SMark.Logan@Sun.COM    If not set, then when the ending range point collates lower than the
184*9663SMark.Logan@Sun.COM      starting range point, the range is ignored.  */
185*9663SMark.Logan@Sun.COM # define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
186*9663SMark.Logan@Sun.COM 
187*9663SMark.Logan@Sun.COM /* If this bit is set, then an unmatched ) is ordinary.
188*9663SMark.Logan@Sun.COM    If not set, then an unmatched ) is invalid.  */
189*9663SMark.Logan@Sun.COM # define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
190*9663SMark.Logan@Sun.COM 
191*9663SMark.Logan@Sun.COM /* If this bit is set, succeed as soon as we match the whole pattern,
192*9663SMark.Logan@Sun.COM    without further backtracking.  */
193*9663SMark.Logan@Sun.COM # define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
194*9663SMark.Logan@Sun.COM 
195*9663SMark.Logan@Sun.COM /* If this bit is set, do not process the GNU regex operators.
196*9663SMark.Logan@Sun.COM    If not set, then the GNU regex operators are recognized. */
197*9663SMark.Logan@Sun.COM # define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
198*9663SMark.Logan@Sun.COM 
199*9663SMark.Logan@Sun.COM /* If this bit is set, turn on internal regex debugging.
200*9663SMark.Logan@Sun.COM    If not set, and debugging was on, turn it off.
201*9663SMark.Logan@Sun.COM    This only works if regex.c is compiled -DDEBUG.
202*9663SMark.Logan@Sun.COM    We define this bit always, so that all that's needed to turn on
203*9663SMark.Logan@Sun.COM    debugging is to recompile regex.c; the calling code can always have
204*9663SMark.Logan@Sun.COM    this bit set, and it won't affect anything in the normal case. */
205*9663SMark.Logan@Sun.COM # define RE_DEBUG (RE_NO_GNU_OPS << 1)
206*9663SMark.Logan@Sun.COM 
207*9663SMark.Logan@Sun.COM /* If this bit is set, a syntactically invalid interval is treated as
208*9663SMark.Logan@Sun.COM    a string of ordinary characters.  For example, the ERE 'a{1' is
209*9663SMark.Logan@Sun.COM    treated as 'a\{1'.  */
210*9663SMark.Logan@Sun.COM # define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
211*9663SMark.Logan@Sun.COM 
212*9663SMark.Logan@Sun.COM /* If this bit is set, then ignore case when matching.
213*9663SMark.Logan@Sun.COM    If not set, then case is significant.  */
214*9663SMark.Logan@Sun.COM # define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
215*9663SMark.Logan@Sun.COM 
216*9663SMark.Logan@Sun.COM /* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
217*9663SMark.Logan@Sun.COM    for ^, because it is difficult to scan the regex backwards to find
218*9663SMark.Logan@Sun.COM    whether ^ should be special.  */
219*9663SMark.Logan@Sun.COM # define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
220*9663SMark.Logan@Sun.COM 
221*9663SMark.Logan@Sun.COM /* If this bit is set, then \{ cannot be first in a BRE or
222*9663SMark.Logan@Sun.COM    immediately after an alternation or begin-group operator.  */
223*9663SMark.Logan@Sun.COM # define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
224*9663SMark.Logan@Sun.COM 
225*9663SMark.Logan@Sun.COM /* If this bit is set, then no_sub will be set to 1 during
226*9663SMark.Logan@Sun.COM    re_compile_pattern.  */
227*9663SMark.Logan@Sun.COM # define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
228*9663SMark.Logan@Sun.COM 
229*9663SMark.Logan@Sun.COM #endif /* defined __USE_GNU_REGEX */
230*9663SMark.Logan@Sun.COM 
231*9663SMark.Logan@Sun.COM /* This global variable defines the particular regexp syntax to use (for
232*9663SMark.Logan@Sun.COM    some interfaces).  When a regexp is compiled, the syntax used is
233*9663SMark.Logan@Sun.COM    stored in the pattern buffer, so changing this does not affect
234*9663SMark.Logan@Sun.COM    already-compiled regexps.  */
235*9663SMark.Logan@Sun.COM extern reg_syntax_t re_syntax_options;
236*9663SMark.Logan@Sun.COM 
237*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX
238*9663SMark.Logan@Sun.COM /* Define combinations of the above bits for the standard possibilities.
239*9663SMark.Logan@Sun.COM    (The [[[ comments delimit what gets put into the Texinfo file, so
240*9663SMark.Logan@Sun.COM    don't delete them!)  */
241*9663SMark.Logan@Sun.COM /* [[[begin syntaxes]]] */
242*9663SMark.Logan@Sun.COM # define RE_SYNTAX_EMACS 0
243*9663SMark.Logan@Sun.COM 
244*9663SMark.Logan@Sun.COM # define RE_SYNTAX_AWK							\
245*9663SMark.Logan@Sun.COM   (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
246*9663SMark.Logan@Sun.COM    | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
247*9663SMark.Logan@Sun.COM    | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
248*9663SMark.Logan@Sun.COM    | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
249*9663SMark.Logan@Sun.COM    | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
250*9663SMark.Logan@Sun.COM 
251*9663SMark.Logan@Sun.COM # define RE_SYNTAX_GNU_AWK						\
252*9663SMark.Logan@Sun.COM   ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
253*9663SMark.Logan@Sun.COM    & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS		\
254*9663SMark.Logan@Sun.COM        | RE_CONTEXT_INVALID_OPS ))
255*9663SMark.Logan@Sun.COM 
256*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_AWK						\
257*9663SMark.Logan@Sun.COM   (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
258*9663SMark.Logan@Sun.COM    | RE_INTERVALS	    | RE_NO_GNU_OPS)
259*9663SMark.Logan@Sun.COM 
260*9663SMark.Logan@Sun.COM # define RE_SYNTAX_GREP							\
261*9663SMark.Logan@Sun.COM   (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
262*9663SMark.Logan@Sun.COM    | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
263*9663SMark.Logan@Sun.COM    | RE_NEWLINE_ALT)
264*9663SMark.Logan@Sun.COM 
265*9663SMark.Logan@Sun.COM # define RE_SYNTAX_EGREP						\
266*9663SMark.Logan@Sun.COM   (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
267*9663SMark.Logan@Sun.COM    | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
268*9663SMark.Logan@Sun.COM    | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
269*9663SMark.Logan@Sun.COM    | RE_NO_BK_VBAR)
270*9663SMark.Logan@Sun.COM 
271*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_EGREP						\
272*9663SMark.Logan@Sun.COM   (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
273*9663SMark.Logan@Sun.COM    | RE_INVALID_INTERVAL_ORD)
274*9663SMark.Logan@Sun.COM 
275*9663SMark.Logan@Sun.COM /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
276*9663SMark.Logan@Sun.COM # define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
277*9663SMark.Logan@Sun.COM 
278*9663SMark.Logan@Sun.COM # define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
279*9663SMark.Logan@Sun.COM 
280*9663SMark.Logan@Sun.COM /* Syntax bits common to both basic and extended POSIX regex syntax.  */
281*9663SMark.Logan@Sun.COM # define _RE_SYNTAX_POSIX_COMMON					\
282*9663SMark.Logan@Sun.COM   (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
283*9663SMark.Logan@Sun.COM    | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
284*9663SMark.Logan@Sun.COM 
285*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_BASIC						\
286*9663SMark.Logan@Sun.COM   (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
287*9663SMark.Logan@Sun.COM 
288*9663SMark.Logan@Sun.COM /* Differs from ..._POSIX_BASIC in that RE_BK_PLUS_QM becomes RE_LIMITED_OPS
289*9663SMark.Logan@Sun.COM    (i.e., \? \+ \| are not recognized) and RE_CONTEXT_INVALID_DUP is not set.
290*9663SMark.Logan@Sun.COM    Actually, this isn't minimal, since other operators, such as \`, aren't disabled.  */
291*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
292*9663SMark.Logan@Sun.COM   (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
293*9663SMark.Logan@Sun.COM 
294*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_EXTENDED					\
295*9663SMark.Logan@Sun.COM   (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
296*9663SMark.Logan@Sun.COM    | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
297*9663SMark.Logan@Sun.COM    | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
298*9663SMark.Logan@Sun.COM    | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
299*9663SMark.Logan@Sun.COM 
300*9663SMark.Logan@Sun.COM /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
301*9663SMark.Logan@Sun.COM    removed and RE_NO_BK_REFS is added.  */
302*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
303*9663SMark.Logan@Sun.COM   (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
304*9663SMark.Logan@Sun.COM    | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
305*9663SMark.Logan@Sun.COM    | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
306*9663SMark.Logan@Sun.COM    | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
307*9663SMark.Logan@Sun.COM /* [[[end syntaxes]]] */
308*9663SMark.Logan@Sun.COM 
309*9663SMark.Logan@Sun.COM #endif /* defined __USE_GNU_REGEX */
310*9663SMark.Logan@Sun.COM 
311*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX
312*9663SMark.Logan@Sun.COM 
313*9663SMark.Logan@Sun.COM /* Maximum number of duplicates an interval can allow.  POSIX-conforming
314*9663SMark.Logan@Sun.COM    systems might define this in <limits.h>, but we want our
315*9663SMark.Logan@Sun.COM    value, so remove any previous define.  */
316*9663SMark.Logan@Sun.COM # ifdef RE_DUP_MAX
317*9663SMark.Logan@Sun.COM #  undef RE_DUP_MAX
318*9663SMark.Logan@Sun.COM # endif
319*9663SMark.Logan@Sun.COM 
320*9663SMark.Logan@Sun.COM /* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored
321*9663SMark.Logan@Sun.COM    the counter as a 2-byte signed integer.  This is no longer true, so
322*9663SMark.Logan@Sun.COM    RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to
323*9663SMark.Logan@Sun.COM    ((SIZE_MAX - 2) / 10 - 1) if _REGEX_LARGE_OFFSETS is defined.
324*9663SMark.Logan@Sun.COM    However, there would be a huge performance problem if someone
325*9663SMark.Logan@Sun.COM    actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains
326*9663SMark.Logan@Sun.COM    its historical value.  */
327*9663SMark.Logan@Sun.COM # define RE_DUP_MAX (0x7fff)
328*9663SMark.Logan@Sun.COM 
329*9663SMark.Logan@Sun.COM #endif /* defined __USE_GNU_REGEX */
330*9663SMark.Logan@Sun.COM 
331*9663SMark.Logan@Sun.COM 
332*9663SMark.Logan@Sun.COM /* POSIX `cflags' bits (i.e., information for `regcomp').  */
333*9663SMark.Logan@Sun.COM 
334*9663SMark.Logan@Sun.COM /* If this bit is set, then use extended regular expression syntax.
335*9663SMark.Logan@Sun.COM    If not set, then use basic regular expression syntax.  */
336*9663SMark.Logan@Sun.COM #define REG_EXTENDED 1
337*9663SMark.Logan@Sun.COM 
338*9663SMark.Logan@Sun.COM /* If this bit is set, then ignore case when matching.
339*9663SMark.Logan@Sun.COM    If not set, then case is significant.  */
340*9663SMark.Logan@Sun.COM #define REG_ICASE (1 << 1)
341*9663SMark.Logan@Sun.COM 
342*9663SMark.Logan@Sun.COM /* If this bit is set, then (per POSIX regcomp) the anchors ^ and $ also match
343*9663SMark.Logan@Sun.COM      just after and just before newline characters in the string.
344*9663SMark.Logan@Sun.COM    If not set, then newlines get no special treatment from anchors.  */
345*9663SMark.Logan@Sun.COM #define REG_NEWLINE (1 << 2)
346*9663SMark.Logan@Sun.COM 
347*9663SMark.Logan@Sun.COM /* If this bit is set, then report only success or fail in regexec.
348*9663SMark.Logan@Sun.COM    If not set, then (per POSIX) regexec also reports match offsets via PMATCH.  */
349*9663SMark.Logan@Sun.COM #define REG_NOSUB (1 << 3)
350*9663SMark.Logan@Sun.COM 
351*9663SMark.Logan@Sun.COM 
352*9663SMark.Logan@Sun.COM /* POSIX `eflags' bits (i.e., information for regexec).  */
353*9663SMark.Logan@Sun.COM 
354*9663SMark.Logan@Sun.COM /* If this bit is set, then the beginning-of-line operator doesn't match
355*9663SMark.Logan@Sun.COM      the beginning of the string (presumably because it's not the
356*9663SMark.Logan@Sun.COM      beginning of a line).
357*9663SMark.Logan@Sun.COM    If not set, then the beginning-of-line operator does match the
358*9663SMark.Logan@Sun.COM      beginning of the string.  */
359*9663SMark.Logan@Sun.COM #define REG_NOTBOL 1
360*9663SMark.Logan@Sun.COM 
361*9663SMark.Logan@Sun.COM /* Like REG_NOTBOL, except for the end-of-line.  */
362*9663SMark.Logan@Sun.COM #define REG_NOTEOL (1 << 1)
363*9663SMark.Logan@Sun.COM 
364*9663SMark.Logan@Sun.COM /* Use PMATCH[0] to delimit the start and end of the search in the
365*9663SMark.Logan@Sun.COM    buffer.  */
366*9663SMark.Logan@Sun.COM #define REG_STARTEND (1 << 2)
367*9663SMark.Logan@Sun.COM 
368*9663SMark.Logan@Sun.COM 
369*9663SMark.Logan@Sun.COM /* If any error codes are removed, changed, or added, update the
370*9663SMark.Logan@Sun.COM    `__re_error_msgid' table in regcomp.c.  */
371*9663SMark.Logan@Sun.COM 
372*9663SMark.Logan@Sun.COM typedef enum
373*9663SMark.Logan@Sun.COM {
374*9663SMark.Logan@Sun.COM   _REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
375*9663SMark.Logan@Sun.COM   _REG_NOERROR = 0,	/* Success.  */
376*9663SMark.Logan@Sun.COM   _REG_NOMATCH,		/* Didn't find a match (for regexec).  */
377*9663SMark.Logan@Sun.COM 
378*9663SMark.Logan@Sun.COM   /* POSIX regcomp return error codes.  (In the order listed in the
379*9663SMark.Logan@Sun.COM      standard.)  */
380*9663SMark.Logan@Sun.COM   _REG_BADPAT,		/* Invalid pattern.  */
381*9663SMark.Logan@Sun.COM   _REG_ECOLLATE,	/* Invalid collating element.  */
382*9663SMark.Logan@Sun.COM   _REG_ECTYPE,		/* Invalid character class name.  */
383*9663SMark.Logan@Sun.COM   _REG_EESCAPE,		/* Trailing backslash.  */
384*9663SMark.Logan@Sun.COM   _REG_ESUBREG,		/* Invalid back reference.  */
385*9663SMark.Logan@Sun.COM   _REG_EBRACK,		/* Unmatched left bracket.  */
386*9663SMark.Logan@Sun.COM   _REG_EPAREN,		/* Parenthesis imbalance.  */
387*9663SMark.Logan@Sun.COM   _REG_EBRACE,		/* Unmatched \{.  */
388*9663SMark.Logan@Sun.COM   _REG_BADBR,		/* Invalid contents of \{\}.  */
389*9663SMark.Logan@Sun.COM   _REG_ERANGE,		/* Invalid range end.  */
390*9663SMark.Logan@Sun.COM   _REG_ESPACE,		/* Ran out of memory.  */
391*9663SMark.Logan@Sun.COM   _REG_BADRPT,		/* No preceding re for repetition op.  */
392*9663SMark.Logan@Sun.COM 
393*9663SMark.Logan@Sun.COM   /* Error codes we've added.  */
394*9663SMark.Logan@Sun.COM   _REG_EEND,		/* Premature end.  */
395*9663SMark.Logan@Sun.COM   _REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
396*9663SMark.Logan@Sun.COM   _REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
397*9663SMark.Logan@Sun.COM } reg_errcode_t;
398*9663SMark.Logan@Sun.COM 
399*9663SMark.Logan@Sun.COM #ifdef _XOPEN_SOURCE
400*9663SMark.Logan@Sun.COM # define REG_ENOSYS	_REG_ENOSYS
401*9663SMark.Logan@Sun.COM #endif
402*9663SMark.Logan@Sun.COM #define REG_NOERROR	_REG_NOERROR
403*9663SMark.Logan@Sun.COM #define REG_NOMATCH	_REG_NOMATCH
404*9663SMark.Logan@Sun.COM #define REG_BADPAT	_REG_BADPAT
405*9663SMark.Logan@Sun.COM #define REG_ECOLLATE	_REG_ECOLLATE
406*9663SMark.Logan@Sun.COM #define REG_ECTYPE	_REG_ECTYPE
407*9663SMark.Logan@Sun.COM #define REG_EESCAPE	_REG_EESCAPE
408*9663SMark.Logan@Sun.COM #define REG_ESUBREG	_REG_ESUBREG
409*9663SMark.Logan@Sun.COM #define REG_EBRACK	_REG_EBRACK
410*9663SMark.Logan@Sun.COM #define REG_EPAREN	_REG_EPAREN
411*9663SMark.Logan@Sun.COM #define REG_EBRACE	_REG_EBRACE
412*9663SMark.Logan@Sun.COM #define REG_BADBR	_REG_BADBR
413*9663SMark.Logan@Sun.COM #define REG_ERANGE	_REG_ERANGE
414*9663SMark.Logan@Sun.COM #define REG_ESPACE	_REG_ESPACE
415*9663SMark.Logan@Sun.COM #define REG_BADRPT	_REG_BADRPT
416*9663SMark.Logan@Sun.COM #define REG_EEND	_REG_EEND
417*9663SMark.Logan@Sun.COM #define REG_ESIZE	_REG_ESIZE
418*9663SMark.Logan@Sun.COM #define REG_ERPAREN	_REG_ERPAREN
419*9663SMark.Logan@Sun.COM 
420*9663SMark.Logan@Sun.COM /* struct re_pattern_buffer normally uses member names like `buffer' that
421*9663SMark.Logan@Sun.COM    POSIX does not allow.  In POSIX mode _REG_RE_NAME prepends `re_' (e.g.,
422*9663SMark.Logan@Sun.COM    `re_buffer') and _REG_RM_NAME prepends `rm_'; in GNU mode both expand to the bare name.  */
423*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX
424*9663SMark.Logan@Sun.COM # define _REG_RE_NAME(id) id
425*9663SMark.Logan@Sun.COM # define _REG_RM_NAME(id) id
426*9663SMark.Logan@Sun.COM #else
427*9663SMark.Logan@Sun.COM # define _REG_RE_NAME(id) re_##id
428*9663SMark.Logan@Sun.COM # define _REG_RM_NAME(id) rm_##id
429*9663SMark.Logan@Sun.COM #endif
430*9663SMark.Logan@Sun.COM 
431*9663SMark.Logan@Sun.COM /* The user can specify the type of the re_translate member by
432*9663SMark.Logan@Sun.COM    defining the macro RE_TRANSLATE_TYPE, which defaults to unsigned
433*9663SMark.Logan@Sun.COM    char *.  This pollutes the POSIX name space, so in POSIX mode just
434*9663SMark.Logan@Sun.COM    use unsigned char *.  */
435*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX
436*9663SMark.Logan@Sun.COM # ifndef RE_TRANSLATE_TYPE
437*9663SMark.Logan@Sun.COM #  define RE_TRANSLATE_TYPE unsigned char *
438*9663SMark.Logan@Sun.COM # endif
439*9663SMark.Logan@Sun.COM # define REG_TRANSLATE_TYPE RE_TRANSLATE_TYPE
440*9663SMark.Logan@Sun.COM #else
441*9663SMark.Logan@Sun.COM # define REG_TRANSLATE_TYPE unsigned char *
442*9663SMark.Logan@Sun.COM #endif
443*9663SMark.Logan@Sun.COM 
444*9663SMark.Logan@Sun.COM /* This data structure represents a compiled pattern.  Before calling
445*9663SMark.Logan@Sun.COM    the pattern compiler, the fields `buffer', `allocated', `fastmap',
446*9663SMark.Logan@Sun.COM    `translate', and `no_sub' can be set.  After the pattern has been
447*9663SMark.Logan@Sun.COM    compiled, the `re_nsub' field is available.  All other fields are
448*9663SMark.Logan@Sun.COM    private to the regex routines.  */
449*9663SMark.Logan@Sun.COM 
450*9663SMark.Logan@Sun.COM struct re_pattern_buffer
451*9663SMark.Logan@Sun.COM {
452*9663SMark.Logan@Sun.COM   /* Space that holds the compiled pattern.  It is declared as
453*9663SMark.Logan@Sun.COM      `unsigned char *' because its elements are sometimes used as
454*9663SMark.Logan@Sun.COM      array indexes.  */
455*9663SMark.Logan@Sun.COM   unsigned char *_REG_RE_NAME (buffer);
456*9663SMark.Logan@Sun.COM 
457*9663SMark.Logan@Sun.COM   /* Number of bytes to which `buffer' points.  */
458*9663SMark.Logan@Sun.COM   __re_long_size_t _REG_RE_NAME (allocated);
459*9663SMark.Logan@Sun.COM 
460*9663SMark.Logan@Sun.COM   /* Number of bytes actually used in `buffer'.  */
461*9663SMark.Logan@Sun.COM   __re_long_size_t _REG_RE_NAME (used);
462*9663SMark.Logan@Sun.COM 
463*9663SMark.Logan@Sun.COM   /* Syntax setting with which the pattern was compiled.  */
464*9663SMark.Logan@Sun.COM   reg_syntax_t _REG_RE_NAME (syntax);
465*9663SMark.Logan@Sun.COM 
466*9663SMark.Logan@Sun.COM   /* Pointer to a fastmap, if any, otherwise zero.  re_search uses the
467*9663SMark.Logan@Sun.COM      fastmap, if there is one, to skip over impossible starting points
468*9663SMark.Logan@Sun.COM      for matches.  */
469*9663SMark.Logan@Sun.COM   char *_REG_RE_NAME (fastmap);
470*9663SMark.Logan@Sun.COM 
471*9663SMark.Logan@Sun.COM   /* Either a translate table to apply to all characters before
472*9663SMark.Logan@Sun.COM      comparing them, or zero for no translation.  The translation is
473*9663SMark.Logan@Sun.COM      applied to a pattern when it is compiled and to a string when it
474*9663SMark.Logan@Sun.COM      is matched.  */
475*9663SMark.Logan@Sun.COM   REG_TRANSLATE_TYPE _REG_RE_NAME (translate);
476*9663SMark.Logan@Sun.COM 
477*9663SMark.Logan@Sun.COM   /* Number of subexpressions found by the compiler.  */
478*9663SMark.Logan@Sun.COM   size_t re_nsub;
479*9663SMark.Logan@Sun.COM 
480*9663SMark.Logan@Sun.COM   /* Zero if this pattern cannot match the empty string, one otherwise.
481*9663SMark.Logan@Sun.COM      Well, in truth it's used only in `re_search_2', to see whether or
482*9663SMark.Logan@Sun.COM      not we should use the fastmap, so we don't set this absolutely
483*9663SMark.Logan@Sun.COM      perfectly; see `re_compile_fastmap' (the `duplicate' case).  */
484*9663SMark.Logan@Sun.COM   unsigned int _REG_RE_NAME (can_be_null) : 1;
485*9663SMark.Logan@Sun.COM 
486*9663SMark.Logan@Sun.COM   /* If REGS_UNALLOCATED, allocate space in the `regs' structure
487*9663SMark.Logan@Sun.COM      for `max (RE_NREGS, re_nsub + 1)' groups.
488*9663SMark.Logan@Sun.COM      If REGS_REALLOCATE, reallocate space if necessary.
489*9663SMark.Logan@Sun.COM      If REGS_FIXED, use what's there.  Two bits hold these three values.  */
490*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX
491*9663SMark.Logan@Sun.COM # define REGS_UNALLOCATED 0
492*9663SMark.Logan@Sun.COM # define REGS_REALLOCATE 1
493*9663SMark.Logan@Sun.COM # define REGS_FIXED 2
494*9663SMark.Logan@Sun.COM #endif
495*9663SMark.Logan@Sun.COM   unsigned int _REG_RE_NAME (regs_allocated) : 2;
496*9663SMark.Logan@Sun.COM 
497*9663SMark.Logan@Sun.COM   /* Set to zero when `regex_compile' compiles a pattern; set to one
498*9663SMark.Logan@Sun.COM      by `re_compile_fastmap' if it updates the fastmap.  */
499*9663SMark.Logan@Sun.COM   unsigned int _REG_RE_NAME (fastmap_accurate) : 1;
500*9663SMark.Logan@Sun.COM 
501*9663SMark.Logan@Sun.COM   /* If set, `re_match_2' does not return information about
502*9663SMark.Logan@Sun.COM      subexpressions.  */
503*9663SMark.Logan@Sun.COM   unsigned int _REG_RE_NAME (no_sub) : 1;
504*9663SMark.Logan@Sun.COM 
505*9663SMark.Logan@Sun.COM   /* If set, a beginning-of-line anchor doesn't match at the beginning
506*9663SMark.Logan@Sun.COM      of the string.  */
507*9663SMark.Logan@Sun.COM   unsigned int _REG_RE_NAME (not_bol) : 1;
508*9663SMark.Logan@Sun.COM 
509*9663SMark.Logan@Sun.COM   /* Similarly for an end-of-line anchor.  */
510*9663SMark.Logan@Sun.COM   unsigned int _REG_RE_NAME (not_eol) : 1;
511*9663SMark.Logan@Sun.COM 
512*9663SMark.Logan@Sun.COM   /* If true, an anchor at a newline matches.  */
513*9663SMark.Logan@Sun.COM   unsigned int _REG_RE_NAME (newline_anchor) : 1;
514*9663SMark.Logan@Sun.COM 
515*9663SMark.Logan@Sun.COM /* [[[end pattern_buffer]]] */
516*9663SMark.Logan@Sun.COM };
517*9663SMark.Logan@Sun.COM 
518*9663SMark.Logan@Sun.COM typedef struct re_pattern_buffer regex_t;  /* Name by which regcomp, regexec, regerror and regfree take the pattern buffer.  */
519*9663SMark.Logan@Sun.COM 
520*9663SMark.Logan@Sun.COM /* This is the structure in which register match data is stored.  See
521*9663SMark.Logan@Sun.COM    regex.texinfo for a full description of what registers match.  */
522*9663SMark.Logan@Sun.COM struct re_registers
523*9663SMark.Logan@Sun.COM {
524*9663SMark.Logan@Sun.COM   __re_size_t _REG_RM_NAME (num_regs);  /* Number of registers held.  */
525*9663SMark.Logan@Sun.COM   regoff_t *_REG_RM_NAME (start);  /* Array of `regoff_t' holding register starts.  */
526*9663SMark.Logan@Sun.COM   regoff_t *_REG_RM_NAME (end);  /* Array of `regoff_t' holding register ends.  */
527*9663SMark.Logan@Sun.COM };
528*9663SMark.Logan@Sun.COM 
529*9663SMark.Logan@Sun.COM 
530*9663SMark.Logan@Sun.COM /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
531*9663SMark.Logan@Sun.COM    `re_match_2' returns information about at least this many registers
532*9663SMark.Logan@Sun.COM    the first time a `regs' structure is passed.  Defined here only when
   __USE_GNU_REGEX is defined and RE_NREGS is not already defined.  */
533*9663SMark.Logan@Sun.COM #if !defined RE_NREGS && defined __USE_GNU_REGEX
534*9663SMark.Logan@Sun.COM # define RE_NREGS 30
535*9663SMark.Logan@Sun.COM #endif
536*9663SMark.Logan@Sun.COM 
537*9663SMark.Logan@Sun.COM 
538*9663SMark.Logan@Sun.COM /* POSIX specification for registers.  Aside from using different names
539*9663SMark.Logan@Sun.COM    than `re_registers', POSIX uses an array of structures instead of a
540*9663SMark.Logan@Sun.COM    structure of arrays.  */
541*9663SMark.Logan@Sun.COM typedef struct
542*9663SMark.Logan@Sun.COM {
543*9663SMark.Logan@Sun.COM   regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
544*9663SMark.Logan@Sun.COM   regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
545*9663SMark.Logan@Sun.COM } regmatch_t;
546*9663SMark.Logan@Sun.COM 
547*9663SMark.Logan@Sun.COM /* Declarations for routines.  */
548*9663SMark.Logan@Sun.COM 
549*9663SMark.Logan@Sun.COM /* Set the current default syntax to SYNTAX, and return the old syntax.
550*9663SMark.Logan@Sun.COM    You can also simply assign to the `re_syntax_options' variable.  */
551*9663SMark.Logan@Sun.COM extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
552*9663SMark.Logan@Sun.COM 
553*9663SMark.Logan@Sun.COM /* Compile the regular expression PATTERN, of length LENGTH, into
554*9663SMark.Logan@Sun.COM    BUFFER, using the syntax given by the global `re_syntax_options'.
555*9663SMark.Logan@Sun.COM    Return NULL if successful, and an error string if not.  */
556*9663SMark.Logan@Sun.COM extern const char *re_compile_pattern (const char *__pattern, size_t __length,
557*9663SMark.Logan@Sun.COM 				       struct re_pattern_buffer *__buffer);
558*9663SMark.Logan@Sun.COM 
559*9663SMark.Logan@Sun.COM 
560*9663SMark.Logan@Sun.COM /* Compile a fastmap for the compiled pattern in BUFFER; used to
561*9663SMark.Logan@Sun.COM    accelerate searches.  Return 0 if successful and -2 if there was an
562*9663SMark.Logan@Sun.COM    internal error.  */
563*9663SMark.Logan@Sun.COM extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
564*9663SMark.Logan@Sun.COM 
565*9663SMark.Logan@Sun.COM 
566*9663SMark.Logan@Sun.COM /* Search in the string STRING (with length LENGTH) for the pattern
567*9663SMark.Logan@Sun.COM    compiled into BUFFER.  Start searching at position START, for RANGE
568*9663SMark.Logan@Sun.COM    characters.  Return the starting position of the match, -1 for no
569*9663SMark.Logan@Sun.COM    match, or -2 for an internal error.  Also return register
570*9663SMark.Logan@Sun.COM    information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
571*9663SMark.Logan@Sun.COM extern regoff_t re_search (struct re_pattern_buffer *__buffer,
572*9663SMark.Logan@Sun.COM 			   const char *__string, __re_idx_t __length,
573*9663SMark.Logan@Sun.COM 			   __re_idx_t __start, regoff_t __range,
574*9663SMark.Logan@Sun.COM 			   struct re_registers *__regs);
575*9663SMark.Logan@Sun.COM 
576*9663SMark.Logan@Sun.COM 
577*9663SMark.Logan@Sun.COM /* Like `re_search', but search in the concatenation of STRING1 and
578*9663SMark.Logan@Sun.COM    STRING2.  Also, stop searching at index START + STOP.
   NOTE(review): confirm against the implementation whether STOP is
   relative to START, as stated here, or an absolute index.  */
579*9663SMark.Logan@Sun.COM extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
580*9663SMark.Logan@Sun.COM 			     const char *__string1, __re_idx_t __length1,
581*9663SMark.Logan@Sun.COM 			     const char *__string2, __re_idx_t __length2,
582*9663SMark.Logan@Sun.COM 			     __re_idx_t __start, regoff_t __range,
583*9663SMark.Logan@Sun.COM 			     struct re_registers *__regs,
584*9663SMark.Logan@Sun.COM 			     __re_idx_t __stop);
585*9663SMark.Logan@Sun.COM 
586*9663SMark.Logan@Sun.COM 
587*9663SMark.Logan@Sun.COM /* Like `re_search', but return the number of characters in STRING that
588*9663SMark.Logan@Sun.COM    the regexp in BUFFER matched, starting at position START.  */
589*9663SMark.Logan@Sun.COM extern regoff_t re_match (struct re_pattern_buffer *__buffer,
590*9663SMark.Logan@Sun.COM 			  const char *__string, __re_idx_t __length,
591*9663SMark.Logan@Sun.COM 			  __re_idx_t __start, struct re_registers *__regs);
592*9663SMark.Logan@Sun.COM 
593*9663SMark.Logan@Sun.COM 
594*9663SMark.Logan@Sun.COM /* Relates to `re_match' as `re_search_2' relates to `re_search': the subject is the concatenation of STRING1 and STRING2.  */
595*9663SMark.Logan@Sun.COM extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
596*9663SMark.Logan@Sun.COM 			    const char *__string1, __re_idx_t __length1,
597*9663SMark.Logan@Sun.COM 			    const char *__string2, __re_idx_t __length2,
598*9663SMark.Logan@Sun.COM 			    __re_idx_t __start, struct re_registers *__regs,
599*9663SMark.Logan@Sun.COM 			    __re_idx_t __stop);
600*9663SMark.Logan@Sun.COM 
601*9663SMark.Logan@Sun.COM 
602*9663SMark.Logan@Sun.COM /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
603*9663SMark.Logan@Sun.COM    ENDS.  Subsequent matches using BUFFER and REGS will use this memory
604*9663SMark.Logan@Sun.COM    for recording register information.  STARTS and ENDS must be
605*9663SMark.Logan@Sun.COM    allocated with malloc, and must each be at least `NUM_REGS * sizeof
606*9663SMark.Logan@Sun.COM    (regoff_t)' bytes long.
607*9663SMark.Logan@Sun.COM 
608*9663SMark.Logan@Sun.COM    If NUM_REGS == 0, then subsequent matches should allocate their own
609*9663SMark.Logan@Sun.COM    register data.
610*9663SMark.Logan@Sun.COM 
611*9663SMark.Logan@Sun.COM    Unless this function is called, the first search or match using
612*9663SMark.Logan@Sun.COM    BUFFER will allocate its own register data, without
613*9663SMark.Logan@Sun.COM    freeing the old data.  */
614*9663SMark.Logan@Sun.COM extern void re_set_registers (struct re_pattern_buffer *__buffer,
615*9663SMark.Logan@Sun.COM 			      struct re_registers *__regs,
616*9663SMark.Logan@Sun.COM 			      __re_size_t __num_regs,
617*9663SMark.Logan@Sun.COM 			      regoff_t *__starts, regoff_t *__ends);
618*9663SMark.Logan@Sun.COM 
619*9663SMark.Logan@Sun.COM #if defined _REGEX_RE_COMP || defined _LIBC
620*9663SMark.Logan@Sun.COM # ifndef _CRAY
621*9663SMark.Logan@Sun.COM /* 4.2 BSD compatibility; not declared when _CRAY is defined.  */
622*9663SMark.Logan@Sun.COM extern char *re_comp (const char *);
623*9663SMark.Logan@Sun.COM extern int re_exec (const char *);
624*9663SMark.Logan@Sun.COM # endif
625*9663SMark.Logan@Sun.COM #endif
626*9663SMark.Logan@Sun.COM 
627*9663SMark.Logan@Sun.COM /* Pick a private spelling, _Restrict_, for the `restrict' qualifier.
628*9663SMark.Logan@Sun.COM    C99 compilers have "restrict" and GCC 2.95 and later have
629*9663SMark.Logan@Sun.COM    "__restrict"; other compilers use __restrict, __restrict__, or
630*9663SMark.Logan@Sun.COM    _Restrict, and "configure" might #define "restrict" to any of those
631*9663SMark.Logan@Sun.COM    words, hence the different name.  Empty if neither test below holds.  */
632*9663SMark.Logan@Sun.COM #ifndef _Restrict_
633*9663SMark.Logan@Sun.COM # if 199901L <= __STDC_VERSION__
634*9663SMark.Logan@Sun.COM #  define _Restrict_ restrict
635*9663SMark.Logan@Sun.COM # elif 2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)
636*9663SMark.Logan@Sun.COM #  define _Restrict_ __restrict
637*9663SMark.Logan@Sun.COM # else
638*9663SMark.Logan@Sun.COM #  define _Restrict_
639*9663SMark.Logan@Sun.COM # endif
640*9663SMark.Logan@Sun.COM #endif
641*9663SMark.Logan@Sun.COM /* C99, and gcc 3.1 and up outside strict ANSI mode, support the
642*9663SMark.Logan@Sun.COM    [restrict] syntax; it is left empty for G++ (__GNUG__).  Don't trust
643*9663SMark.Logan@Sun.COM    sys/cdefs.h's definition of __restrict_arr, though, as it mishandles gcc -ansi -pedantic.  */
644*9663SMark.Logan@Sun.COM #ifndef _Restrict_arr_
645*9663SMark.Logan@Sun.COM # if ((199901L <= __STDC_VERSION__					\
646*9663SMark.Logan@Sun.COM        || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__))	\
647*9663SMark.Logan@Sun.COM 	   && !__STRICT_ANSI__))					\
648*9663SMark.Logan@Sun.COM       && !defined __GNUG__)
649*9663SMark.Logan@Sun.COM #  define _Restrict_arr_ _Restrict_
650*9663SMark.Logan@Sun.COM # else
651*9663SMark.Logan@Sun.COM #  define _Restrict_arr_
652*9663SMark.Logan@Sun.COM # endif
653*9663SMark.Logan@Sun.COM #endif
654*9663SMark.Logan@Sun.COM 
655*9663SMark.Logan@Sun.COM /* POSIX compatibility: regcomp, regexec, regerror and regfree.  */
656*9663SMark.Logan@Sun.COM extern int regcomp (regex_t *_Restrict_ __preg,
657*9663SMark.Logan@Sun.COM 		    const char *_Restrict_ __pattern,
658*9663SMark.Logan@Sun.COM 		    int __cflags);
659*9663SMark.Logan@Sun.COM 
660*9663SMark.Logan@Sun.COM extern int regexec (const regex_t *_Restrict_ __preg,
661*9663SMark.Logan@Sun.COM 		    const char *_Restrict_ __string, size_t __nmatch,
662*9663SMark.Logan@Sun.COM 		    regmatch_t __pmatch[_Restrict_arr_],
663*9663SMark.Logan@Sun.COM 		    int __eflags);
664*9663SMark.Logan@Sun.COM 
665*9663SMark.Logan@Sun.COM extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg,
666*9663SMark.Logan@Sun.COM 			char *_Restrict_ __errbuf, size_t __errbuf_size);
667*9663SMark.Logan@Sun.COM 
668*9663SMark.Logan@Sun.COM extern void regfree (regex_t *__preg);
669*9663SMark.Logan@Sun.COM 
670*9663SMark.Logan@Sun.COM 
671*9663SMark.Logan@Sun.COM #ifdef __cplusplus
672*9663SMark.Logan@Sun.COM }
673*9663SMark.Logan@Sun.COM #endif	/* C++ */
674*9663SMark.Logan@Sun.COM 
675*9663SMark.Logan@Sun.COM #endif /* regex.h */
676