1*58185Selan /* Definitions for data structures callers pass the regex library.
2*58185Selan 
3*58185Selan    Copyright (C) 1985, 1989-92 Free Software Foundation, Inc.
4*58185Selan 
5*58185Selan This file is part of the GNU C++ Library.  This library is free
6*58185Selan software; you can redistribute it and/or modify it under the terms of
7*58185Selan the GNU Library General Public License as published by the Free
8*58185Selan Software Foundation; either version 2 of the License, or (at your
9*58185Selan option) any later version.  This library is distributed in the hope
10*58185Selan that it will be useful, but WITHOUT ANY WARRANTY; without even the
11*58185Selan implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12*58185Selan PURPOSE.  See the GNU Library General Public License for more details.
13*58185Selan You should have received a copy of the GNU Library General Public
14*58185Selan License along with this library; if not, write to the Free Software
15*58185Selan Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
16*58185Selan */
17*58185Selan 
18*58185Selan #ifndef __REGEXP_LIBRARY
19*58185Selan #define __REGEXP_LIBRARY
20*58185Selan 
/* Optional short aliases for the three longest public names.  They are
   enabled when either SHORT_NAMES or VMS is defined (presumably for
   linkers that cannot cope with long identifiers -- confirm before
   relying on that).  */
#if defined (VMS) || defined (SHORT_NAMES)
#define re_registers		reregs
#define re_pattern_buffer	repatbuf
#define re_compile_pattern	recmppat
#endif
26*58185Selan 
27*58185Selan #ifdef __cplusplus
28*58185Selan extern "C" {
29*58185Selan #endif
30*58185Selan 
/* Number of parenthesized groups whose beginnings and ends are recorded.
   It fixes the size of `struct re_registers'.  A value supplied before
   this header is included takes precedence over the default.  */
#if !defined (RE_NREGS)
#define RE_NREGS 10
#endif

/* Presumably the number of bits in a byte; nothing in this header uses
   it, so check regex.c for its exact role.  */
#define BYTEWIDTH 8


/* Largest number of duplicates an interval can allow.  Only a default:
   AIX already defines RE_DUP_MAX, and that definition is left alone.  */
#if !defined (RE_DUP_MAX)
#define RE_DUP_MAX  0x7fff
#endif
44*58185Selan 
/* The regexp syntax in effect: a bitwise OR of the RE_* flag bits
   defined below.  */
extern int obscure_syntax;


/* Flag bits for obscure_syntax.  Each one chooses between two
   alternative readings of some regexp construct; "Set" and "Clear"
   describe the behaviour with the bit on and off respectively.  */

/* Set:   ( and ) group; \( and \) match literal parentheses.
   Clear: \( and \) group; ( and ) match literal parentheses.  */
#define RE_NO_BK_PARENS (1 << 0)

/* Set:   | is the `or'-operator; \| is a literal.
   Clear: \| is the `or'-operator; | is a literal.  */
#define RE_NO_BK_VBAR (1 << 1)

/* Set:   \+ and \? are operators; plain + and ? are literals.
   Clear: plain + and ? are operators; \+ and \? are literals.  */
#define RE_BK_PLUS_QM (1 << 2)

/* Set:   | binds tighter than ^ or $.
   Clear: the contrary.  */
#define RE_TIGHT_VBAR (1 << 3)

/* Set:   a newline is treated as an OR operator.
   Clear: a newline is treated as a normal character.  */
#define RE_NEWLINE_OR (1 << 4)

/* Set:   special characters (such as *, ^, and $) always have their
          special meaning, regardless of the surrounding context.
   Clear: special characters may act as normal characters in some
          contexts.  Specifically:
            ^       -- only special at the beginning, or after ( or |;
            $       -- only special at the end, or before ) or |;
            *, +, ? -- only special when not after the beginning, (, or |.
   NOTE(review): this comment used to attribute the two behaviours to
   the opposite bit states, which contradicts the flag's name; confirm
   the corrected reading against regex.c.  */
#define RE_CONTEXT_INDEP_OPS (1 << 5)

/* Set:   a \ before anything inside [ and ] escapes the following
          character.  This is a special case for awk.
   Clear: a \ before anything inside [ and ] is taken as a real \.  */
#define RE_AWK_CLASS_HACK (1 << 6)

/* Set:   \{ and \}, or { and }, serve as interval operators (see
          RE_NO_BK_CURLY_BRACES for which pair).
   Clear: \{ and \} and { and } are all treated as literals.  */
#define RE_INTERVALS (1 << 7)

/* Set:   { and } serve as interval operators; \{ and \} are literals.
   Clear: \{ and \} serve as interval operators; { and } are literals.  */
#define RE_NO_BK_CURLY_BRACES (1 << 8)

/* Set:   character classes are supported; they are [:alpha:],
          [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
          [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   Clear: character classes are not supported.  */
#define RE_CHAR_CLASSES (1 << 9)

/* Set:   the dot re doesn't match a null byte.
   Clear: it does.  */
#define RE_DOT_NOT_NULL (1 << 10)

/* Set:   [^...] doesn't match a newline.
   Clear: it does.  */
#define RE_HAT_NOT_NEWLINE (1 << 11)

/* Set:   back references are not recognized.
   Clear: back references are recognized.  */
#define RE_NO_BK_REFS (1 << 12)

/* Set:   a back reference must refer to a preceding subexpression.
   Clear: a back reference to a nonexistent subexpression is treated as
          literal characters.  */
#define RE_NO_EMPTY_BK_REF (1 << 13)

/* Set:   bracket expressions can't be empty.
   Clear: bracket expressions can be empty.  */
#define RE_NO_EMPTY_BRACKETS (1 << 14)

/* Set:   *, +, ? and { cannot be first in an re or immediately after a
          | or a (.  Furthermore, a | cannot be first or last in an re,
          or immediately follow another | or a (.  Also, outside of
          bracket expressions, a ^ cannot appear in a nonleading
          position and a $ cannot appear in a nontrailing position.  */
#define RE_CONTEXTUAL_INVALID_OPS (1 << 15)

/* Set:   +, ? and | aren't recognized as operators.
   Clear: they are.  */
#define RE_LIMITED_OPS (1 << 16)

/* Set:   the ending point of a range has to collate higher than or
          equal to its starting point.
   Clear: a range whose ending point collates lower than its starting
          point is just considered empty.  */
#define RE_NO_EMPTY_RANGES (1 << 17)

/* Set:   a hyphen (-) can't be an ending range point.
   Clear: it can.  */
#define RE_NO_HYPHEN_RANGE_END (1 << 18)
149*58185Selan 
150*58185Selan 
/* Ready-made combinations of the syntax bits for the standard
   possibilities.  Within each definition the flags are listed in
   ascending bit order.  */

/* No bits set at all.  */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)

#define RE_SYNTAX_POSIX_AWK \
  (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)

/* The POSIX awk bits plus the awk bracket-escape special case.  */
#define RE_SYNTAX_AWK (RE_SYNTAX_POSIX_AWK | RE_AWK_CLASS_HACK)

/* The same three bits as POSIX awk, plus newline acting as `or'.  */
#define RE_SYNTAX_EGREP (RE_SYNTAX_POSIX_AWK | RE_NEWLINE_OR)

#define RE_SYNTAX_POSIX_BASIC \
  (RE_BK_PLUS_QM \
   | RE_INTERVALS \
   | RE_CHAR_CLASSES \
   | RE_DOT_NOT_NULL \
   | RE_HAT_NOT_NEWLINE \
   | RE_NO_EMPTY_BK_REF \
   | RE_NO_EMPTY_BRACKETS \
   | RE_LIMITED_OPS \
   | RE_NO_EMPTY_RANGES \
   | RE_NO_HYPHEN_RANGE_END)

#define RE_SYNTAX_POSIX_EXTENDED \
  (RE_NO_BK_PARENS \
   | RE_NO_BK_VBAR \
   | RE_INTERVALS \
   | RE_NO_BK_CURLY_BRACES \
   | RE_CHAR_CLASSES \
   | RE_HAT_NOT_NEWLINE \
   | RE_NO_BK_REFS \
   | RE_NO_EMPTY_BRACKETS \
   | RE_CONTEXTUAL_INVALID_OPS \
   | RE_NO_EMPTY_RANGES \
   | RE_NO_HYPHEN_RANGE_END)
172*58185Selan 
173*58185Selan 
/* A compiled pattern, together with the optional tables used with it.  */

struct re_pattern_buffer
{
  /* Space holding the compiled pattern commands.  */
  char *buffer;

  /* Size of the space that `buffer' points to.  */
  long allocated;

  /* Length of the portion of `buffer' that is actually occupied.  */
  long used;

  /* The fastmap, if any, or zero if none.  When there is one, re_search
     uses it to skip over totally implausible characters.  */
  char *fastmap;

  /* Translate table to apply to all characters before comparing, or
     zero for no translation.  The translation is applied to a pattern
     when it is compiled and to data when it is matched.  */
  char *translate;

  /* Set to zero when a new pattern is stored; set to one when the
     fastmap is updated from it.  */
  char fastmap_accurate;

  /* Filled in by compiling the fastmap:
       0 -- the pattern cannot match the null string;
       1 -- the pattern might match the null string (it does not
            necessarily do so);
       2 -- the pattern can match the null string, but only at the end
            of the range or before a character listed in the fastmap.  */
  char can_be_null;
};
199*58185Selan 
200*58185Selan 
/* search.c (search_buffer) needs this one value.  It is defined both in
   regex.c and here, so a change in one place has to be repeated in the
   other.  */
#define RE_EXACTN_VALUE 1
204*58185Selan 
205*58185Selan 
206*58185Selan /* Structure to store register contents data in.
207*58185Selan 
208*58185Selan    Pass the address of such a structure as an argument to re_match, etc.,
209*58185Selan    if you want this information back.
210*58185Selan 
211*58185Selan    For i from 1 to RE_NREGS - 1, start[i] records the starting index in
212*58185Selan    the string of where the ith subexpression matched, and end[i] records
213*58185Selan    one after the ending index.  start[0] and end[0] are analogous, for
214*58185Selan    the entire pattern.  */
215*58185Selan 
216*58185Selan struct re_registers
217*58185Selan   {
218*58185Selan     int start[RE_NREGS];
219*58185Selan     int end[RE_NREGS];
220*58185Selan   };
221*58185Selan 
222*58185Selan 
223*58185Selan 
/* Entry points of the regex library.  ANSI C and C++ compilers get full
   prototypes.  Older compilers get plain declarations instead, and for
   them `const' is defined away; nothing in this header undefines it
   again afterwards.  */

#if defined (__cplusplus) || defined (__STDC__)

extern char *re_compile_pattern (const char *, int,
                                 struct re_pattern_buffer *);

/* Is this really advertised?  */
extern void re_compile_fastmap (struct re_pattern_buffer *);

extern int re_search (struct re_pattern_buffer *, char *, int, int, int,
                      struct re_registers *);

extern int re_search_2 (struct re_pattern_buffer *, char *, int,
                        char *, int, int, int,
                        struct re_registers *, int);

extern int re_match (struct re_pattern_buffer *, char *, int, int,
                     struct re_registers *);

extern int re_match_2 (struct re_pattern_buffer *, char *, int,
                       char *, int, int, struct re_registers *, int);

/* 4.2 bsd compatibility.  */
extern char *re_comp (char *);
extern int re_exec (char *);

#else /* neither __STDC__ nor __cplusplus */

#define const /* nothing */

extern char *re_compile_pattern ();

/* Is this really advertised?  */
extern void re_compile_fastmap ();

extern int re_search ();
extern int re_search_2 ();
extern int re_match ();
extern int re_match_2 ();

/* 4.2 bsd compatibility.  */
extern char *re_comp ();
extern int re_exec ();

#endif /* __STDC__ || __cplusplus */
257*58185Selan 
258*58185Selan 
/* Declared only for builds that define SYNTAX_TABLE.  */
#ifdef SYNTAX_TABLE
extern char *re_syntax_table;
#endif

/* Declared unconditionally so that C callers see it as well; it used to
   be visible only when compiling as C++.  It stays ahead of the closing
   brace below, so C++ still gets it with C linkage.
   NOTE(review): presumably the limit on the matcher's failure stack --
   confirm against the definition in regex.c.  */
extern int re_max_failures;

/* Closes the extern "C" block opened near the top of this header.  */
#ifdef __cplusplus
}
#endif
267*58185Selan 
268*58185Selan #endif /* !__REGEXP_LIBRARY */
269