158185Selan /* Definitions for data structures callers pass the regex library.
258185Selan 
358185Selan    Copyright (C) 1985, 1989-92 Free Software Foundation, Inc.
458185Selan 
558185Selan This file is part of the GNU C++ Library.  This library is free
658185Selan software; you can redistribute it and/or modify it under the terms of
758185Selan the GNU Library General Public License as published by the Free
858185Selan Software Foundation; either version 2 of the License, or (at your
958185Selan option) any later version.  This library is distributed in the hope
1058185Selan that it will be useful, but WITHOUT ANY WARRANTY; without even the
1158185Selan implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
1258185Selan PURPOSE.  See the GNU Library General Public License for more details.
1358185Selan You should have received a copy of the GNU Library General Public
1458185Selan License along with this library; if not, write to the Free Software
1558185Selan Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
1658185Selan */
1758185Selan 
1858185Selan #ifndef __REGEXP_LIBRARY
1958185Selan #define __REGEXP_LIBRARY
2058185Selan 
/* When either VMS or SHORT_NAMES is defined, remap the long public
   identifiers onto abbreviated spellings.  */
#if defined (VMS) || defined (SHORT_NAMES)
#define re_registers		reregs
#define re_pattern_buffer	repatbuf
#define re_compile_pattern	recmppat
#endif
2658185Selan 
2758185Selan #ifdef __cplusplus
2858185Selan extern "C" {
2958185Selan #endif
3058185Selan 
/* Number of parenthesized groups whose start and end positions are
   recorded.  The size of `struct re_registers' depends on this value.
   A definition supplied before this point takes precedence.  */
#if !defined (RE_NREGS)
#define RE_NREGS 10
#endif

/* Presumably the number of bits in one pattern byte -- confirm against
   the matcher sources.  */
#define BYTEWIDTH 8


/* Largest repeat count an interval may specify.  AIX already defines
   this name, so only supply a value when none exists.  */
#if !defined (RE_DUP_MAX)
#define RE_DUP_MAX  0x7fff
#endif
4458185Selan 
/* This defines the various regexp syntaxes: the option bits defined
   below are used in this variable to choose among the alternatives.  */
extern int obscure_syntax;
4758185Selan 
4858185Selan 
/* The following bits are used in the obscure_syntax variable to choose among
   alternative regexp syntaxes.  Each option occupies its own bit, so any
   combination of them can be ORed together.  */

/* If this bit is set, plain parentheses serve as grouping, and backslash
     parentheses are needed for literal searching.
   If not set, backslash-parentheses are grouping, and plain parentheses
     are for literal searching.  */
#define RE_NO_BK_PARENS (1 << 0)

/* If this bit is set, plain | serves as the `or'-operator, and \| is a
     literal.
   If not set, \| serves as the `or'-operator, and | is a literal.  */
#define RE_NO_BK_VBAR (1 << 1)

/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are
     literals.
   If set, \+, \? are operators and plain +, ? are literals.  */
#define RE_BK_PLUS_QM (1 << 2)

/* If this bit is set, | binds tighter than ^ or $.
   If not set, the contrary.  */
#define RE_TIGHT_VBAR (1 << 3)

/* If this bit is set, then treat newline as an OR operator.
   If not set, treat it as a normal character.  */
#define RE_NEWLINE_OR (1 << 4)

/* If this bit is set, special characters (such as *, ^, and $)
   always have their special meaning regardless of the surrounding
   context.
   If this bit is not set, then special characters may act as normal
   characters in some contexts.  Specifically, this applies to:
	^ -- only special at the beginning, or after ( or |;
	$ -- only special at the end, or before ) or |;
	*, +, ? -- only special when not after the beginning, (, or |.  */
#define RE_CONTEXT_INDEP_OPS (1 << 5)

/* If this bit is not set, then \ before anything inside [ and ] is taken as
     a real \.
   If set, then such a \ escapes the following character.  This is a
     special case for awk.  */
#define RE_AWK_CLASS_HACK (1 << 6)

/* If this bit is set, then \{ and \} or { and } serve as interval operators.
   If not set, then \{ and \} and { and } are treated as literals.  */
#define RE_INTERVALS (1 << 7)

/* If this bit is not set, then \{ and \} serve as interval operators and
     { and } are literals.
   If set, then { and } serve as interval operators and \{ and \} are
     literals.  */
#define RE_NO_BK_CURLY_BRACES (1 << 8)

/* If this bit is set, then character classes are supported; they are:
     [:alpha:],	[:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (1 << 9)

/* If this bit is set, then the dot re doesn't match a null byte.
   If not set, it does.  */
#define RE_DOT_NOT_NULL (1 << 10)

/* If this bit is set, then [^...] doesn't match a newline.
   If not set, it does.  */
#define RE_HAT_NOT_NEWLINE (1 << 11)

/* If this bit is not set, back references are recognized.
   If set, they aren't.  */
#define RE_NO_BK_REFS (1 << 12)

/* If this bit is set, back references must refer to a preceding
   subexpression.  If not set, a back reference to a nonexistent
   subexpression is treated as literal characters.  */
#define RE_NO_EMPTY_BK_REF (1 << 13)

/* If this bit is set, bracket expressions can't be empty.
   If it is not set, they can be empty.  */
#define RE_NO_EMPTY_BRACKETS (1 << 14)

/* If this bit is set, then *, +, ? and { cannot be first in an re or
   immediately after a |, or a (.  Furthermore, a | cannot be first or
   last in an re, or immediately follow another | or a (.  Also, a ^
   cannot appear in a nonleading position and a $ cannot appear in a
   nontrailing position (outside of bracket expressions, that is).  */
#define RE_CONTEXTUAL_INVALID_OPS (1 << 15)

/* If this bit is set, then +, ? and | aren't recognized as operators.
   If it's not, they are.  */
#define RE_LIMITED_OPS (1 << 16)

/* If this bit is set, then an ending range point has to collate higher
     or equal to the starting range point.
   If it's not set, then when the ending range point collates lower
     than the starting range point, the range is just considered empty.  */
#define RE_NO_EMPTY_RANGES (1 << 17)

/* If this bit is set, then a hyphen (-) can't be an ending range point.
   If it isn't, then it can.  */
#define RE_NO_HYPHEN_RANGE_END (1 << 18)
14958185Selan 
15058185Selan 
/* Ready-made option sets for the standard syntaxes.  Each one is the
   bitwise OR of the option bits that syntax enables.  */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_GREP							\
  (RE_NEWLINE_OR | RE_BK_PLUS_QM)

#define RE_SYNTAX_EGREP							\
  (RE_NEWLINE_OR | RE_CONTEXT_INDEP_OPS					\
   | RE_NO_BK_VBAR | RE_NO_BK_PARENS)

#define RE_SYNTAX_POSIX_AWK						\
  (RE_CONTEXT_INDEP_OPS | RE_NO_BK_VBAR | RE_NO_BK_PARENS)

#define RE_SYNTAX_AWK							\
  (RE_AWK_CLASS_HACK | RE_CONTEXT_INDEP_OPS				\
   | RE_NO_BK_VBAR | RE_NO_BK_PARENS)

#define RE_SYNTAX_POSIX_BASIC						\
  (RE_BK_PLUS_QM							\
   | RE_INTERVALS							\
   | RE_CHAR_CLASSES							\
   | RE_DOT_NOT_NULL							\
   | RE_HAT_NOT_NEWLINE							\
   | RE_NO_EMPTY_BK_REF							\
   | RE_NO_EMPTY_BRACKETS						\
   | RE_LIMITED_OPS							\
   | RE_NO_EMPTY_RANGES							\
   | RE_NO_HYPHEN_RANGE_END)

#define RE_SYNTAX_POSIX_EXTENDED					\
  (RE_NO_BK_PARENS							\
   | RE_NO_BK_VBAR							\
   | RE_INTERVALS							\
   | RE_NO_BK_CURLY_BRACES						\
   | RE_CHAR_CLASSES							\
   | RE_HAT_NOT_NEWLINE							\
   | RE_NO_BK_REFS							\
   | RE_NO_EMPTY_BRACKETS						\
   | RE_CONTEXTUAL_INVALID_OPS						\
   | RE_NO_EMPTY_RANGES							\
   | RE_NO_HYPHEN_RANGE_END)
17258185Selan 
17358185Selan 
/* A compiled pattern, together with the optional tables consulted while
   compiling and searching with it.  */

struct re_pattern_buffer
{
  /* Space holding the compiled pattern commands.  */
  char *buffer;

  /* Size of the space that `buffer' points to.  */
  long allocated;

  /* Length of the portion of `buffer' actually occupied.  */
  long used;

  /* Pointer to the fastmap, if any, or zero if none.  When there is
     one, re_search uses it to skip over totally implausible
     characters.  */
  char *fastmap;

  /* Translate table to apply to all characters before comparing, or
     zero for no translation.  The translation is applied to a pattern
     when it is compiled and to data when it is matched.  */
  char *translate;

  /* Set to zero when a new pattern is stored, and to one when the
     fastmap is updated from it.  */
  char fastmap_accurate;

  /* Set to one by compiling the fastmap if this pattern might match
     the null string.  It does not necessarily match the null string
     in that case, but if this is zero, it cannot.  A value of 2 means
     it can match the null string, but only at the end of the range or
     before a character listed in the fastmap.  */
  char can_be_null;
};
19958185Selan 
20058185Selan 
/* search.c (search_buffer) needs this one value.  It is defined both in
   regex.c and here, so the two definitions have to be kept identical.  */
#define RE_EXACTN_VALUE 1
20458185Selan 
20558185Selan 
20658185Selan /* Structure to store register contents data in.
20758185Selan 
20858185Selan    Pass the address of such a structure as an argument to re_match, etc.,
20958185Selan    if you want this information back.
21058185Selan 
21158185Selan    For i from 1 to RE_NREGS - 1, start[i] records the starting index in
21258185Selan    the string of where the ith subexpression matched, and end[i] records
21358185Selan    one after the ending index.  start[0] and end[0] are analogous, for
21458185Selan    the entire pattern.  */
21558185Selan 
21658185Selan struct re_registers
21758185Selan   {
21858185Selan     int start[RE_NREGS];
21958185Selan     int end[RE_NREGS];
22058185Selan   };
22158185Selan 
22258185Selan 
22358185Selan 
#if !defined (__STDC__) && !defined (__cplusplus)

/* Pre-ANSI compilers: no `const' keyword and no prototypes, so only the
   return types are declared.  */
#define const /* nothing */

extern char *re_compile_pattern ();
/* Is this one really part of the advertised interface?  */
extern void re_compile_fastmap ();
extern int re_search ();
extern int re_search_2 ();
extern int re_match ();
extern int re_match_2 ();

/* 4.2 bsd compatibility.  */
extern char *re_comp ();
extern int re_exec ();

#else /* __STDC__ || __cplusplus */

/* ANSI C and C++: full prototypes.  */
extern char *re_compile_pattern (const char *, int,
				 struct re_pattern_buffer *);
/* Is this one really part of the advertised interface?  */
extern void re_compile_fastmap (struct re_pattern_buffer *);
extern int re_search (struct re_pattern_buffer *, char *, int, int, int,
		      struct re_registers *);
extern int re_search_2 (struct re_pattern_buffer *,
			char *, int,
			char *, int,
			int, int,
			struct re_registers *, int);
extern int re_match (struct re_pattern_buffer *, char *, int, int,
		     struct re_registers *);
extern int re_match_2 (struct re_pattern_buffer *,
		       char *, int,
		       char *, int,
		       int, struct re_registers *, int);

/* 4.2 bsd compatibility.  */
extern char *re_comp (const char *);
extern int re_exec (const char *);

#endif /* __STDC__ || __cplusplus */
25758185Selan 
25858185Selan 
/* Declared only when the including file defines SYNTAX_TABLE.  */
#ifdef SYNTAX_TABLE
extern char *re_syntax_table;
#endif

#ifdef __cplusplus
/* NOTE(review): this declaration sits inside the __cplusplus conditional,
   so it is visible to C++ translation units only; confirm whether C
   callers are meant to see it as well.  */
extern int re_max_failures;
}	/* closes the extern "C" block opened near the top of this header */
#endif
26758185Selan 
26858185Selan #endif /* !__REGEXP_LIBRARY */
269