xref: /netbsd-src/external/gpl3/gdb/dist/include/xregex2.h (revision e663ba6e3a60083e70de702e9d54bf486a57b6a7)
198b9484cSchristos /* Definitions for data structures and routines for the regular
298b9484cSchristos    expression library, version 0.12.
398b9484cSchristos 
4*e663ba6eSchristos    Copyright (C) 1985-2024 Free Software Foundation, Inc.
598b9484cSchristos 
698b9484cSchristos    This file is part of the GNU C Library.  Its master source is NOT part of
798b9484cSchristos    the C library, however.  The master source lives in /gd/gnu/lib.
898b9484cSchristos 
998b9484cSchristos    The GNU C Library is free software; you can redistribute it and/or
1098b9484cSchristos    modify it under the terms of the GNU Lesser General Public
1198b9484cSchristos    License as published by the Free Software Foundation; either
1298b9484cSchristos    version 2.1 of the License, or (at your option) any later version.
1398b9484cSchristos 
1498b9484cSchristos    The GNU C Library is distributed in the hope that it will be useful,
1598b9484cSchristos    but WITHOUT ANY WARRANTY; without even the implied warranty of
1698b9484cSchristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1798b9484cSchristos    Lesser General Public License for more details.
1898b9484cSchristos 
1998b9484cSchristos    You should have received a copy of the GNU Lesser General Public
2098b9484cSchristos    License along with the GNU C Library; if not, write to the Free
2198b9484cSchristos    Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
2298b9484cSchristos    02110-1301 USA.  */
2398b9484cSchristos 
2498b9484cSchristos #ifndef _REGEX_H
2598b9484cSchristos #define _REGEX_H 1
2698b9484cSchristos 
2798b9484cSchristos /* Allow the use in C++ code.  */
2898b9484cSchristos #ifdef __cplusplus
2998b9484cSchristos extern "C" {
3098b9484cSchristos #endif
3198b9484cSchristos 
3298b9484cSchristos /* POSIX says that <sys/types.h> must be included (by the caller) before
3398b9484cSchristos    <regex.h>.  */
3498b9484cSchristos 
3598b9484cSchristos #if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
3698b9484cSchristos /* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
3798b9484cSchristos    should be there.  */
3898b9484cSchristos # include <stddef.h>
3998b9484cSchristos #endif
4098b9484cSchristos 
/* Integer types used by the regex routines: one signed, one unsigned,
   each required to be wide enough to hold the value of a pointer.
   ptrdiff_t and size_t would probably be fine for most ANSI compilers,
   but those two types are only 2 bytes wide under Microsoft C.  */
typedef long s_reg_t;
typedef unsigned long active_reg_t;
4798b9484cSchristos 
4898b9484cSchristos /* The following bits are used to determine the regexp syntax we
4998b9484cSchristos    recognize.  The set/not-set meanings are chosen so that Emacs syntax
5098b9484cSchristos    remains the value 0.  The bits are given in alphabetical order, and
5198b9484cSchristos    the definitions shifted by one from the previous bit; thus, when we
5298b9484cSchristos    add or remove a bit, only one other definition need change.  */
5398b9484cSchristos typedef unsigned long int reg_syntax_t;
5498b9484cSchristos 
5598b9484cSchristos /* If this bit is not set, then \ inside a bracket expression is literal.
5698b9484cSchristos    If set, then such a \ quotes the following character.  */
5798b9484cSchristos #define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
5898b9484cSchristos 
5998b9484cSchristos /* If this bit is not set, then + and ? are operators, and \+ and \? are
6098b9484cSchristos      literals.
6198b9484cSchristos    If set, then \+ and \? are operators and + and ? are literals.  */
6298b9484cSchristos #define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
6398b9484cSchristos 
6498b9484cSchristos /* If this bit is set, then character classes are supported.  They are:
6598b9484cSchristos      [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
6698b9484cSchristos      [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
6798b9484cSchristos    If not set, then character classes are not supported.  */
6898b9484cSchristos #define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
6998b9484cSchristos 
7098b9484cSchristos /* If this bit is set, then ^ and $ are always anchors (outside bracket
7198b9484cSchristos      expressions, of course).
7298b9484cSchristos    If this bit is not set, then it depends:
7398b9484cSchristos         ^  is an anchor if it is at the beginning of a regular
7498b9484cSchristos            expression or after an open-group or an alternation operator;
7598b9484cSchristos         $  is an anchor if it is at the end of a regular expression, or
7698b9484cSchristos            before a close-group or an alternation operator.
7798b9484cSchristos 
7898b9484cSchristos    This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
7998b9484cSchristos    POSIX draft 11.2 says that * etc. in leading positions is undefined.
8098b9484cSchristos    We already implemented a previous draft which made those constructs
8198b9484cSchristos    invalid, though, so we haven't changed the code back.  */
8298b9484cSchristos #define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
8398b9484cSchristos 
8498b9484cSchristos /* If this bit is set, then special characters are always special
8598b9484cSchristos      regardless of where they are in the pattern.
8698b9484cSchristos    If this bit is not set, then special characters are special only in
8798b9484cSchristos      some contexts; otherwise they are ordinary.  Specifically,
8898b9484cSchristos      * + ? and intervals are only special when not after the beginning,
8998b9484cSchristos      open-group, or alternation operator.  */
9098b9484cSchristos #define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
9198b9484cSchristos 
9298b9484cSchristos /* If this bit is set, then *, +, ?, and { cannot be first in an re or
9398b9484cSchristos      immediately after an alternation or begin-group operator.  */
9498b9484cSchristos #define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
9598b9484cSchristos 
9698b9484cSchristos /* If this bit is set, then . matches newline.
9798b9484cSchristos    If not set, then it doesn't.  */
9898b9484cSchristos #define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
9998b9484cSchristos 
10098b9484cSchristos /* If this bit is set, then . doesn't match NUL.
10198b9484cSchristos    If not set, then it does.  */
10298b9484cSchristos #define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
10398b9484cSchristos 
10498b9484cSchristos /* If this bit is set, nonmatching lists [^...] do not match newline.
10598b9484cSchristos    If not set, they do.  */
10698b9484cSchristos #define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
10798b9484cSchristos 
10898b9484cSchristos /* If this bit is set, either \{...\} or {...} defines an
10998b9484cSchristos      interval, depending on RE_NO_BK_BRACES.
11098b9484cSchristos    If not set, \{, \}, {, and } are literals.  */
11198b9484cSchristos #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
11298b9484cSchristos 
11398b9484cSchristos /* If this bit is set, +, ? and | aren't recognized as operators.
11498b9484cSchristos    If not set, they are.  */
11598b9484cSchristos #define RE_LIMITED_OPS (RE_INTERVALS << 1)
11698b9484cSchristos 
11798b9484cSchristos /* If this bit is set, newline is an alternation operator.
11898b9484cSchristos    If not set, newline is literal.  */
11998b9484cSchristos #define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
12098b9484cSchristos 
12198b9484cSchristos /* If this bit is set, then `{...}' defines an interval, and \{ and \}
12298b9484cSchristos      are literals.
12398b9484cSchristos   If not set, then `\{...\}' defines an interval.  */
12498b9484cSchristos #define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
12598b9484cSchristos 
12698b9484cSchristos /* If this bit is set, (...) defines a group, and \( and \) are literals.
12798b9484cSchristos    If not set, \(...\) defines a group, and ( and ) are literals.  */
12898b9484cSchristos #define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
12998b9484cSchristos 
13098b9484cSchristos /* If this bit is set, then \<digit> matches <digit>.
13198b9484cSchristos    If not set, then \<digit> is a back-reference.  */
13298b9484cSchristos #define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
13398b9484cSchristos 
13498b9484cSchristos /* If this bit is set, then | is an alternation operator, and \| is literal.
13598b9484cSchristos    If not set, then \| is an alternation operator, and | is literal.  */
13698b9484cSchristos #define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
13798b9484cSchristos 
/* If this bit is set, then a range whose starting point collates higher
     than its ending point, as in [z-a], is invalid.
   If not set, then when the starting range point collates higher than
     the ending range point, the range is ignored.  */
14298b9484cSchristos #define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
14398b9484cSchristos 
14498b9484cSchristos /* If this bit is set, then an unmatched ) is ordinary.
14598b9484cSchristos    If not set, then an unmatched ) is invalid.  */
14698b9484cSchristos #define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
14798b9484cSchristos 
14898b9484cSchristos /* If this bit is set, succeed as soon as we match the whole pattern,
14998b9484cSchristos    without further backtracking.  */
15098b9484cSchristos #define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
15198b9484cSchristos 
15298b9484cSchristos /* If this bit is set, do not process the GNU regex operators.
15398b9484cSchristos    If not set, then the GNU regex operators are recognized. */
15498b9484cSchristos #define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
15598b9484cSchristos 
15698b9484cSchristos /* If this bit is set, turn on internal regex debugging.
15798b9484cSchristos    If not set, and debugging was on, turn it off.
15898b9484cSchristos    This only works if regex.c is compiled -DDEBUG.
15998b9484cSchristos    We define this bit always, so that all that's needed to turn on
16098b9484cSchristos    debugging is to recompile regex.c; the calling code can always have
16198b9484cSchristos    this bit set, and it won't affect anything in the normal case. */
16298b9484cSchristos #define RE_DEBUG (RE_NO_GNU_OPS << 1)
16398b9484cSchristos 
16498b9484cSchristos /* If this bit is set, a syntactically invalid interval is treated as
16598b9484cSchristos    a string of ordinary characters.  For example, the ERE 'a{1' is
16698b9484cSchristos    treated as 'a\{1'.  */
16798b9484cSchristos #define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
16898b9484cSchristos 
16998b9484cSchristos /* This global variable defines the particular regexp syntax to use (for
17098b9484cSchristos    some interfaces).  When a regexp is compiled, the syntax used is
17198b9484cSchristos    stored in the pattern buffer, so changing this does not affect
17298b9484cSchristos    already-compiled regexps.  */
17398b9484cSchristos extern reg_syntax_t re_syntax_options;
17498b9484cSchristos 
17598b9484cSchristos /* Define combinations of the above bits for the standard possibilities.
17698b9484cSchristos    (The [[[ comments delimit what gets put into the Texinfo file, so
17798b9484cSchristos    don't delete them!)  */
17898b9484cSchristos /* [[[begin syntaxes]]] */
17998b9484cSchristos #define RE_SYNTAX_EMACS 0
18098b9484cSchristos 
18198b9484cSchristos #define RE_SYNTAX_AWK							\
18298b9484cSchristos   (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
18398b9484cSchristos    | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
18498b9484cSchristos    | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
18598b9484cSchristos    | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
18698b9484cSchristos    | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
18798b9484cSchristos 
18898b9484cSchristos #define RE_SYNTAX_GNU_AWK						\
18998b9484cSchristos   ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
19098b9484cSchristos    & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))
19198b9484cSchristos 
19298b9484cSchristos #define RE_SYNTAX_POSIX_AWK 						\
19398b9484cSchristos   (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
19498b9484cSchristos    | RE_INTERVALS	    | RE_NO_GNU_OPS)
19598b9484cSchristos 
19698b9484cSchristos #define RE_SYNTAX_GREP							\
19798b9484cSchristos   (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
19898b9484cSchristos    | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
19998b9484cSchristos    | RE_NEWLINE_ALT)
20098b9484cSchristos 
20198b9484cSchristos #define RE_SYNTAX_EGREP							\
20298b9484cSchristos   (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
20398b9484cSchristos    | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
20498b9484cSchristos    | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
20598b9484cSchristos    | RE_NO_BK_VBAR)
20698b9484cSchristos 
20798b9484cSchristos #define RE_SYNTAX_POSIX_EGREP						\
20898b9484cSchristos   (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
20998b9484cSchristos    | RE_INVALID_INTERVAL_ORD)
21098b9484cSchristos 
21198b9484cSchristos /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
21298b9484cSchristos #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
21398b9484cSchristos 
21498b9484cSchristos #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
21598b9484cSchristos 
21698b9484cSchristos /* Syntax bits common to both basic and extended POSIX regex syntax.  */
21798b9484cSchristos #define _RE_SYNTAX_POSIX_COMMON						\
21898b9484cSchristos   (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
21998b9484cSchristos    | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
22098b9484cSchristos 
22198b9484cSchristos #define RE_SYNTAX_POSIX_BASIC						\
22298b9484cSchristos   (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
22398b9484cSchristos 
22498b9484cSchristos /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
22598b9484cSchristos    RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
22698b9484cSchristos    isn't minimal, since other operators, such as \`, aren't disabled.  */
22798b9484cSchristos #define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
22898b9484cSchristos   (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
22998b9484cSchristos 
23098b9484cSchristos #define RE_SYNTAX_POSIX_EXTENDED					\
23198b9484cSchristos   (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
23298b9484cSchristos    | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
23398b9484cSchristos    | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
23498b9484cSchristos    | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
23598b9484cSchristos 
23698b9484cSchristos /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
23798b9484cSchristos    removed and RE_NO_BK_REFS is added.  */
23898b9484cSchristos #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
23998b9484cSchristos   (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
24098b9484cSchristos    | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
24198b9484cSchristos    | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
24298b9484cSchristos    | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
24398b9484cSchristos /* [[[end syntaxes]]] */
24498b9484cSchristos 
/* Largest number of duplicates an interval may ask for.  Some systems
   (erroneously) define RE_DUP_MAX in other header files; we want our
   own value, so any earlier definition is discarded first.  (An #undef
   of a name that is not currently a macro is simply ignored, so no
   #ifdef guard is needed around it.)  */
#undef RE_DUP_MAX
/* Spelled as a hex constant: if sizeof(int) == 2, then
   ((1 << 15) - 1) overflows.  */
#define RE_DUP_MAX (0x7fff)
25398b9484cSchristos 
25498b9484cSchristos 
25598b9484cSchristos /* POSIX `cflags' bits (i.e., information for `regcomp').  */
25698b9484cSchristos 
25798b9484cSchristos /* If this bit is set, then use extended regular expression syntax.
25898b9484cSchristos    If not set, then use basic regular expression syntax.  */
25998b9484cSchristos #define REG_EXTENDED 1
26098b9484cSchristos 
26198b9484cSchristos /* If this bit is set, then ignore case when matching.
26298b9484cSchristos    If not set, then case is significant.  */
26398b9484cSchristos #define REG_ICASE (REG_EXTENDED << 1)
26498b9484cSchristos 
/* If this bit is set, then anchors do match at newline
     characters in the string.
   If not set, then anchors do not match at newlines.  */
26898b9484cSchristos #define REG_NEWLINE (REG_ICASE << 1)
26998b9484cSchristos 
/* If this bit is set, then report only success or fail in regexec.
   If not set, then regexec also reports where the match and its
     subexpressions were found (through its PMATCH argument).  */
27298b9484cSchristos #define REG_NOSUB (REG_NEWLINE << 1)
27398b9484cSchristos 
27498b9484cSchristos 
27598b9484cSchristos /* POSIX `eflags' bits (i.e., information for regexec).  */
27698b9484cSchristos 
27798b9484cSchristos /* If this bit is set, then the beginning-of-line operator doesn't match
27898b9484cSchristos      the beginning of the string (presumably because it's not the
27998b9484cSchristos      beginning of a line).
28098b9484cSchristos    If not set, then the beginning-of-line operator does match the
28198b9484cSchristos      beginning of the string.  */
28298b9484cSchristos #define REG_NOTBOL 1
28398b9484cSchristos 
28498b9484cSchristos /* Like REG_NOTBOL, except for the end-of-line.  */
28598b9484cSchristos #define REG_NOTEOL (1 << 1)
28698b9484cSchristos 
28798b9484cSchristos 
/* Status and error codes.  Whenever one is removed, changed, or added,
   the `re_error_msg' table in regex.c must be updated to match.  The
   values are spelled out so that the numbering is visible at a
   glance.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,	/* Never happens with this implementation.  */
#endif

  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH = 1,	/* regexec did not find a match.  */

  /* Error codes returned by POSIX regcomp, in the order in which the
     standard lists them.  */
  REG_BADPAT = 2,	/* Invalid pattern.  */
  REG_ECOLLATE = 3,	/* Not implemented.  */
  REG_ECTYPE = 4,	/* Invalid character class name.  */
  REG_EESCAPE = 5,	/* Trailing backslash.  */
  REG_ESUBREG = 6,	/* Invalid back reference.  */
  REG_EBRACK = 7,	/* Unmatched left bracket.  */
  REG_EPAREN = 8,	/* Parenthesis imbalance.  */
  REG_EBRACE = 9,	/* Unmatched \{.  */
  REG_BADBR = 10,	/* Invalid contents of \{\}.  */
  REG_ERANGE = 11,	/* Invalid range end.  */
  REG_ESPACE = 12,	/* Ran out of memory.  */
  REG_BADRPT = 13,	/* No preceding re for repetition op.  */

  /* Error codes added on top of the POSIX ones.  */
  REG_EEND = 14,	/* Premature end.  */
  REG_ESIZE = 15,	/* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN = 16	/* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;
31998b9484cSchristos 
32098b9484cSchristos /* This data structure represents a compiled pattern.  Before calling
32198b9484cSchristos    the pattern compiler, the fields `buffer', `allocated', `fastmap',
32298b9484cSchristos    `translate', and `no_sub' can be set.  After the pattern has been
32398b9484cSchristos    compiled, the `re_nsub' field is available.  All other fields are
32498b9484cSchristos    private to the regex routines.  */
32598b9484cSchristos 
32698b9484cSchristos #ifndef RE_TRANSLATE_TYPE
32798b9484cSchristos # define RE_TRANSLATE_TYPE char *
32898b9484cSchristos #endif
32998b9484cSchristos 
33098b9484cSchristos struct re_pattern_buffer
33198b9484cSchristos {
33298b9484cSchristos /* [[[begin pattern_buffer]]] */
33398b9484cSchristos 	/* Space that holds the compiled pattern.  It is declared as
33498b9484cSchristos           `unsigned char *' because its elements are
33598b9484cSchristos            sometimes used as array indexes.  */
33698b9484cSchristos   unsigned char *buffer;
33798b9484cSchristos 
33898b9484cSchristos 	/* Number of bytes to which `buffer' points.  */
33998b9484cSchristos   unsigned long int allocated;
34098b9484cSchristos 
34198b9484cSchristos 	/* Number of bytes actually used in `buffer'.  */
34298b9484cSchristos   unsigned long int used;
34398b9484cSchristos 
34498b9484cSchristos         /* Syntax setting with which the pattern was compiled.  */
34598b9484cSchristos   reg_syntax_t syntax;
34698b9484cSchristos 
34798b9484cSchristos         /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
34898b9484cSchristos            the fastmap, if there is one, to skip over impossible
34998b9484cSchristos            starting points for matches.  */
35098b9484cSchristos   char *fastmap;
35198b9484cSchristos 
35298b9484cSchristos         /* Either a translate table to apply to all characters before
35398b9484cSchristos            comparing them, or zero for no translation.  The translation
35498b9484cSchristos            is applied to a pattern when it is compiled and to a string
35598b9484cSchristos            when it is matched.  */
35698b9484cSchristos   RE_TRANSLATE_TYPE translate;
35798b9484cSchristos 
35898b9484cSchristos 	/* Number of subexpressions found by the compiler.  */
35998b9484cSchristos   size_t re_nsub;
36098b9484cSchristos 
36198b9484cSchristos         /* Zero if this pattern cannot match the empty string, one else.
36298b9484cSchristos            Well, in truth it's used only in `re_search_2', to see
36398b9484cSchristos            whether or not we should use the fastmap, so we don't set
36498b9484cSchristos            this absolutely perfectly; see `re_compile_fastmap' (the
36598b9484cSchristos            `duplicate' case).  */
36698b9484cSchristos   unsigned can_be_null : 1;
36798b9484cSchristos 
36898b9484cSchristos         /* If REGS_UNALLOCATED, allocate space in the `regs' structure
36998b9484cSchristos              for `max (RE_NREGS, re_nsub + 1)' groups.
37098b9484cSchristos            If REGS_REALLOCATE, reallocate space if necessary.
37198b9484cSchristos            If REGS_FIXED, use what's there.  */
37298b9484cSchristos #define REGS_UNALLOCATED 0
37398b9484cSchristos #define REGS_REALLOCATE 1
37498b9484cSchristos #define REGS_FIXED 2
37598b9484cSchristos   unsigned regs_allocated : 2;
37698b9484cSchristos 
37798b9484cSchristos         /* Set to zero when `regex_compile' compiles a pattern; set to one
37898b9484cSchristos            by `re_compile_fastmap' if it updates the fastmap.  */
37998b9484cSchristos   unsigned fastmap_accurate : 1;
38098b9484cSchristos 
38198b9484cSchristos         /* If set, `re_match_2' does not return information about
38298b9484cSchristos            subexpressions.  */
38398b9484cSchristos   unsigned no_sub : 1;
38498b9484cSchristos 
38598b9484cSchristos         /* If set, a beginning-of-line anchor doesn't match at the
38698b9484cSchristos            beginning of the string.  */
38798b9484cSchristos   unsigned not_bol : 1;
38898b9484cSchristos 
38998b9484cSchristos         /* Similarly for an end-of-line anchor.  */
39098b9484cSchristos   unsigned not_eol : 1;
39198b9484cSchristos 
39298b9484cSchristos         /* If true, an anchor at a newline matches.  */
39398b9484cSchristos   unsigned newline_anchor : 1;
39498b9484cSchristos 
39598b9484cSchristos /* [[[end pattern_buffer]]] */
39698b9484cSchristos };
39798b9484cSchristos 
/* Shorthand for the pattern buffer structure.  */
typedef struct re_pattern_buffer regex_t;

/* Byte offset within the string; POSIX mandates this type.  */
typedef int regoff_t;


/* Where register match data is stored: a structure of arrays.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  unsigned int num_regs;
  regoff_t *start;
  regoff_t *end;
};


/* When `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  */
#if !defined RE_NREGS
# define RE_NREGS 30
#endif


/* The POSIX form of register data.  Besides using different names than
   `re_registers', POSIX uses an array of structures rather than a
   structure of arrays.  */
typedef struct
{
  /* Byte offset from the start of the string to the start of the
     substring.  */
  regoff_t rm_so;
  /* Byte offset from the start of the string to the end of the
     substring.  */
  regoff_t rm_eo;
} regmatch_t;
43098b9484cSchristos 
43198b9484cSchristos /* Declarations for routines.  */
43298b9484cSchristos 
/* The routines below are declared with ordinary ANSI C prototypes.
   (Earlier versions of this header wrapped every argument list in a
   macro so that the declarations would also work for non-ANSI
   compilers; that macro is no longer used.)  */
43898b9484cSchristos 
43998b9484cSchristos /* Sets the current default syntax to SYNTAX, and return the old syntax.
44098b9484cSchristos    You can also simply assign to the `re_syntax_options' variable.  */
44198b9484cSchristos extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);
44298b9484cSchristos 
44398b9484cSchristos /* Compile the regular expression PATTERN, with length LENGTH
44498b9484cSchristos    and syntax given by the global `re_syntax_options', into the buffer
44598b9484cSchristos    BUFFER.  Return NULL if successful, and an error string if not.  */
44698b9484cSchristos extern const char *re_compile_pattern (const char *pattern, size_t length,
44798b9484cSchristos                                        struct re_pattern_buffer *buffer);
44898b9484cSchristos 
44998b9484cSchristos 
/* Compile a fastmap for the compiled pattern in BUFFER; used to
   accelerate searches.  Return 0 if successful and -2 if there was an
   internal error.  */
45398b9484cSchristos extern int re_compile_fastmap (struct re_pattern_buffer *buffer);
45498b9484cSchristos 
45598b9484cSchristos 
/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
46198b9484cSchristos extern int re_search (struct re_pattern_buffer *buffer, const char *string,
46298b9484cSchristos                       int length, int start, int range,
46398b9484cSchristos                       struct re_registers *regs);
46498b9484cSchristos 
46598b9484cSchristos 
46698b9484cSchristos /* Like `re_search', but search in the concatenation of STRING1 and
46798b9484cSchristos    STRING2.  Also, stop searching at index START + STOP.  */
46898b9484cSchristos extern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1,
46998b9484cSchristos                         int length1, const char *string2, int length2,
47098b9484cSchristos                         int start, int range, struct re_registers *regs,
47198b9484cSchristos                         int stop);
47298b9484cSchristos 
47398b9484cSchristos 
47498b9484cSchristos /* Like `re_search', but return how many characters in STRING the regexp
47598b9484cSchristos    in BUFFER matched, starting at position START.  */
47698b9484cSchristos extern int re_match (struct re_pattern_buffer *buffer, const char *string,
47798b9484cSchristos                      int length, int start, struct re_registers *regs);
47898b9484cSchristos 
47998b9484cSchristos 
48098b9484cSchristos /* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
48198b9484cSchristos extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1,
48298b9484cSchristos                        int length1, const char *string2, int length2,
48398b9484cSchristos                        int start, struct re_registers *regs, int stop);
48498b9484cSchristos 
48598b9484cSchristos 
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
   (regoff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   BUFFER will allocate its own register data, without
   freeing the old data.  */
49898b9484cSchristos extern void re_set_registers (struct re_pattern_buffer *buffer,
49998b9484cSchristos                               struct re_registers *regs,
50098b9484cSchristos                               unsigned num_regs, regoff_t *starts,
50198b9484cSchristos                               regoff_t *ends);
50298b9484cSchristos 
50398b9484cSchristos #if defined _REGEX_RE_COMP || defined _LIBC
50498b9484cSchristos # ifndef _CRAY
50598b9484cSchristos /* 4.2 bsd compatibility.  */
50698b9484cSchristos extern char *re_comp (const char *);
50798b9484cSchristos extern int re_exec (const char *);
50898b9484cSchristos # endif
50998b9484cSchristos #endif
51098b9484cSchristos 
51198b9484cSchristos /* GCC 2.95 and later have "__restrict"; C99 compilers have
51298b9484cSchristos    "restrict", and "configure" may have defined "restrict".  */
51398b9484cSchristos #ifndef __restrict
51498b9484cSchristos # if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
51598b9484cSchristos #  if defined restrict || 199901L <= __STDC_VERSION__
51698b9484cSchristos #   define __restrict restrict
51798b9484cSchristos #  else
51898b9484cSchristos #   define __restrict
51998b9484cSchristos #  endif
52098b9484cSchristos # endif
52198b9484cSchristos #endif
52298b9484cSchristos 
52398b9484cSchristos /* GCC 3.1 and later support declaring arrays as non-overlapping
52498b9484cSchristos    using the syntax array_name[restrict]  */
52598b9484cSchristos #ifndef __restrict_arr
52698b9484cSchristos # if ! (3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) || defined (__GNUG__)
52798b9484cSchristos #  define __restrict_arr
52898b9484cSchristos # else
52998b9484cSchristos #  define __restrict_arr __restrict
53098b9484cSchristos # endif
53198b9484cSchristos #endif
53298b9484cSchristos 
53398b9484cSchristos /* POSIX compatibility.  */
53498b9484cSchristos extern int regcomp (regex_t *__restrict __preg,
53598b9484cSchristos                     const char *__restrict __pattern,
53698b9484cSchristos                     int __cflags);
53798b9484cSchristos 
53898b9484cSchristos #if (__GNUC__)
53998b9484cSchristos __extension__
54098b9484cSchristos #endif
54198b9484cSchristos extern int regexec (const regex_t *__restrict __preg,
54298b9484cSchristos                     const char *__restrict __string, size_t __nmatch,
54398b9484cSchristos                     regmatch_t __pmatch[__restrict_arr],
54498b9484cSchristos                     int __eflags);
54598b9484cSchristos 
54698b9484cSchristos extern size_t regerror (int __errcode, const regex_t *__preg,
54798b9484cSchristos                         char *__errbuf, size_t __errbuf_size);
54898b9484cSchristos 
54998b9484cSchristos extern void regfree (regex_t *__preg);
55098b9484cSchristos 
55198b9484cSchristos 
55298b9484cSchristos #ifdef __cplusplus
55398b9484cSchristos }
55498b9484cSchristos #endif	/* C++ */
55598b9484cSchristos 
55698b9484cSchristos #endif /* regex.h */
55798b9484cSchristos 
55898b9484cSchristos /*
55998b9484cSchristos Local variables:
56098b9484cSchristos make-backup-files: t
56198b9484cSchristos version-control: t
56298b9484cSchristos trim-versions-without-asking: nil
56398b9484cSchristos End:
56498b9484cSchristos */
565