/* Definitions for data structures and routines for the regular
   expression library, version 0.12.

   Copyright (C) 1985-2024 Free Software Foundation, Inc.

   This file is part of the GNU C Library.  Its master source is NOT part of
   the C library, however.  The master source lives in /gd/gnu/lib.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301 USA.  */

#ifndef _REGEX_H
#define _REGEX_H 1

/* Allow the use in C++ code.  */
#ifdef __cplusplus
extern "C" {
#endif

/* POSIX says that <sys/types.h> must be included (by the caller) before
   <regex.h>.  This header relies on that for `size_t'.  */

#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
   should be there.  */
# include <stddef.h>
#endif

/* The following two types have to be signed and unsigned integer type
   wide enough to hold a value of a pointer.  For most ANSI compilers
   ptrdiff_t and size_t should be likely OK.  Still size of these two
   types is 2 for Microsoft C.  Ugh... */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;

/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
typedef unsigned long int reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
  If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then an ending range point collating lower
     than the starting range point, as in [z-a], is invalid.
   If not set, then when the ending range point collates lower than the
     starting range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)

/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized.  */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
   This only works if regex.c is compiled -DDEBUG.
   We define this bit always, so that all that's needed to turn on
   debugging is to recompile regex.c; the calling code can always have
   this bit set, and it won't affect anything in the normal case.  */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)

/* If this bit is set, a syntactically invalid interval is treated as
   a string of ordinary characters.  For example, the ERE 'a{1' is
   treated as 'a\{1'.  */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)

/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  */
extern reg_syntax_t re_syntax_options;

/* Define combinations of the above bits for the standard possibilities.
   (The [[[ comments delimit what gets put into the Texinfo file, so
   don't delete them!)  */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK							\
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)

#define RE_SYNTAX_GNU_AWK						\
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))

#define RE_SYNTAX_POSIX_AWK 						\
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
   | RE_INTERVALS	    | RE_NO_GNU_OPS)

#define RE_SYNTAX_GREP							\
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP							\
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP						\
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
   | RE_INVALID_INTERVAL_ORD)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax.  */
#define _RE_SYNTAX_POSIX_COMMON						\
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC						\
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED					\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */

/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
#define RE_DUP_MAX (0x7fff)


/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then anchors match at newline characters in the
     string, and neither . nor a nonmatching list matches a newline.
   If not set, then newlines are ordinary characters and anchors match
     only at the ends of the string.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then returns differ between not matching and errors.  */
#define REG_NOSUB (REG_NEWLINE << 1)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)


/* If any error codes are removed, changed, or added, update the
   `re_error_msg' table in regex.c.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
#endif

  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH,		/* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT,		/* Invalid pattern.  */
  REG_ECOLLATE,		/* Not implemented.  */
  REG_ECTYPE,		/* Invalid character class name.  */
  REG_EESCAPE,		/* Trailing backslash.  */
  REG_ESUBREG,		/* Invalid back reference.  */
  REG_EBRACK,		/* Unmatched left bracket.  */
  REG_EPAREN,		/* Parenthesis imbalance.  */
  REG_EBRACE,		/* Unmatched \{.  */
  REG_BADBR,		/* Invalid contents of \{\}.  */
  REG_ERANGE,		/* Invalid range end.  */
  REG_ESPACE,		/* Ran out of memory.  */
  REG_BADRPT,		/* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND,		/* Premature end.  */
  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;

/* This data structure represents a compiled pattern.  Before calling
   the pattern compiler, the fields `buffer', `allocated', `fastmap',
   `translate', and `no_sub' can be set.  After the pattern has been
   compiled, the `re_nsub' field is available.  All other fields are
   private to the regex routines.  */

#ifndef RE_TRANSLATE_TYPE
# define RE_TRANSLATE_TYPE char *
#endif

struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
	/* Space that holds the compiled pattern.  It is declared as
          `unsigned char *' because its elements are
           sometimes used as array indexes.  */
  unsigned char *buffer;

	/* Number of bytes to which `buffer' points.  */
  unsigned long int allocated;

	/* Number of bytes actually used in `buffer'.  */
  unsigned long int used;

        /* Syntax setting with which the pattern was compiled.  */
  reg_syntax_t syntax;

        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
           the fastmap, if there is one, to skip over impossible
           starting points for matches.  */
  char *fastmap;

        /* Either a translate table to apply to all characters before
           comparing them, or zero for no translation.  The translation
           is applied to a pattern when it is compiled and to a string
           when it is matched.  */
  RE_TRANSLATE_TYPE translate;

	/* Number of subexpressions found by the compiler.  */
  size_t re_nsub;

        /* Zero if this pattern cannot match the empty string, one else.
           Well, in truth it's used only in `re_search_2', to see
           whether or not we should use the fastmap, so we don't set
           this absolutely perfectly; see `re_compile_fastmap' (the
           `duplicate' case).  */
  unsigned can_be_null : 1;

        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
             for `max (RE_NREGS, re_nsub + 1)' groups.
           If REGS_REALLOCATE, reallocate space if necessary.
           If REGS_FIXED, use what's there.  */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
  unsigned regs_allocated : 2;

        /* Set to zero when `regex_compile' compiles a pattern; set to one
           by `re_compile_fastmap' if it updates the fastmap.  */
  unsigned fastmap_accurate : 1;

        /* If set, `re_match_2' does not return information about
           subexpressions.  */
  unsigned no_sub : 1;

        /* If set, a beginning-of-line anchor doesn't match at the
           beginning of the string.  */
  unsigned not_bol : 1;

        /* Similarly for an end-of-line anchor.  */
  unsigned not_eol : 1;

        /* If true, an anchor at a newline matches.  */
  unsigned newline_anchor : 1;

/* [[[end pattern_buffer]]] */
};

typedef struct re_pattern_buffer regex_t;

/* Type for byte offsets within the string.  POSIX mandates this.  */
typedef int regoff_t;


/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};


/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  */
#ifndef RE_NREGS
# define RE_NREGS 30
#endif


/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
} regmatch_t;

/* Declarations for routines.  All of them are given with full
   prototypes.  */

/* Sets the current default syntax to SYNTAX, and return the old syntax.
   You can also simply assign to the `re_syntax_options' variable.  */
extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);

/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global `re_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.  */
extern const char *re_compile_pattern (const char *pattern, size_t length,
                                       struct re_pattern_buffer *buffer);


/* Compile a fastmap for the compiled pattern in BUFFER; used to
   accelerate searches.  Return 0 if successful and -2 if there was an
   internal error.  */
extern int re_compile_fastmap (struct re_pattern_buffer *buffer);


/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is nonzero and BUFFER->no_sub is
   zero).  */
extern int re_search (struct re_pattern_buffer *buffer, const char *string,
                      int length, int start, int range,
                      struct re_registers *regs);


/* Like `re_search', but search in the concatenation of STRING1 and
   STRING2.  Also, stop searching at index START + STOP.  */
extern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1,
                        int length1, const char *string2, int length2,
                        int start, int range, struct re_registers *regs,
                        int stop);


/* Like `re_search', but return how many characters in STRING the regexp
   in BUFFER matched, starting at position START.  */
extern int re_match (struct re_pattern_buffer *buffer, const char *string,
                     int length, int start, struct re_registers *regs);


/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1,
                       int length1, const char *string2, int length2,
                       int start, struct re_registers *regs, int stop);


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
   (regoff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   BUFFER will allocate its own register data, without
   freeing the old data.  */
extern void re_set_registers (struct re_pattern_buffer *buffer,
                              struct re_registers *regs,
                              unsigned num_regs, regoff_t *starts,
                              regoff_t *ends);

#if defined _REGEX_RE_COMP || defined _LIBC
# ifndef _CRAY
/* 4.2 bsd compatibility.  */
extern char *re_comp (const char *);
extern int re_exec (const char *);
# endif
#endif

/* GCC 2.95 and later have "__restrict"; C99 compilers have
   "restrict", and "configure" may have defined "restrict".  */
#ifndef __restrict
# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
#  if defined restrict || 199901L <= __STDC_VERSION__
#   define __restrict restrict
#  else
#   define __restrict
#  endif
# endif
#endif

/* GCC 3.1 and later support declaring arrays as non-overlapping
   using the syntax array_name[restrict]  */
#ifndef __restrict_arr
# if ! (3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) || defined (__GNUG__)
#  define __restrict_arr
# else
#  define __restrict_arr __restrict
# endif
#endif

/* POSIX compatibility.  */

/* Compile the NUL-terminated regular expression __PATTERN into __PREG,
   honoring the REG_* `cflags' bits in __CFLAGS.  */
extern int regcomp (regex_t *__restrict __preg,
                    const char *__restrict __pattern,
                    int __cflags);

/* Match the NUL-terminated __STRING against the pattern compiled into
   __PREG, honoring the REG_* `eflags' bits in __EFLAGS.  __PMATCH has
   room for __NMATCH entries.  */
#if (__GNUC__)
__extension__
#endif
extern int regexec (const regex_t *__restrict __preg,
                    const char *__restrict __string, size_t __nmatch,
                    regmatch_t __pmatch[__restrict_arr],
                    int __eflags);

/* Produce the message for error code __ERRCODE in __ERRBUF, which is
   __ERRBUF_SIZE bytes long.  */
extern size_t regerror (int __errcode, const regex_t *__preg,
                        char *__errbuf, size_t __errbuf_size);

/* Release the storage associated with the compiled pattern __PREG.  */
extern void regfree (regex_t *__preg);


#ifdef __cplusplus
}
#endif	/* C++ */

#endif /* regex.h */

/*
Local variables:
make-backup-files: t
version-control: t
trim-versions-without-asking: nil
End:
*/