/* Definitions for data structures and routines for the regular
   expression library, version 0.12.
   Copyright (C) 1985,1989-1993,1995-1998, 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.  Its master source is NOT part of
   the C library, however.  The master source lives in /gd/gnu/lib.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#ifndef _REGEX_H
#define _REGEX_H 1

/* Allow the use in C++ code.  */
#ifdef __cplusplus
extern "C" {
#endif

/* POSIX says that <sys/types.h> must be included (by the caller) before
   <regex.h>.  */

#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
   should be there.  */
# include <stddef.h>
#endif

/* The following two types have to be signed and unsigned integer type
   wide enough to hold a value of a pointer.  For most ANSI compilers
   ptrdiff_t and size_t should be likely OK.  Still size of these two
   types is 2 for Microsoft C.  Ugh... */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;

/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
typedef unsigned long int reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
  If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then an ending range point collating lower
     than the starting range point, as in [z-a], is invalid.
   If not set, then when the ending range point collates lower than the
     starting range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)

/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized.  */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
   This only works if regex.c is compiled -DDEBUG.
   We define this bit always, so that all that's needed to turn on
   debugging is to recompile regex.c; the calling code can always have
   this bit set, and it won't affect anything in the normal case.  */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)

/* If this bit is set, a syntactically invalid interval is treated as
   a string of ordinary characters.  For example, the ERE 'a{1' is
   treated as 'a\{1'.  */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)

/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  */
extern reg_syntax_t re_syntax_options;

/* Define combinations of the above bits for the standard possibilities.
   (The [[[ comments delimit what gets put into the Texinfo file, so
   don't delete them!)  */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK							\
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL			\
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS			\
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES			\
   | RE_DOT_NEWLINE		  | RE_CONTEXT_INDEP_ANCHORS		\
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)

#define RE_SYNTAX_GNU_AWK						\
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG)	\
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))

#define RE_SYNTAX_POSIX_AWK						\
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS		\
   | RE_INTERVALS	    | RE_NO_GNU_OPS)

#define RE_SYNTAX_GREP							\
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP							\
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP						\
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES			\
   | RE_INVALID_INTERVAL_ORD)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax.  */
#define _RE_SYNTAX_POSIX_COMMON						\
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC						\
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED					\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR				\
   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */

/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows.  */
#define RE_DUP_MAX (0x7fff)


/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then anchors match at newline characters in
     the string, as POSIX specifies for REG_NEWLINE.
   If not set, then anchors do not match at newlines.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then regexec can also report the offsets of the matched
     substrings through its `pmatch' argument.  */
#define REG_NOSUB (REG_NEWLINE << 1)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)


/* If any error codes are removed, changed, or added, update the
   `re_error_msg' table in regex.c.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,	/* This will never happen for this implementation.  */
#endif

  REG_NOERROR = 0,	/* Success.  */
  REG_NOMATCH,		/* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT,		/* Invalid pattern.  */
  REG_ECOLLATE,		/* Not implemented.  */
  REG_ECTYPE,		/* Invalid character class name.  */
  REG_EESCAPE,		/* Trailing backslash.  */
  REG_ESUBREG,		/* Invalid back reference.  */
  REG_EBRACK,		/* Unmatched left bracket.  */
  REG_EPAREN,		/* Parenthesis imbalance.  */
  REG_EBRACE,		/* Unmatched \{.  */
  REG_BADBR,		/* Invalid contents of \{\}.  */
  REG_ERANGE,		/* Invalid range end.  */
  REG_ESPACE,		/* Ran out of memory.  */
  REG_BADRPT,		/* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND,		/* Premature end.  */
  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN		/* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;

/* This data structure represents a compiled pattern.  Before calling
   the pattern compiler, the fields `buffer', `allocated', `fastmap',
   `translate', and `no_sub' can be set.  After the pattern has been
   compiled, the `re_nsub' field is available.  All other fields are
   private to the regex routines.  */

#ifndef RE_TRANSLATE_TYPE
# define RE_TRANSLATE_TYPE char *
#endif

struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
	/* Space that holds the compiled pattern.  It is declared as
          `unsigned char *' because its elements are
           sometimes used as array indexes.  */
  unsigned char *buffer;

	/* Number of bytes to which `buffer' points.  */
  unsigned long int allocated;

	/* Number of bytes actually used in `buffer'.  */
  unsigned long int used;

        /* Syntax setting with which the pattern was compiled.  */
  reg_syntax_t syntax;

        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
           the fastmap, if there is one, to skip over impossible
           starting points for matches.  */
  char *fastmap;

        /* Either a translate table to apply to all characters before
           comparing them, or zero for no translation.  The translation
           is applied to a pattern when it is compiled and to a string
           when it is matched.  */
  RE_TRANSLATE_TYPE translate;

	/* Number of subexpressions found by the compiler.  */
  size_t re_nsub;

        /* Zero if this pattern cannot match the empty string, one else.
           Well, in truth it's used only in `re_search_2', to see
           whether or not we should use the fastmap, so we don't set
           this absolutely perfectly; see `re_compile_fastmap' (the
           `duplicate' case).  */
  unsigned can_be_null : 1;

        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
             for `max (RE_NREGS, re_nsub + 1)' groups.
           If REGS_REALLOCATE, reallocate space if necessary.
           If REGS_FIXED, use what's there.  */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
  unsigned regs_allocated : 2;

        /* Set to zero when `regex_compile' compiles a pattern; set to one
           by `re_compile_fastmap' if it updates the fastmap.  */
  unsigned fastmap_accurate : 1;

        /* If set, `re_match_2' does not return information about
           subexpressions.  */
  unsigned no_sub : 1;

        /* If set, a beginning-of-line anchor doesn't match at the
           beginning of the string.  */
  unsigned not_bol : 1;

        /* Similarly for an end-of-line anchor.  */
  unsigned not_eol : 1;

        /* If true, an anchor at a newline matches.  */
  unsigned newline_anchor : 1;

/* [[[end pattern_buffer]]] */
};

typedef struct re_pattern_buffer regex_t;

/* Type for byte offsets within the string.  POSIX mandates this.  */
typedef int regoff_t;


/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};


/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  */
#ifndef RE_NREGS
# define RE_NREGS 30
#endif


/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
} regmatch_t;

/* Declarations for routines.  */

/* To avoid duplicating every routine declaration -- once with a
   prototype (if we are ANSI), and once without (if we aren't) -- we
   use the following macro to declare argument types.  This
   unfortunately clutters up the declarations a bit, but I think it's
   worth it.

   C++ always needs the prototype form: there an empty parameter list
   declares a function taking no arguments, so prototypes are kept
   whenever __cplusplus is defined, even if the compiler leaves
   __STDC__ undefined or zero.  */

#if __STDC__ || defined __cplusplus

# define _RE_ARGS(args) args

#else /* neither __STDC__ nor __cplusplus */

# define _RE_ARGS(args) ()

#endif /* neither __STDC__ nor __cplusplus */

/* Set the current default syntax to SYNTAX, and return the old syntax.
   You can also simply assign to the `re_syntax_options' variable.  */
extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));

/* Compile the regular expression PATTERN, with length LENGTH
   and syntax given by the global `re_syntax_options', into the buffer
   BUFFER.  Return NULL if successful, and an error string if not.  */
extern const char *re_compile_pattern
  _RE_ARGS ((const char *pattern, size_t length,
             struct re_pattern_buffer *buffer));


/* Compile a fastmap for the compiled pattern in BUFFER; used to
   accelerate searches.  Return 0 if successful and -2 if there was an
   internal error.  */
extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));


/* Search in the string STRING (with length LENGTH) for the pattern
   compiled into BUFFER.  Start searching at position START, for RANGE
   characters.  Return the starting position of the match, -1 for no
   match, or -2 for an internal error.  Also return register
   information in REGS (if REGS is nonzero and BUFFER->no_sub is zero).  */
extern int re_search
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
            int length, int start, int range, struct re_registers *regs));


/* Like `re_search', but search in the concatenation of STRING1 and
   STRING2.  Also, stop searching at index START + STOP.  */
extern int re_search_2
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
             int length1, const char *string2, int length2,
             int start, int range, struct re_registers *regs, int stop));


/* Like `re_search', but return how many characters in STRING the regexp
   in BUFFER matched, starting at position START.  */
extern int re_match
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
             int length, int start, struct re_registers *regs));


/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
extern int re_match_2
  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
             int length1, const char *string2, int length2,
             int start, struct re_registers *regs, int stop));


/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
   for recording register information.  STARTS and ENDS must be
   allocated with malloc, and must each be at least `NUM_REGS * sizeof
   (regoff_t)' bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   PATTERN_BUFFER will allocate its own register data, without
   freeing the old data.  */
extern void re_set_registers
  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
             unsigned num_regs, regoff_t *starts, regoff_t *ends));

#if defined _REGEX_RE_COMP || defined _LIBC
# ifndef _CRAY
/* 4.2 bsd compatibility.  */
extern char *re_comp _RE_ARGS ((const char *));
extern int re_exec _RE_ARGS ((const char *));
# endif
#endif

/* GCC 2.95 and later have "__restrict"; C99 compilers have
   "restrict", and "configure" may have defined "restrict".  */
#ifndef __restrict
# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
#  if defined restrict || 199901L <= __STDC_VERSION__
#   define __restrict restrict
#  else
#   define __restrict
#  endif
# endif
#endif

/* GCC 3.1 and later support declaring arrays as non-overlapping
   using the syntax array_name[restrict]  */
#ifndef __restrict_arr
# if ! (3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) || defined (__GNUG__)
#  define __restrict_arr
# else
#  define __restrict_arr __restrict
# endif
#endif

/* POSIX compatibility.  */
extern int regcomp _RE_ARGS ((regex_t *__restrict __preg,
			      const char *__restrict __pattern,
			      int __cflags));

extern int regexec _RE_ARGS ((const regex_t *__restrict __preg,
			      const char *__restrict __string, size_t __nmatch,
			      regmatch_t __pmatch[__restrict_arr],
			      int __eflags));

extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg,
				  char *__errbuf, size_t __errbuf_size));

extern void regfree _RE_ARGS ((regex_t *__preg));


#ifdef __cplusplus
}
#endif	/* C++ */

#endif /* regex.h */

/*
Local variables:
make-backup-files: t
version-control: t
trim-versions-without-asking: nil
End:
*/