1*9663SMark.Logan@Sun.COM /* Definitions for data structures and routines for the regular 2*9663SMark.Logan@Sun.COM expression library. 3*9663SMark.Logan@Sun.COM Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006 4*9663SMark.Logan@Sun.COM Free Software Foundation, Inc. 5*9663SMark.Logan@Sun.COM This file is part of the GNU C Library. 6*9663SMark.Logan@Sun.COM 7*9663SMark.Logan@Sun.COM This program is free software; you can redistribute it and/or modify 8*9663SMark.Logan@Sun.COM it under the terms of the GNU General Public License as published by 9*9663SMark.Logan@Sun.COM the Free Software Foundation; either version 2, or (at your option) 10*9663SMark.Logan@Sun.COM any later version. 11*9663SMark.Logan@Sun.COM 12*9663SMark.Logan@Sun.COM This program is distributed in the hope that it will be useful, 13*9663SMark.Logan@Sun.COM but WITHOUT ANY WARRANTY; without even the implied warranty of 14*9663SMark.Logan@Sun.COM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15*9663SMark.Logan@Sun.COM GNU General Public License for more details. 16*9663SMark.Logan@Sun.COM 17*9663SMark.Logan@Sun.COM You should have received a copy of the GNU General Public License along 18*9663SMark.Logan@Sun.COM with this program; if not, write to the Free Software Foundation, 19*9663SMark.Logan@Sun.COM Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 20*9663SMark.Logan@Sun.COM 21*9663SMark.Logan@Sun.COM #ifndef _REGEX_H 22*9663SMark.Logan@Sun.COM #define _REGEX_H 1 23*9663SMark.Logan@Sun.COM 24*9663SMark.Logan@Sun.COM #include <sys/types.h> 25*9663SMark.Logan@Sun.COM 26*9663SMark.Logan@Sun.COM /* Allow the use in C++ code. */ 27*9663SMark.Logan@Sun.COM #ifdef __cplusplus 28*9663SMark.Logan@Sun.COM extern "C" { 29*9663SMark.Logan@Sun.COM #endif 30*9663SMark.Logan@Sun.COM 31*9663SMark.Logan@Sun.COM /* Define __USE_GNU_REGEX to declare GNU extensions that violate the 32*9663SMark.Logan@Sun.COM POSIX name space rules. */ 33*9663SMark.Logan@Sun.COM #undef __USE_GNU_REGEX 34*9663SMark.Logan@Sun.COM #if (defined _GNU_SOURCE \ 35*9663SMark.Logan@Sun.COM || (!defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE \ 36*9663SMark.Logan@Sun.COM && !defined _XOPEN_SOURCE)) 37*9663SMark.Logan@Sun.COM # define __USE_GNU_REGEX 1 38*9663SMark.Logan@Sun.COM #endif 39*9663SMark.Logan@Sun.COM 40*9663SMark.Logan@Sun.COM #ifdef _REGEX_LARGE_OFFSETS 41*9663SMark.Logan@Sun.COM 42*9663SMark.Logan@Sun.COM /* Use types and values that are wide enough to represent signed and 43*9663SMark.Logan@Sun.COM unsigned byte offsets in memory. This currently works only when 44*9663SMark.Logan@Sun.COM the regex code is used outside of the GNU C library; it is not yet 45*9663SMark.Logan@Sun.COM supported within glibc itself, and glibc users should not define 46*9663SMark.Logan@Sun.COM _REGEX_LARGE_OFFSETS. */ 47*9663SMark.Logan@Sun.COM 48*9663SMark.Logan@Sun.COM /* The type of the offset of a byte within a string. 49*9663SMark.Logan@Sun.COM For historical reasons POSIX 1003.1-2004 requires that regoff_t be 50*9663SMark.Logan@Sun.COM at least as wide as off_t. However, many common POSIX platforms set 51*9663SMark.Logan@Sun.COM regoff_t to the more-sensible ssize_t and the Open Group has 52*9663SMark.Logan@Sun.COM signalled its intention to change the requirement to be that 53*9663SMark.Logan@Sun.COM regoff_t be at least as wide as ptrdiff_t and ssize_t; see XBD ERN 54*9663SMark.Logan@Sun.COM 60 (2005-08-25). We don't know of any hosts where ssize_t or 55*9663SMark.Logan@Sun.COM ptrdiff_t is wider than ssize_t, so ssize_t is safe. */ 56*9663SMark.Logan@Sun.COM typedef ssize_t regoff_t; 57*9663SMark.Logan@Sun.COM 58*9663SMark.Logan@Sun.COM /* The type of nonnegative object indexes. Traditionally, GNU regex 59*9663SMark.Logan@Sun.COM uses 'int' for these. Code that uses __re_idx_t should work 60*9663SMark.Logan@Sun.COM regardless of whether the type is signed. */ 61*9663SMark.Logan@Sun.COM typedef size_t __re_idx_t; 62*9663SMark.Logan@Sun.COM 63*9663SMark.Logan@Sun.COM /* The type of object sizes. */ 64*9663SMark.Logan@Sun.COM typedef size_t __re_size_t; 65*9663SMark.Logan@Sun.COM 66*9663SMark.Logan@Sun.COM /* The type of object sizes, in places where the traditional code 67*9663SMark.Logan@Sun.COM uses unsigned long int. */ 68*9663SMark.Logan@Sun.COM typedef size_t __re_long_size_t; 69*9663SMark.Logan@Sun.COM 70*9663SMark.Logan@Sun.COM #else 71*9663SMark.Logan@Sun.COM 72*9663SMark.Logan@Sun.COM /* Use types that are binary-compatible with the traditional GNU regex 73*9663SMark.Logan@Sun.COM implementation, which mishandles strings longer than INT_MAX. */ 74*9663SMark.Logan@Sun.COM 75*9663SMark.Logan@Sun.COM typedef int regoff_t; 76*9663SMark.Logan@Sun.COM typedef int __re_idx_t; 77*9663SMark.Logan@Sun.COM typedef unsigned int __re_size_t; 78*9663SMark.Logan@Sun.COM typedef unsigned long int __re_long_size_t; 79*9663SMark.Logan@Sun.COM 80*9663SMark.Logan@Sun.COM #endif 81*9663SMark.Logan@Sun.COM 82*9663SMark.Logan@Sun.COM /* The following two types have to be signed and unsigned integer type 83*9663SMark.Logan@Sun.COM wide enough to hold a value of a pointer. For most ANSI compilers 84*9663SMark.Logan@Sun.COM ptrdiff_t and size_t should be likely OK. Still size of these two 85*9663SMark.Logan@Sun.COM types is 2 for Microsoft C. Ugh... */ 86*9663SMark.Logan@Sun.COM typedef long int s_reg_t; 87*9663SMark.Logan@Sun.COM typedef unsigned long int active_reg_t; 88*9663SMark.Logan@Sun.COM 89*9663SMark.Logan@Sun.COM /* The following bits are used to determine the regexp syntax we 90*9663SMark.Logan@Sun.COM recognize. The set/not-set meanings are chosen so that Emacs syntax 91*9663SMark.Logan@Sun.COM remains the value 0. The bits are given in alphabetical order, and 92*9663SMark.Logan@Sun.COM the definitions shifted by one from the previous bit; thus, when we 93*9663SMark.Logan@Sun.COM add or remove a bit, only one other definition need change. */ 94*9663SMark.Logan@Sun.COM typedef unsigned long int reg_syntax_t; 95*9663SMark.Logan@Sun.COM 96*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX 97*9663SMark.Logan@Sun.COM 98*9663SMark.Logan@Sun.COM /* If this bit is not set, then \ inside a bracket expression is literal. 99*9663SMark.Logan@Sun.COM If set, then such a \ quotes the following character. */ 100*9663SMark.Logan@Sun.COM # define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) 101*9663SMark.Logan@Sun.COM 102*9663SMark.Logan@Sun.COM /* If this bit is not set, then + and ? are operators, and \+ and \? are 103*9663SMark.Logan@Sun.COM literals. 104*9663SMark.Logan@Sun.COM If set, then \+ and \? are operators and + and ? are literals. */ 105*9663SMark.Logan@Sun.COM # define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) 106*9663SMark.Logan@Sun.COM 107*9663SMark.Logan@Sun.COM /* If this bit is set, then character classes are supported. They are: 108*9663SMark.Logan@Sun.COM [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], 109*9663SMark.Logan@Sun.COM [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. 110*9663SMark.Logan@Sun.COM If not set, then character classes are not supported. */ 111*9663SMark.Logan@Sun.COM # define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) 112*9663SMark.Logan@Sun.COM 113*9663SMark.Logan@Sun.COM /* If this bit is set, then ^ and $ are always anchors (outside bracket 114*9663SMark.Logan@Sun.COM expressions, of course). 115*9663SMark.Logan@Sun.COM If this bit is not set, then it depends: 116*9663SMark.Logan@Sun.COM ^ is an anchor if it is at the beginning of a regular 117*9663SMark.Logan@Sun.COM expression or after an open-group or an alternation operator; 118*9663SMark.Logan@Sun.COM $ is an anchor if it is at the end of a regular expression, or 119*9663SMark.Logan@Sun.COM before a close-group or an alternation operator. 120*9663SMark.Logan@Sun.COM 121*9663SMark.Logan@Sun.COM This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because 122*9663SMark.Logan@Sun.COM POSIX draft 11.2 says that * etc. in leading positions is undefined. 123*9663SMark.Logan@Sun.COM We already implemented a previous draft which made those constructs 124*9663SMark.Logan@Sun.COM invalid, though, so we haven't changed the code back. */ 125*9663SMark.Logan@Sun.COM # define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) 126*9663SMark.Logan@Sun.COM 127*9663SMark.Logan@Sun.COM /* If this bit is set, then special characters are always special 128*9663SMark.Logan@Sun.COM regardless of where they are in the pattern. 129*9663SMark.Logan@Sun.COM If this bit is not set, then special characters are special only in 130*9663SMark.Logan@Sun.COM some contexts; otherwise they are ordinary. Specifically, 131*9663SMark.Logan@Sun.COM * + ? and intervals are only special when not after the beginning, 132*9663SMark.Logan@Sun.COM open-group, or alternation operator. */ 133*9663SMark.Logan@Sun.COM # define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) 134*9663SMark.Logan@Sun.COM 135*9663SMark.Logan@Sun.COM /* If this bit is set, then *, +, ?, and { cannot be first in an re or 136*9663SMark.Logan@Sun.COM immediately after an alternation or begin-group operator. */ 137*9663SMark.Logan@Sun.COM # define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) 138*9663SMark.Logan@Sun.COM 139*9663SMark.Logan@Sun.COM /* If this bit is set, then . matches newline. 140*9663SMark.Logan@Sun.COM If not set, then it doesn't. */ 141*9663SMark.Logan@Sun.COM # define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) 142*9663SMark.Logan@Sun.COM 143*9663SMark.Logan@Sun.COM /* If this bit is set, then . doesn't match NUL. 144*9663SMark.Logan@Sun.COM If not set, then it does. */ 145*9663SMark.Logan@Sun.COM # define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) 146*9663SMark.Logan@Sun.COM 147*9663SMark.Logan@Sun.COM /* If this bit is set, nonmatching lists [^...] do not match newline. 148*9663SMark.Logan@Sun.COM If not set, they do. */ 149*9663SMark.Logan@Sun.COM # define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) 150*9663SMark.Logan@Sun.COM 151*9663SMark.Logan@Sun.COM /* If this bit is set, either \{...\} or {...} defines an 152*9663SMark.Logan@Sun.COM interval, depending on RE_NO_BK_BRACES. 153*9663SMark.Logan@Sun.COM If not set, \{, \}, {, and } are literals. */ 154*9663SMark.Logan@Sun.COM # define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) 155*9663SMark.Logan@Sun.COM 156*9663SMark.Logan@Sun.COM /* If this bit is set, +, ? and | aren't recognized as operators. 157*9663SMark.Logan@Sun.COM If not set, they are. */ 158*9663SMark.Logan@Sun.COM # define RE_LIMITED_OPS (RE_INTERVALS << 1) 159*9663SMark.Logan@Sun.COM 160*9663SMark.Logan@Sun.COM /* If this bit is set, newline is an alternation operator. 161*9663SMark.Logan@Sun.COM If not set, newline is literal. */ 162*9663SMark.Logan@Sun.COM # define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) 163*9663SMark.Logan@Sun.COM 164*9663SMark.Logan@Sun.COM /* If this bit is set, then `{...}' defines an interval, and \{ and \} 165*9663SMark.Logan@Sun.COM are literals. 166*9663SMark.Logan@Sun.COM If not set, then `\{...\}' defines an interval. */ 167*9663SMark.Logan@Sun.COM # define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) 168*9663SMark.Logan@Sun.COM 169*9663SMark.Logan@Sun.COM /* If this bit is set, (...) defines a group, and \( and \) are literals. 170*9663SMark.Logan@Sun.COM If not set, \(...\) defines a group, and ( and ) are literals. */ 171*9663SMark.Logan@Sun.COM # define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) 172*9663SMark.Logan@Sun.COM 173*9663SMark.Logan@Sun.COM /* If this bit is set, then \<digit> matches <digit>. 174*9663SMark.Logan@Sun.COM If not set, then \<digit> is a back-reference. */ 175*9663SMark.Logan@Sun.COM # define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) 176*9663SMark.Logan@Sun.COM 177*9663SMark.Logan@Sun.COM /* If this bit is set, then | is an alternation operator, and \| is literal. 178*9663SMark.Logan@Sun.COM If not set, then \| is an alternation operator, and | is literal. */ 179*9663SMark.Logan@Sun.COM # define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) 180*9663SMark.Logan@Sun.COM 181*9663SMark.Logan@Sun.COM /* If this bit is set, then an ending range point collating higher 182*9663SMark.Logan@Sun.COM than the starting range point, as in [z-a], is invalid. 183*9663SMark.Logan@Sun.COM If not set, then when ending range point collates higher than the 184*9663SMark.Logan@Sun.COM starting range point, the range is ignored. */ 185*9663SMark.Logan@Sun.COM # define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) 186*9663SMark.Logan@Sun.COM 187*9663SMark.Logan@Sun.COM /* If this bit is set, then an unmatched ) is ordinary. 188*9663SMark.Logan@Sun.COM If not set, then an unmatched ) is invalid. */ 189*9663SMark.Logan@Sun.COM # define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) 190*9663SMark.Logan@Sun.COM 191*9663SMark.Logan@Sun.COM /* If this bit is set, succeed as soon as we match the whole pattern, 192*9663SMark.Logan@Sun.COM without further backtracking. */ 193*9663SMark.Logan@Sun.COM # define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) 194*9663SMark.Logan@Sun.COM 195*9663SMark.Logan@Sun.COM /* If this bit is set, do not process the GNU regex operators. 196*9663SMark.Logan@Sun.COM If not set, then the GNU regex operators are recognized. */ 197*9663SMark.Logan@Sun.COM # define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) 198*9663SMark.Logan@Sun.COM 199*9663SMark.Logan@Sun.COM /* If this bit is set, turn on internal regex debugging. 200*9663SMark.Logan@Sun.COM If not set, and debugging was on, turn it off. 201*9663SMark.Logan@Sun.COM This only works if regex.c is compiled -DDEBUG. 202*9663SMark.Logan@Sun.COM We define this bit always, so that all that's needed to turn on 203*9663SMark.Logan@Sun.COM debugging is to recompile regex.c; the calling code can always have 204*9663SMark.Logan@Sun.COM this bit set, and it won't affect anything in the normal case. */ 205*9663SMark.Logan@Sun.COM # define RE_DEBUG (RE_NO_GNU_OPS << 1) 206*9663SMark.Logan@Sun.COM 207*9663SMark.Logan@Sun.COM /* If this bit is set, a syntactically invalid interval is treated as 208*9663SMark.Logan@Sun.COM a string of ordinary characters. For example, the ERE 'a{1' is 209*9663SMark.Logan@Sun.COM treated as 'a\{1'. */ 210*9663SMark.Logan@Sun.COM # define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) 211*9663SMark.Logan@Sun.COM 212*9663SMark.Logan@Sun.COM /* If this bit is set, then ignore case when matching. 213*9663SMark.Logan@Sun.COM If not set, then case is significant. */ 214*9663SMark.Logan@Sun.COM # define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) 215*9663SMark.Logan@Sun.COM 216*9663SMark.Logan@Sun.COM /* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only 217*9663SMark.Logan@Sun.COM for ^, because it is difficult to scan the regex backwards to find 218*9663SMark.Logan@Sun.COM whether ^ should be special. */ 219*9663SMark.Logan@Sun.COM # define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) 220*9663SMark.Logan@Sun.COM 221*9663SMark.Logan@Sun.COM /* If this bit is set, then \{ cannot be first in an bre or 222*9663SMark.Logan@Sun.COM immediately after an alternation or begin-group operator. */ 223*9663SMark.Logan@Sun.COM # define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) 224*9663SMark.Logan@Sun.COM 225*9663SMark.Logan@Sun.COM /* If this bit is set, then no_sub will be set to 1 during 226*9663SMark.Logan@Sun.COM re_compile_pattern. */ 227*9663SMark.Logan@Sun.COM # define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) 228*9663SMark.Logan@Sun.COM 229*9663SMark.Logan@Sun.COM #endif /* defined __USE_GNU_REGEX */ 230*9663SMark.Logan@Sun.COM 231*9663SMark.Logan@Sun.COM /* This global variable defines the particular regexp syntax to use (for 232*9663SMark.Logan@Sun.COM some interfaces). When a regexp is compiled, the syntax used is 233*9663SMark.Logan@Sun.COM stored in the pattern buffer, so changing this does not affect 234*9663SMark.Logan@Sun.COM already-compiled regexps. */ 235*9663SMark.Logan@Sun.COM extern reg_syntax_t re_syntax_options; 236*9663SMark.Logan@Sun.COM 237*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX 238*9663SMark.Logan@Sun.COM /* Define combinations of the above bits for the standard possibilities. 239*9663SMark.Logan@Sun.COM (The [[[ comments delimit what gets put into the Texinfo file, so 240*9663SMark.Logan@Sun.COM don't delete them!) */ 241*9663SMark.Logan@Sun.COM /* [[[begin syntaxes]]] */ 242*9663SMark.Logan@Sun.COM # define RE_SYNTAX_EMACS 0 243*9663SMark.Logan@Sun.COM 244*9663SMark.Logan@Sun.COM # define RE_SYNTAX_AWK \ 245*9663SMark.Logan@Sun.COM (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ 246*9663SMark.Logan@Sun.COM | RE_NO_BK_PARENS | RE_NO_BK_REFS \ 247*9663SMark.Logan@Sun.COM | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ 248*9663SMark.Logan@Sun.COM | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ 249*9663SMark.Logan@Sun.COM | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) 250*9663SMark.Logan@Sun.COM 251*9663SMark.Logan@Sun.COM # define RE_SYNTAX_GNU_AWK \ 252*9663SMark.Logan@Sun.COM ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ 253*9663SMark.Logan@Sun.COM & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ 254*9663SMark.Logan@Sun.COM | RE_CONTEXT_INVALID_OPS )) 255*9663SMark.Logan@Sun.COM 256*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_AWK \ 257*9663SMark.Logan@Sun.COM (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ 258*9663SMark.Logan@Sun.COM | RE_INTERVALS | RE_NO_GNU_OPS) 259*9663SMark.Logan@Sun.COM 260*9663SMark.Logan@Sun.COM # define RE_SYNTAX_GREP \ 261*9663SMark.Logan@Sun.COM (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ 262*9663SMark.Logan@Sun.COM | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ 263*9663SMark.Logan@Sun.COM | RE_NEWLINE_ALT) 264*9663SMark.Logan@Sun.COM 265*9663SMark.Logan@Sun.COM # define RE_SYNTAX_EGREP \ 266*9663SMark.Logan@Sun.COM (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ 267*9663SMark.Logan@Sun.COM | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ 268*9663SMark.Logan@Sun.COM | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ 269*9663SMark.Logan@Sun.COM | RE_NO_BK_VBAR) 270*9663SMark.Logan@Sun.COM 271*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_EGREP \ 272*9663SMark.Logan@Sun.COM (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ 273*9663SMark.Logan@Sun.COM | RE_INVALID_INTERVAL_ORD) 274*9663SMark.Logan@Sun.COM 275*9663SMark.Logan@Sun.COM /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ 276*9663SMark.Logan@Sun.COM # define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC 277*9663SMark.Logan@Sun.COM 278*9663SMark.Logan@Sun.COM # define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC 279*9663SMark.Logan@Sun.COM 280*9663SMark.Logan@Sun.COM /* Syntax bits common to both basic and extended POSIX regex syntax. */ 281*9663SMark.Logan@Sun.COM # define _RE_SYNTAX_POSIX_COMMON \ 282*9663SMark.Logan@Sun.COM (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ 283*9663SMark.Logan@Sun.COM | RE_INTERVALS | RE_NO_EMPTY_RANGES) 284*9663SMark.Logan@Sun.COM 285*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_BASIC \ 286*9663SMark.Logan@Sun.COM (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) 287*9663SMark.Logan@Sun.COM 288*9663SMark.Logan@Sun.COM /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes 289*9663SMark.Logan@Sun.COM RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this 290*9663SMark.Logan@Sun.COM isn't minimal, since other operators, such as \`, aren't disabled. */ 291*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_MINIMAL_BASIC \ 292*9663SMark.Logan@Sun.COM (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) 293*9663SMark.Logan@Sun.COM 294*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_EXTENDED \ 295*9663SMark.Logan@Sun.COM (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ 296*9663SMark.Logan@Sun.COM | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ 297*9663SMark.Logan@Sun.COM | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ 298*9663SMark.Logan@Sun.COM | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) 299*9663SMark.Logan@Sun.COM 300*9663SMark.Logan@Sun.COM /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is 301*9663SMark.Logan@Sun.COM removed and RE_NO_BK_REFS is added. */ 302*9663SMark.Logan@Sun.COM # define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ 303*9663SMark.Logan@Sun.COM (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ 304*9663SMark.Logan@Sun.COM | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ 305*9663SMark.Logan@Sun.COM | RE_NO_BK_PARENS | RE_NO_BK_REFS \ 306*9663SMark.Logan@Sun.COM | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) 307*9663SMark.Logan@Sun.COM /* [[[end syntaxes]]] */ 308*9663SMark.Logan@Sun.COM 309*9663SMark.Logan@Sun.COM #endif /* defined __USE_GNU_REGEX */ 310*9663SMark.Logan@Sun.COM 311*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX 312*9663SMark.Logan@Sun.COM 313*9663SMark.Logan@Sun.COM /* Maximum number of duplicates an interval can allow. POSIX-conforming 314*9663SMark.Logan@Sun.COM systems might define this in <limits.h>, but we want our 315*9663SMark.Logan@Sun.COM value, so remove any previous define. */ 316*9663SMark.Logan@Sun.COM # ifdef RE_DUP_MAX 317*9663SMark.Logan@Sun.COM # undef RE_DUP_MAX 318*9663SMark.Logan@Sun.COM # endif 319*9663SMark.Logan@Sun.COM 320*9663SMark.Logan@Sun.COM /* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored 321*9663SMark.Logan@Sun.COM the counter as a 2-byte signed integer. This is no longer true, so 322*9663SMark.Logan@Sun.COM RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to 323*9663SMark.Logan@Sun.COM ((SIZE_MAX - 2) / 10 - 1) if _REGEX_LARGE_OFFSETS is defined. 324*9663SMark.Logan@Sun.COM However, there would be a huge performance problem if someone 325*9663SMark.Logan@Sun.COM actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains 326*9663SMark.Logan@Sun.COM its historical value. */ 327*9663SMark.Logan@Sun.COM # define RE_DUP_MAX (0x7fff) 328*9663SMark.Logan@Sun.COM 329*9663SMark.Logan@Sun.COM #endif /* defined __USE_GNU_REGEX */ 330*9663SMark.Logan@Sun.COM 331*9663SMark.Logan@Sun.COM 332*9663SMark.Logan@Sun.COM /* POSIX `cflags' bits (i.e., information for `regcomp'). */ 333*9663SMark.Logan@Sun.COM 334*9663SMark.Logan@Sun.COM /* If this bit is set, then use extended regular expression syntax. 335*9663SMark.Logan@Sun.COM If not set, then use basic regular expression syntax. */ 336*9663SMark.Logan@Sun.COM #define REG_EXTENDED 1 337*9663SMark.Logan@Sun.COM 338*9663SMark.Logan@Sun.COM /* If this bit is set, then ignore case when matching. 339*9663SMark.Logan@Sun.COM If not set, then case is significant. */ 340*9663SMark.Logan@Sun.COM #define REG_ICASE (1 << 1) 341*9663SMark.Logan@Sun.COM 342*9663SMark.Logan@Sun.COM /* If this bit is set, then anchors do not match at newline 343*9663SMark.Logan@Sun.COM characters in the string. 344*9663SMark.Logan@Sun.COM If not set, then anchors do match at newlines. */ 345*9663SMark.Logan@Sun.COM #define REG_NEWLINE (1 << 2) 346*9663SMark.Logan@Sun.COM 347*9663SMark.Logan@Sun.COM /* If this bit is set, then report only success or fail in regexec. 348*9663SMark.Logan@Sun.COM If not set, then returns differ between not matching and errors. */ 349*9663SMark.Logan@Sun.COM #define REG_NOSUB (1 << 3) 350*9663SMark.Logan@Sun.COM 351*9663SMark.Logan@Sun.COM 352*9663SMark.Logan@Sun.COM /* POSIX `eflags' bits (i.e., information for regexec). */ 353*9663SMark.Logan@Sun.COM 354*9663SMark.Logan@Sun.COM /* If this bit is set, then the beginning-of-line operator doesn't match 355*9663SMark.Logan@Sun.COM the beginning of the string (presumably because it's not the 356*9663SMark.Logan@Sun.COM beginning of a line). 357*9663SMark.Logan@Sun.COM If not set, then the beginning-of-line operator does match the 358*9663SMark.Logan@Sun.COM beginning of the string. */ 359*9663SMark.Logan@Sun.COM #define REG_NOTBOL 1 360*9663SMark.Logan@Sun.COM 361*9663SMark.Logan@Sun.COM /* Like REG_NOTBOL, except for the end-of-line. */ 362*9663SMark.Logan@Sun.COM #define REG_NOTEOL (1 << 1) 363*9663SMark.Logan@Sun.COM 364*9663SMark.Logan@Sun.COM /* Use PMATCH[0] to delimit the start and end of the search in the 365*9663SMark.Logan@Sun.COM buffer. */ 366*9663SMark.Logan@Sun.COM #define REG_STARTEND (1 << 2) 367*9663SMark.Logan@Sun.COM 368*9663SMark.Logan@Sun.COM 369*9663SMark.Logan@Sun.COM /* If any error codes are removed, changed, or added, update the 370*9663SMark.Logan@Sun.COM `__re_error_msgid' table in regcomp.c. */ 371*9663SMark.Logan@Sun.COM 372*9663SMark.Logan@Sun.COM typedef enum 373*9663SMark.Logan@Sun.COM { 374*9663SMark.Logan@Sun.COM _REG_ENOSYS = -1, /* This will never happen for this implementation. */ 375*9663SMark.Logan@Sun.COM _REG_NOERROR = 0, /* Success. */ 376*9663SMark.Logan@Sun.COM _REG_NOMATCH, /* Didn't find a match (for regexec). */ 377*9663SMark.Logan@Sun.COM 378*9663SMark.Logan@Sun.COM /* POSIX regcomp return error codes. (In the order listed in the 379*9663SMark.Logan@Sun.COM standard.) */ 380*9663SMark.Logan@Sun.COM _REG_BADPAT, /* Invalid pattern. */ 381*9663SMark.Logan@Sun.COM _REG_ECOLLATE, /* Invalid collating element. */ 382*9663SMark.Logan@Sun.COM _REG_ECTYPE, /* Invalid character class name. */ 383*9663SMark.Logan@Sun.COM _REG_EESCAPE, /* Trailing backslash. */ 384*9663SMark.Logan@Sun.COM _REG_ESUBREG, /* Invalid back reference. */ 385*9663SMark.Logan@Sun.COM _REG_EBRACK, /* Unmatched left bracket. */ 386*9663SMark.Logan@Sun.COM _REG_EPAREN, /* Parenthesis imbalance. */ 387*9663SMark.Logan@Sun.COM _REG_EBRACE, /* Unmatched \{. */ 388*9663SMark.Logan@Sun.COM _REG_BADBR, /* Invalid contents of \{\}. */ 389*9663SMark.Logan@Sun.COM _REG_ERANGE, /* Invalid range end. */ 390*9663SMark.Logan@Sun.COM _REG_ESPACE, /* Ran out of memory. */ 391*9663SMark.Logan@Sun.COM _REG_BADRPT, /* No preceding re for repetition op. */ 392*9663SMark.Logan@Sun.COM 393*9663SMark.Logan@Sun.COM /* Error codes we've added. */ 394*9663SMark.Logan@Sun.COM _REG_EEND, /* Premature end. */ 395*9663SMark.Logan@Sun.COM _REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ 396*9663SMark.Logan@Sun.COM _REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ 397*9663SMark.Logan@Sun.COM } reg_errcode_t; 398*9663SMark.Logan@Sun.COM 399*9663SMark.Logan@Sun.COM #ifdef _XOPEN_SOURCE 400*9663SMark.Logan@Sun.COM # define REG_ENOSYS _REG_ENOSYS 401*9663SMark.Logan@Sun.COM #endif 402*9663SMark.Logan@Sun.COM #define REG_NOERROR _REG_NOERROR 403*9663SMark.Logan@Sun.COM #define REG_NOMATCH _REG_NOMATCH 404*9663SMark.Logan@Sun.COM #define REG_BADPAT _REG_BADPAT 405*9663SMark.Logan@Sun.COM #define REG_ECOLLATE _REG_ECOLLATE 406*9663SMark.Logan@Sun.COM #define REG_ECTYPE _REG_ECTYPE 407*9663SMark.Logan@Sun.COM #define REG_EESCAPE _REG_EESCAPE 408*9663SMark.Logan@Sun.COM #define REG_ESUBREG _REG_ESUBREG 409*9663SMark.Logan@Sun.COM #define REG_EBRACK _REG_EBRACK 410*9663SMark.Logan@Sun.COM #define REG_EPAREN _REG_EPAREN 411*9663SMark.Logan@Sun.COM #define REG_EBRACE _REG_EBRACE 412*9663SMark.Logan@Sun.COM #define REG_BADBR _REG_BADBR 413*9663SMark.Logan@Sun.COM #define REG_ERANGE _REG_ERANGE 414*9663SMark.Logan@Sun.COM #define REG_ESPACE _REG_ESPACE 415*9663SMark.Logan@Sun.COM #define REG_BADRPT _REG_BADRPT 416*9663SMark.Logan@Sun.COM #define REG_EEND _REG_EEND 417*9663SMark.Logan@Sun.COM #define REG_ESIZE _REG_ESIZE 418*9663SMark.Logan@Sun.COM #define REG_ERPAREN _REG_ERPAREN 419*9663SMark.Logan@Sun.COM 420*9663SMark.Logan@Sun.COM /* struct re_pattern_buffer normally uses member names like `buffer' 421*9663SMark.Logan@Sun.COM that POSIX does not allow. In POSIX mode these members have names 422*9663SMark.Logan@Sun.COM with leading `re_' (e.g., `re_buffer'). */ 423*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX 424*9663SMark.Logan@Sun.COM # define _REG_RE_NAME(id) id 425*9663SMark.Logan@Sun.COM # define _REG_RM_NAME(id) id 426*9663SMark.Logan@Sun.COM #else 427*9663SMark.Logan@Sun.COM # define _REG_RE_NAME(id) re_##id 428*9663SMark.Logan@Sun.COM # define _REG_RM_NAME(id) rm_##id 429*9663SMark.Logan@Sun.COM #endif 430*9663SMark.Logan@Sun.COM 431*9663SMark.Logan@Sun.COM /* The user can specify the type of the re_translate member by 432*9663SMark.Logan@Sun.COM defining the macro RE_TRANSLATE_TYPE, which defaults to unsigned 433*9663SMark.Logan@Sun.COM char *. This pollutes the POSIX name space, so in POSIX mode just 434*9663SMark.Logan@Sun.COM use unsigned char *. */ 435*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX 436*9663SMark.Logan@Sun.COM # ifndef RE_TRANSLATE_TYPE 437*9663SMark.Logan@Sun.COM # define RE_TRANSLATE_TYPE unsigned char * 438*9663SMark.Logan@Sun.COM # endif 439*9663SMark.Logan@Sun.COM # define REG_TRANSLATE_TYPE RE_TRANSLATE_TYPE 440*9663SMark.Logan@Sun.COM #else 441*9663SMark.Logan@Sun.COM # define REG_TRANSLATE_TYPE unsigned char * 442*9663SMark.Logan@Sun.COM #endif 443*9663SMark.Logan@Sun.COM 444*9663SMark.Logan@Sun.COM /* This data structure represents a compiled pattern. Before calling 445*9663SMark.Logan@Sun.COM the pattern compiler, the fields `buffer', `allocated', `fastmap', 446*9663SMark.Logan@Sun.COM `translate', and `no_sub' can be set. After the pattern has been 447*9663SMark.Logan@Sun.COM compiled, the `re_nsub' field is available. All other fields are 448*9663SMark.Logan@Sun.COM private to the regex routines. */ 449*9663SMark.Logan@Sun.COM 450*9663SMark.Logan@Sun.COM struct re_pattern_buffer 451*9663SMark.Logan@Sun.COM { 452*9663SMark.Logan@Sun.COM /* Space that holds the compiled pattern. It is declared as 453*9663SMark.Logan@Sun.COM `unsigned char *' because its elements are sometimes used as 454*9663SMark.Logan@Sun.COM array indexes. */ 455*9663SMark.Logan@Sun.COM unsigned char *_REG_RE_NAME (buffer); 456*9663SMark.Logan@Sun.COM 457*9663SMark.Logan@Sun.COM /* Number of bytes to which `buffer' points. */ 458*9663SMark.Logan@Sun.COM __re_long_size_t _REG_RE_NAME (allocated); 459*9663SMark.Logan@Sun.COM 460*9663SMark.Logan@Sun.COM /* Number of bytes actually used in `buffer'. */ 461*9663SMark.Logan@Sun.COM __re_long_size_t _REG_RE_NAME (used); 462*9663SMark.Logan@Sun.COM 463*9663SMark.Logan@Sun.COM /* Syntax setting with which the pattern was compiled. */ 464*9663SMark.Logan@Sun.COM reg_syntax_t _REG_RE_NAME (syntax); 465*9663SMark.Logan@Sun.COM 466*9663SMark.Logan@Sun.COM /* Pointer to a fastmap, if any, otherwise zero. re_search uses the 467*9663SMark.Logan@Sun.COM fastmap, if there is one, to skip over impossible starting points 468*9663SMark.Logan@Sun.COM for matches. */ 469*9663SMark.Logan@Sun.COM char *_REG_RE_NAME (fastmap); 470*9663SMark.Logan@Sun.COM 471*9663SMark.Logan@Sun.COM /* Either a translate table to apply to all characters before 472*9663SMark.Logan@Sun.COM comparing them, or zero for no translation. The translation is 473*9663SMark.Logan@Sun.COM applied to a pattern when it is compiled and to a string when it 474*9663SMark.Logan@Sun.COM is matched. */ 475*9663SMark.Logan@Sun.COM REG_TRANSLATE_TYPE _REG_RE_NAME (translate); 476*9663SMark.Logan@Sun.COM 477*9663SMark.Logan@Sun.COM /* Number of subexpressions found by the compiler. */ 478*9663SMark.Logan@Sun.COM size_t re_nsub; 479*9663SMark.Logan@Sun.COM 480*9663SMark.Logan@Sun.COM /* Zero if this pattern cannot match the empty string, one else. 481*9663SMark.Logan@Sun.COM Well, in truth it's used only in `re_search_2', to see whether or 482*9663SMark.Logan@Sun.COM not we should use the fastmap, so we don't set this absolutely 483*9663SMark.Logan@Sun.COM perfectly; see `re_compile_fastmap' (the `duplicate' case). */ 484*9663SMark.Logan@Sun.COM unsigned int _REG_RE_NAME (can_be_null) : 1; 485*9663SMark.Logan@Sun.COM 486*9663SMark.Logan@Sun.COM /* If REGS_UNALLOCATED, allocate space in the `regs' structure 487*9663SMark.Logan@Sun.COM for `max (RE_NREGS, re_nsub + 1)' groups. 488*9663SMark.Logan@Sun.COM If REGS_REALLOCATE, reallocate space if necessary. 489*9663SMark.Logan@Sun.COM If REGS_FIXED, use what's there. */ 490*9663SMark.Logan@Sun.COM #ifdef __USE_GNU_REGEX 491*9663SMark.Logan@Sun.COM # define REGS_UNALLOCATED 0 492*9663SMark.Logan@Sun.COM # define REGS_REALLOCATE 1 493*9663SMark.Logan@Sun.COM # define REGS_FIXED 2 494*9663SMark.Logan@Sun.COM #endif 495*9663SMark.Logan@Sun.COM unsigned int _REG_RE_NAME (regs_allocated) : 2; 496*9663SMark.Logan@Sun.COM 497*9663SMark.Logan@Sun.COM /* Set to zero when `regex_compile' compiles a pattern; set to one 498*9663SMark.Logan@Sun.COM by `re_compile_fastmap' if it updates the fastmap. */ 499*9663SMark.Logan@Sun.COM unsigned int _REG_RE_NAME (fastmap_accurate) : 1; 500*9663SMark.Logan@Sun.COM 501*9663SMark.Logan@Sun.COM /* If set, `re_match_2' does not return information about 502*9663SMark.Logan@Sun.COM subexpressions. */ 503*9663SMark.Logan@Sun.COM unsigned int _REG_RE_NAME (no_sub) : 1; 504*9663SMark.Logan@Sun.COM 505*9663SMark.Logan@Sun.COM /* If set, a beginning-of-line anchor doesn't match at the beginning 506*9663SMark.Logan@Sun.COM of the string. */ 507*9663SMark.Logan@Sun.COM unsigned int _REG_RE_NAME (not_bol) : 1; 508*9663SMark.Logan@Sun.COM 509*9663SMark.Logan@Sun.COM /* Similarly for an end-of-line anchor. */ 510*9663SMark.Logan@Sun.COM unsigned int _REG_RE_NAME (not_eol) : 1; 511*9663SMark.Logan@Sun.COM 512*9663SMark.Logan@Sun.COM /* If true, an anchor at a newline matches. */ 513*9663SMark.Logan@Sun.COM unsigned int _REG_RE_NAME (newline_anchor) : 1; 514*9663SMark.Logan@Sun.COM 515*9663SMark.Logan@Sun.COM /* [[[end pattern_buffer]]] */ 516*9663SMark.Logan@Sun.COM }; 517*9663SMark.Logan@Sun.COM 518*9663SMark.Logan@Sun.COM typedef struct re_pattern_buffer regex_t; 519*9663SMark.Logan@Sun.COM 520*9663SMark.Logan@Sun.COM /* This is the structure we store register match data in. See 521*9663SMark.Logan@Sun.COM regex.texinfo for a full description of what registers match. */ 522*9663SMark.Logan@Sun.COM struct re_registers 523*9663SMark.Logan@Sun.COM { 524*9663SMark.Logan@Sun.COM __re_size_t _REG_RM_NAME (num_regs); 525*9663SMark.Logan@Sun.COM regoff_t *_REG_RM_NAME (start); 526*9663SMark.Logan@Sun.COM regoff_t *_REG_RM_NAME (end); 527*9663SMark.Logan@Sun.COM }; 528*9663SMark.Logan@Sun.COM 529*9663SMark.Logan@Sun.COM 530*9663SMark.Logan@Sun.COM /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, 531*9663SMark.Logan@Sun.COM `re_match_2' returns information about at least this many registers 532*9663SMark.Logan@Sun.COM the first time a `regs' structure is passed. */ 533*9663SMark.Logan@Sun.COM #if !defined RE_NREGS && defined __USE_GNU_REGEX 534*9663SMark.Logan@Sun.COM # define RE_NREGS 30 535*9663SMark.Logan@Sun.COM #endif 536*9663SMark.Logan@Sun.COM 537*9663SMark.Logan@Sun.COM 538*9663SMark.Logan@Sun.COM /* POSIX specification for registers. Aside from the different names than 539*9663SMark.Logan@Sun.COM `re_registers', POSIX uses an array of structures, instead of a 540*9663SMark.Logan@Sun.COM structure of arrays. */ 541*9663SMark.Logan@Sun.COM typedef struct 542*9663SMark.Logan@Sun.COM { 543*9663SMark.Logan@Sun.COM regoff_t rm_so; /* Byte offset from string's start to substring's start. */ 544*9663SMark.Logan@Sun.COM regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ 545*9663SMark.Logan@Sun.COM } regmatch_t; 546*9663SMark.Logan@Sun.COM 547*9663SMark.Logan@Sun.COM /* Declarations for routines. */ 548*9663SMark.Logan@Sun.COM 549*9663SMark.Logan@Sun.COM /* Sets the current default syntax to SYNTAX, and return the old syntax. 550*9663SMark.Logan@Sun.COM You can also simply assign to the `re_syntax_options' variable. */ 551*9663SMark.Logan@Sun.COM extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); 552*9663SMark.Logan@Sun.COM 553*9663SMark.Logan@Sun.COM /* Compile the regular expression PATTERN, with length LENGTH 554*9663SMark.Logan@Sun.COM and syntax given by the global `re_syntax_options', into the buffer 555*9663SMark.Logan@Sun.COM BUFFER. Return NULL if successful, and an error string if not. */ 556*9663SMark.Logan@Sun.COM extern const char *re_compile_pattern (const char *__pattern, size_t __length, 557*9663SMark.Logan@Sun.COM struct re_pattern_buffer *__buffer); 558*9663SMark.Logan@Sun.COM 559*9663SMark.Logan@Sun.COM 560*9663SMark.Logan@Sun.COM /* Compile a fastmap for the compiled pattern in BUFFER; used to 561*9663SMark.Logan@Sun.COM accelerate searches. Return 0 if successful and -2 if was an 562*9663SMark.Logan@Sun.COM internal error. */ 563*9663SMark.Logan@Sun.COM extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); 564*9663SMark.Logan@Sun.COM 565*9663SMark.Logan@Sun.COM 566*9663SMark.Logan@Sun.COM /* Search in the string STRING (with length LENGTH) for the pattern 567*9663SMark.Logan@Sun.COM compiled into BUFFER. Start searching at position START, for RANGE 568*9663SMark.Logan@Sun.COM characters. Return the starting position of the match, -1 for no 569*9663SMark.Logan@Sun.COM match, or -2 for an internal error. Also return register 570*9663SMark.Logan@Sun.COM information in REGS (if REGS and BUFFER->no_sub are nonzero). */ 571*9663SMark.Logan@Sun.COM extern regoff_t re_search (struct re_pattern_buffer *__buffer, 572*9663SMark.Logan@Sun.COM const char *__string, __re_idx_t __length, 573*9663SMark.Logan@Sun.COM __re_idx_t __start, regoff_t __range, 574*9663SMark.Logan@Sun.COM struct re_registers *__regs); 575*9663SMark.Logan@Sun.COM 576*9663SMark.Logan@Sun.COM 577*9663SMark.Logan@Sun.COM /* Like `re_search', but search in the concatenation of STRING1 and 578*9663SMark.Logan@Sun.COM STRING2. Also, stop searching at index START + STOP. */ 579*9663SMark.Logan@Sun.COM extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, 580*9663SMark.Logan@Sun.COM const char *__string1, __re_idx_t __length1, 581*9663SMark.Logan@Sun.COM const char *__string2, __re_idx_t __length2, 582*9663SMark.Logan@Sun.COM __re_idx_t __start, regoff_t __range, 583*9663SMark.Logan@Sun.COM struct re_registers *__regs, 584*9663SMark.Logan@Sun.COM __re_idx_t __stop); 585*9663SMark.Logan@Sun.COM 586*9663SMark.Logan@Sun.COM 587*9663SMark.Logan@Sun.COM /* Like `re_search', but return how many characters in STRING the regexp 588*9663SMark.Logan@Sun.COM in BUFFER matched, starting at position START. */ 589*9663SMark.Logan@Sun.COM extern regoff_t re_match (struct re_pattern_buffer *__buffer, 590*9663SMark.Logan@Sun.COM const char *__string, __re_idx_t __length, 591*9663SMark.Logan@Sun.COM __re_idx_t __start, struct re_registers *__regs); 592*9663SMark.Logan@Sun.COM 593*9663SMark.Logan@Sun.COM 594*9663SMark.Logan@Sun.COM /* Relates to `re_match' as `re_search_2' relates to `re_search'. */ 595*9663SMark.Logan@Sun.COM extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer, 596*9663SMark.Logan@Sun.COM const char *__string1, __re_idx_t __length1, 597*9663SMark.Logan@Sun.COM const char *__string2, __re_idx_t __length2, 598*9663SMark.Logan@Sun.COM __re_idx_t __start, struct re_registers *__regs, 599*9663SMark.Logan@Sun.COM __re_idx_t __stop); 600*9663SMark.Logan@Sun.COM 601*9663SMark.Logan@Sun.COM 602*9663SMark.Logan@Sun.COM /* Set REGS to hold NUM_REGS registers, storing them in STARTS and 603*9663SMark.Logan@Sun.COM ENDS. Subsequent matches using BUFFER and REGS will use this memory 604*9663SMark.Logan@Sun.COM for recording register information. STARTS and ENDS must be 605*9663SMark.Logan@Sun.COM allocated with malloc, and must each be at least `NUM_REGS * sizeof 606*9663SMark.Logan@Sun.COM (regoff_t)' bytes long. 607*9663SMark.Logan@Sun.COM 608*9663SMark.Logan@Sun.COM If NUM_REGS == 0, then subsequent matches should allocate their own 609*9663SMark.Logan@Sun.COM register data. 610*9663SMark.Logan@Sun.COM 611*9663SMark.Logan@Sun.COM Unless this function is called, the first search or match using 612*9663SMark.Logan@Sun.COM PATTERN_BUFFER will allocate its own register data, without 613*9663SMark.Logan@Sun.COM freeing the old data. */ 614*9663SMark.Logan@Sun.COM extern void re_set_registers (struct re_pattern_buffer *__buffer, 615*9663SMark.Logan@Sun.COM struct re_registers *__regs, 616*9663SMark.Logan@Sun.COM __re_size_t __num_regs, 617*9663SMark.Logan@Sun.COM regoff_t *__starts, regoff_t *__ends); 618*9663SMark.Logan@Sun.COM 619*9663SMark.Logan@Sun.COM #if defined _REGEX_RE_COMP || defined _LIBC 620*9663SMark.Logan@Sun.COM # ifndef _CRAY 621*9663SMark.Logan@Sun.COM /* 4.2 bsd compatibility. */ 622*9663SMark.Logan@Sun.COM extern char *re_comp (const char *); 623*9663SMark.Logan@Sun.COM extern int re_exec (const char *); 624*9663SMark.Logan@Sun.COM # endif 625*9663SMark.Logan@Sun.COM #endif 626*9663SMark.Logan@Sun.COM 627*9663SMark.Logan@Sun.COM /* GCC 2.95 and later have "__restrict"; C99 compilers have 628*9663SMark.Logan@Sun.COM "restrict", and "configure" may have defined "restrict". 629*9663SMark.Logan@Sun.COM Other compilers use __restrict, __restrict__, and _Restrict, and 630*9663SMark.Logan@Sun.COM 'configure' might #define 'restrict' to those words, so pick a 631*9663SMark.Logan@Sun.COM different name. */ 632*9663SMark.Logan@Sun.COM #ifndef _Restrict_ 633*9663SMark.Logan@Sun.COM # if 199901L <= __STDC_VERSION__ 634*9663SMark.Logan@Sun.COM # define _Restrict_ restrict 635*9663SMark.Logan@Sun.COM # elif 2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__) 636*9663SMark.Logan@Sun.COM # define _Restrict_ __restrict 637*9663SMark.Logan@Sun.COM # else 638*9663SMark.Logan@Sun.COM # define _Restrict_ 639*9663SMark.Logan@Sun.COM # endif 640*9663SMark.Logan@Sun.COM #endif 641*9663SMark.Logan@Sun.COM /* gcc 3.1 and up support the [restrict] syntax. Don't trust 642*9663SMark.Logan@Sun.COM sys/cdefs.h's definition of __restrict_arr, though, as it 643*9663SMark.Logan@Sun.COM mishandles gcc -ansi -pedantic. */ 644*9663SMark.Logan@Sun.COM #ifndef _Restrict_arr_ 645*9663SMark.Logan@Sun.COM # if ((199901L <= __STDC_VERSION__ \ 646*9663SMark.Logan@Sun.COM || ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \ 647*9663SMark.Logan@Sun.COM && !__STRICT_ANSI__)) \ 648*9663SMark.Logan@Sun.COM && !defined __GNUG__) 649*9663SMark.Logan@Sun.COM # define _Restrict_arr_ _Restrict_ 650*9663SMark.Logan@Sun.COM # else 651*9663SMark.Logan@Sun.COM # define _Restrict_arr_ 652*9663SMark.Logan@Sun.COM # endif 653*9663SMark.Logan@Sun.COM #endif 654*9663SMark.Logan@Sun.COM 655*9663SMark.Logan@Sun.COM /* POSIX compatibility. */ 656*9663SMark.Logan@Sun.COM extern int regcomp (regex_t *_Restrict_ __preg, 657*9663SMark.Logan@Sun.COM const char *_Restrict_ __pattern, 658*9663SMark.Logan@Sun.COM int __cflags); 659*9663SMark.Logan@Sun.COM 660*9663SMark.Logan@Sun.COM extern int regexec (const regex_t *_Restrict_ __preg, 661*9663SMark.Logan@Sun.COM const char *_Restrict_ __string, size_t __nmatch, 662*9663SMark.Logan@Sun.COM regmatch_t __pmatch[_Restrict_arr_], 663*9663SMark.Logan@Sun.COM int __eflags); 664*9663SMark.Logan@Sun.COM 665*9663SMark.Logan@Sun.COM extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg, 666*9663SMark.Logan@Sun.COM char *_Restrict_ __errbuf, size_t __errbuf_size); 667*9663SMark.Logan@Sun.COM 668*9663SMark.Logan@Sun.COM extern void regfree (regex_t *__preg); 669*9663SMark.Logan@Sun.COM 670*9663SMark.Logan@Sun.COM 671*9663SMark.Logan@Sun.COM #ifdef __cplusplus 672*9663SMark.Logan@Sun.COM } 673*9663SMark.Logan@Sun.COM #endif /* C++ */ 674*9663SMark.Logan@Sun.COM 675*9663SMark.Logan@Sun.COM #endif /* regex.h */ 676