/* Definitions for data structures callers pass the regex library.

   Copyright (C) 1985, 1989-92 Free Software Foundation, Inc.

   This file is part of the GNU C++ Library.  This library is free
   software; you can redistribute it and/or modify it under the terms of
   the GNU Library General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your
   option) any later version.  This library is distributed in the hope
   that it will be useful, but WITHOUT ANY WARRANTY; without even the
   implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   PURPOSE.  See the GNU Library General Public License for more details.
   You should have received a copy of the GNU Library General Public
   License along with this library; if not, write to the Free Software
   Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef __REGEXP_LIBRARY
#define __REGEXP_LIBRARY

/* Shorten the longest external names when SHORT_NAMES or VMS is defined.  */
#if defined(SHORT_NAMES) || defined(VMS)
#define re_compile_pattern	recmppat
#define re_pattern_buffer	repatbuf
#define re_registers		reregs
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Define number of parens for which we record the beginnings and ends.
   This affects how much space the `struct re_registers' type takes up.  */
#ifndef RE_NREGS
#define RE_NREGS 10
#endif

#define BYTEWIDTH 8


/* Maximum number of duplicates an interval can allow.  */
#ifndef RE_DUP_MAX  /* kludge for AIX, which defines it */
#define RE_DUP_MAX  ((1 << 15) - 1)
#endif

/* This defines the various regexp syntaxes.  */
extern int obscure_syntax;


/* The following bits are used in the obscure_syntax variable to choose among
   alternative regexp syntaxes.  */

/* If this bit is set, plain parentheses serve as grouping, and backslash
     parentheses are needed for literal searching.
   If not set, backslash-parentheses are grouping, and plain parentheses
     are for literal searching.  */
#define RE_NO_BK_PARENS	1

/* If this bit is set, plain | serves as the `or'-operator, and \| is a
     literal.
   If not set, \| serves as the `or'-operator, and | is a literal.  */
#define RE_NO_BK_VBAR (1 << 1)

/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are
     literals.
   If set, \+, \? are operators and plain +, ? are literals.  */
#define RE_BK_PLUS_QM (1 << 2)

/* If this bit is set, | binds tighter than ^ or $.
   If not set, the contrary.  */
#define RE_TIGHT_VBAR (1 << 3)

/* If this bit is set, then treat newline as an OR operator.
   If not set, treat it as a normal character.  */
#define RE_NEWLINE_OR (1 << 4)

/* If this bit is set, then special characters may act as normal
   characters in some contexts.  Specifically, this applies to:
	^ -- only special at the beginning, or after ( or |;
	$ -- only special at the end, or before ) or |;
	*, +, ? -- only special when not after the beginning, (, or |.
   If this bit is not set, special characters (such as *, ^, and $)
   always have their special meaning regardless of the surrounding
   context.

   NOTE(review): the macro name ("context-independent operators") and the
   fact that RE_SYNTAX_EMACS leaves this bit clear suggest the two cases
   above may be described the wrong way round -- confirm against the
   pattern compiler before relying on this comment.  */
#define RE_CONTEXT_INDEP_OPS (1 << 5)

/* If this bit is not set, then \ before anything inside [ and ] is taken as
     a real \.
   If set, then such a \ escapes the following character.  This is a
     special case for awk.  */
#define RE_AWK_CLASS_HACK (1 << 6)

/* If this bit is set, then \{ and \} or { and } serve as interval operators.
   If not set, then \{ and \} and { and } are treated as literals.  */
#define RE_INTERVALS (1 << 7)

/* If this bit is not set, then \{ and \} serve as interval operators and
     { and } are literals.
   If set, then { and } serve as interval operators and \{ and \} are
     literals.  */
#define RE_NO_BK_CURLY_BRACES (1 << 8)

/* If this bit is set, then character classes are supported; they are:
     [:alpha:],	[:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (1 << 9)

/* If this bit is set, then the dot re doesn't match a null byte.
   If not set, it does.  */
#define RE_DOT_NOT_NULL (1 << 10)

/* If this bit is set, then [^...] doesn't match a newline.
   If not set, it does.  */
#define RE_HAT_NOT_NEWLINE (1 << 11)

/* If this bit is not set, back references are recognized.
   If set, they aren't.  (RE_SYNTAX_POSIX_EXTENDED sets this bit;
   RE_SYNTAX_POSIX_BASIC does not.)  */
#define RE_NO_BK_REFS (1 << 12)

/* If this bit is set, back references must refer to a preceding
   subexpression.  If not set, a back reference to a nonexistent
   subexpression is treated as literal characters.  */
#define RE_NO_EMPTY_BK_REF (1 << 13)

/* If this bit is set, bracket expressions can't be empty.
   If it is not set, they can be empty.  */
#define RE_NO_EMPTY_BRACKETS (1 << 14)

/* If this bit is set, then *, +, ? and { cannot be first in an re or
   immediately after a |, or a (.  Furthermore, a | cannot be first or
   last in an re, or immediately follow another | or a (.  Also, a ^
   cannot appear in a nonleading position and a $ cannot appear in a
   nontrailing position (outside of bracket expressions, that is).  */
#define RE_CONTEXTUAL_INVALID_OPS (1 << 15)

/* If this bit is set, then +, ? and | aren't recognized as operators.
   If it's not, they are.  */
#define RE_LIMITED_OPS (1 << 16)

/* If this bit is set, then an ending range point has to collate higher
     or equal to the starting range point.
   If it's not set, then when the ending range point collates lower
     than the starting range point, the range is just considered empty.  */
#define RE_NO_EMPTY_RANGES (1 << 17)

/* If this bit is set, then a hyphen (-) can't be an ending range point.
   If it isn't, then it can.  */
#define RE_NO_HYPHEN_RANGE_END (1 << 18)


/* Define combinations of bits for the standard possibilities.  */
#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
			| RE_CONTEXT_INDEP_OPS)
#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
			| RE_CONTEXT_INDEP_OPS | RE_AWK_CLASS_HACK)
#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
			| RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR)
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS 0
#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM		\
			| RE_CHAR_CLASSES | RE_DOT_NOT_NULL		\
			| RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF	\
			| RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS		\
			| RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END)

#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES	   \
			| RE_NO_BK_VBAR | RE_NO_BK_PARENS		   \
			| RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES		   \
			| RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \
			| RE_NO_BK_REFS | RE_NO_EMPTY_RANGES		   \
			| RE_NO_HYPHEN_RANGE_END)


/* This data structure is used to represent a compiled pattern.  */

struct re_pattern_buffer
  {
    char *buffer;	/* Space holding the compiled pattern commands.  */
    long allocated;	/* Size of space that `buffer' points to.  */
    long used;		/* Length of portion of buffer actually occupied.  */
    char *fastmap;	/* Pointer to fastmap, if any, or zero if none.  */
			/* re_search uses the fastmap, if there is one,
			   to skip over totally implausible characters.  */
    char *translate;	/* Translate table to apply to all characters before
			   comparing, or zero for no translation.
			   The translation is applied to a pattern when it is
			   compiled and to data when it is matched.  */
    char fastmap_accurate;
			/* Set to zero when a new pattern is stored,
			   set to one when the fastmap is updated from it.  */
    char can_be_null;	/* Set to one by compiling fastmap
			   if this pattern might match the null string.
			   It does not necessarily match the null string
			   in that case, but if this is zero, it cannot.
			   2 as value means can match null string
			   but at end of range or before a character
			   listed in the fastmap.  */
  };


/* search.c (search_buffer) needs this one value.  It is defined both in
   regex.c and here.  */
#define RE_EXACTN_VALUE 1


/* Structure to store register contents data in.

   Pass the address of such a structure as an argument to re_match, etc.,
   if you want this information back.

   For i from 1 to RE_NREGS - 1, start[i] records the starting index in
   the string of where the ith subexpression matched, and end[i] records
   one after the ending index.  start[0] and end[0] are analogous, for
   the entire pattern.  */

struct re_registers
  {
    int start[RE_NREGS];
    int end[RE_NREGS];
  };



#if defined(__STDC__) || defined(__cplusplus)

extern char *re_compile_pattern (const char *, int, struct re_pattern_buffer *);
/* Is this really advertised?  */
extern void re_compile_fastmap (struct re_pattern_buffer *);
extern int re_search (struct re_pattern_buffer *, char*, int, int, int,
		      struct re_registers *);
extern int re_search_2 (struct re_pattern_buffer *, char *, int,
			char *, int, int, int,
			struct re_registers *, int);
extern int re_match (struct re_pattern_buffer *, char *, int, int,
		     struct re_registers *);
extern int re_match_2 (struct re_pattern_buffer *, char *, int,
		       char *, int, int, struct re_registers *, int);

/* 4.2 bsd compatibility.  */
extern char *re_comp (char *);
extern int re_exec (char *);

#else /* !__STDC__ */

/* Pre-ANSI compilers have no `const' keyword; make it vanish.  Note that
   this definition stays in effect for whatever includes this header.  */
#define const /* nothing */
extern char *re_compile_pattern ();
/* Is this really advertised? */
extern void re_compile_fastmap ();
extern int re_search (), re_search_2 ();
extern int re_match (), re_match_2 ();

/* 4.2 bsd compatibility.  */
extern char *re_comp ();
extern int re_exec ();

#endif /* __STDC__ */


#ifdef SYNTAX_TABLE
extern char *re_syntax_table;
#endif

#ifdef __cplusplus
/* NOTE(review): this declaration shares the C++-only guard with the brace
   that closes the extern "C" block, so it is not visible to C callers --
   confirm whether that is intended.  */
extern int re_max_failures;
}
#endif

#endif /* !__REGEXP_LIBRARY */