/* Definitions for data structures callers pass the regex library.

   Copyright (C) 1985, 1989-92 Free Software Foundation, Inc.

This file is part of the GNU C++ Library.  This library is free
software; you can redistribute it and/or modify it under the terms of
the GNU Library General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.  This library is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE.  See the GNU Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the Free Software
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef __REGEXP_LIBRARY
#define __REGEXP_LIBRARY

/* When SHORT_NAMES or VMS is defined, map the long identifiers below
   onto shorter aliases.  */
#if defined(SHORT_NAMES) || defined(VMS)
#define re_compile_pattern	recmppat
#define re_pattern_buffer	repatbuf
#define re_registers		reregs
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Define number of parens for which we record the beginnings and ends.
   This affects how much space the `struct re_registers' type takes up.  */
#ifndef RE_NREGS
#define RE_NREGS 10
#endif

#define BYTEWIDTH 8


/* Maximum number of duplicates an interval can allow.  */
#ifndef RE_DUP_MAX /* kludge for AIX, which defines it */
#define RE_DUP_MAX  ((1 << 15) - 1)
#endif

/* This defines the various regexp syntaxes.  It holds an OR of the
   RE_* syntax bits defined below.  */
extern int obscure_syntax;


/* The following bits are used in the obscure_syntax variable to choose among
   alternative regexp syntaxes.  */

/* If this bit is set, plain parentheses serve as grouping, and backslash
     parentheses are needed for literal searching.
   If not set, backslash-parentheses are grouping, and plain parentheses
     are for literal searching.  */
#define RE_NO_BK_PARENS	1

/* If this bit is set, plain | serves as the `or'-operator, and \| is a
     literal.
   If not set, \| serves as the `or'-operator, and | is a literal.  */
#define RE_NO_BK_VBAR (1 << 1)

/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are
     literals.
   If set, \+, \? are operators and plain +, ? are literals.  */
#define RE_BK_PLUS_QM (1 << 2)

/* If this bit is set, | binds tighter than ^ or $.
   If not set, the contrary.  */
#define RE_TIGHT_VBAR (1 << 3)

/* If this bit is set, then treat newline as an OR operator.
   If not set, treat it as a normal character.  */
#define RE_NEWLINE_OR (1 << 4)

/* If this bit is set, then special characters may act as normal
   characters in some contexts.  Specifically, this applies to:
	^ -- only special at the beginning, or after ( or |;
	$ -- only special at the end, or before ) or |;
	*, +, ? -- only special when not after the beginning, (, or |.
   If this bit is not set, special characters (such as *, ^, and $)
   always have their special meaning regardless of the surrounding
   context.

   NOTE(review): the macro name ("context-independent operators")
   suggests the opposite polarity from the text above; confirm against
   the implementation before relying on this description.  */
#define RE_CONTEXT_INDEP_OPS (1 << 5)

/* If this bit is not set, then \ before anything inside [ and ] is taken as
     a real \.
   If set, then such a \ escapes the following character.  This is a
     special case for awk.  */
#define RE_AWK_CLASS_HACK (1 << 6)

/* If this bit is set, then \{ and \} or { and } serve as interval operators.
   If not set, then \{ and \} and { and } are treated as literals.  */
#define RE_INTERVALS (1 << 7)

/* If this bit is not set, then \{ and \} serve as interval operators and
     { and } are literals.
   If set, then { and } serve as interval operators and \{ and \} are
     literals.  */
#define RE_NO_BK_CURLY_BRACES (1 << 8)

/* If this bit is set, then character classes are supported; they are:
     [:alpha:],	[:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (1 << 9)

/* If this bit is set, then the dot re doesn't match a null byte.
   If not set, it does.  */
#define RE_DOT_NOT_NULL (1 << 10)

/* If this bit is set, then [^...] doesn't match a newline.
   If not set, it does.  */
#define RE_HAT_NOT_NEWLINE (1 << 11)

/* If this bit is set, back references are not recognized.
   If not set, they are.

   NOTE(review): this comment previously stated the opposite polarity.
   The wording above follows the macro name and the fact that the bit
   is part of RE_SYNTAX_POSIX_EXTENDED but not RE_SYNTAX_POSIX_BASIC;
   confirm against the implementation.  */
#define RE_NO_BK_REFS (1 << 12)

/* If this bit is set, back references must refer to a preceding
   subexpression.  If not set, a back reference to a nonexistent
   subexpression is treated as literal characters.  */
#define RE_NO_EMPTY_BK_REF (1 << 13)

/* If this bit is set, bracket expressions can't be empty.
   If it is not set, they can be empty.  */
#define RE_NO_EMPTY_BRACKETS (1 << 14)

/* If this bit is set, then *, +, ? and { cannot be first in an re or
   immediately after a |, or a (.  Furthermore, a | cannot be first or
   last in an re, or immediately follow another | or a (.  Also, a ^
   cannot appear in a nonleading position and a $ cannot appear in a
   nontrailing position (outside of bracket expressions, that is).  */
#define RE_CONTEXTUAL_INVALID_OPS (1 << 15)

/* If this bit is set, then +, ? and | aren't recognized as operators.
   If it's not, they are.  */
#define RE_LIMITED_OPS (1 << 16)

/* If this bit is set, then an ending range point has to collate higher
     or equal to the starting range point.
   If it's not set, then when the starting range point collates higher
     than the ending range point, the range is just considered empty.  */
#define RE_NO_EMPTY_RANGES (1 << 17)

/* If this bit is set, then a hyphen (-) can't be an ending range point.
   If it isn't, then it can.  */
#define RE_NO_HYPHEN_RANGE_END (1 << 18)


/* Define combinations of bits for the standard possibilities.  */
#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
			| RE_CONTEXT_INDEP_OPS)
#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
			| RE_CONTEXT_INDEP_OPS | RE_AWK_CLASS_HACK)
#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
			| RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR)
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS 0
#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM 		\
			| RE_CHAR_CLASSES | RE_DOT_NOT_NULL 		\
			| RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF 	\
			| RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS		\
			| RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END)

#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES	   \
			| RE_NO_BK_VBAR | RE_NO_BK_PARENS 		   \
			| RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES 		   \
			| RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \
			| RE_NO_BK_REFS | RE_NO_EMPTY_RANGES 		   \
			| RE_NO_HYPHEN_RANGE_END)


/* This data structure is used to represent a compiled pattern.  */

struct re_pattern_buffer
  {
    char *buffer;	/* Space holding the compiled pattern commands.  */
    long allocated;	/* Size of space that `buffer' points to.  */
    long used;		/* Length of portion of buffer actually occupied  */
    char *fastmap;	/* Pointer to fastmap, if any, or zero if none.  */
			/* re_search uses the fastmap, if there is one,
			   to skip over totally implausible characters.  */
    char *translate;	/* Translate table to apply to all characters before
			   comparing, or zero for no translation.
			   The translation is applied to a pattern when it is
			   compiled and to data when it is matched.  */
    char fastmap_accurate;
			/* Set to zero when a new pattern is stored,
			   set to one when the fastmap is updated from it.  */
    char can_be_null;   /* Set to one by compiling fastmap
			   if this pattern might match the null string.
			   It does not necessarily match the null string
			   in that case, but if this is zero, it cannot.
			   2 as value means can match null string
			   but at end of range or before a character
			   listed in the fastmap.  */
  };


/* search.c (search_buffer) needs this one value.  It is defined both in
   regex.c and here.  */
#define RE_EXACTN_VALUE 1


/* Structure to store register contents data in.

   Pass the address of such a structure as an argument to re_match, etc.,
   if you want this information back.

   For i from 1 to RE_NREGS - 1, start[i] records the starting index in
   the string of where the ith subexpression matched, and end[i] records
   one after the ending index.  start[0] and end[0] are analogous, for
   the entire pattern.  */

struct re_registers
  {
    int start[RE_NREGS];
    int end[RE_NREGS];
  };



#if defined(__STDC__) || defined(__cplusplus)

/* Full prototypes for ANSI C and C++ compilers.  */
extern char *re_compile_pattern (const char *, int, struct re_pattern_buffer *);
/* Is this really advertised?  */
extern void re_compile_fastmap (struct re_pattern_buffer *);
extern int re_search (struct re_pattern_buffer *, char*, int, int, int,
		      struct re_registers *);
extern int re_search_2 (struct re_pattern_buffer *, char *, int,
			char *, int, int, int,
			struct re_registers *, int);
extern int re_match (struct re_pattern_buffer *, char *, int, int,
		     struct re_registers *);
extern int re_match_2 (struct re_pattern_buffer *, char *, int,
		       char *, int, int, struct re_registers *, int);

/* 4.2 bsd compatibility.  */
extern char *re_comp (const char *);
extern int re_exec (const char *);

#else /* !__STDC__ */

/* Pre-ANSI compilers: `const' is defined away (note that this
   definition stays in effect for whatever includes this header), and
   the functions are declared without parameter lists.  */
#define const /* nothing */
extern char *re_compile_pattern ();
/* Is this really advertised? */
extern void re_compile_fastmap ();
extern int re_search (), re_search_2 ();
extern int re_match (), re_match_2 ();

/* 4.2 bsd compatibility.  */
extern char *re_comp ();
extern int re_exec ();

#endif /* __STDC__ */


#ifdef SYNTAX_TABLE
extern char *re_syntax_table;
#endif

#ifdef __cplusplus
/* NOTE(review): re_max_failures is declared only when compiling as C++;
   C translation units get no declaration from this header.  Confirm
   that this is intended.  */
extern int re_max_failures;
}
#endif

#endif /* !__REGEXP_LIBRARY */