/* dfa.h - declarations for GNU deterministic regexp compiler
   Copyright (C) 1988, 1998, 2007, 2009-2020 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc.,
   51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */

/* Written June, 1988 by Mike Haertel */

/* Include guard: this header defines struct dfamust and an anonymous
   enum, neither of which may be defined twice in one translation unit.  */
#ifndef DFA_H_
#define DFA_H_

#include <regex.h>
#include <stdbool.h>
#include <stddef.h>

/* Needed when Gnulib is not used.  Each fallback expands to nothing, so
   the declarations below still compile; a definition that is already in
   effect (e.g. from config.h) always takes precedence.  */
#ifndef _GL_ATTRIBUTE_MALLOC
# define _GL_ATTRIBUTE_MALLOC
#endif
#ifndef _GL_ATTRIBUTE_PURE
# define _GL_ATTRIBUTE_PURE
#endif
#ifndef FLEXIBLE_ARRAY_MEMBER
/* An empty bound yields a C99 flexible array member: 'char must[];'.  */
# define FLEXIBLE_ARRAY_MEMBER
#endif

struct localeinfo; /* See localeinfo.h.  */

/* Element of a list of strings, at least one of which is known to
   appear in any R.E. matching the DFA.  */
struct dfamust
{
  bool exact;
  bool begline;
  bool endline;
  char must[FLEXIBLE_ARRAY_MEMBER];
};

/* The dfa structure.  It is completely opaque.  */
struct dfa;

/* Entry points.  */

/* Allocate a struct dfa.  The struct dfa is completely opaque.
   It should be initialized via dfasyntax or dfacopysyntax before other use.
   The returned pointer should be passed directly to free() after
   calling dfafree() on it.  */
extern struct dfa *dfaalloc (void) _GL_ATTRIBUTE_MALLOC;

/* DFA options that can be ORed together, for dfasyntax's 4th arg.  */
enum
  {
    /* ^ and $ match only the start and end of data, and do not match
       end-of-line within data.  This is always false for grep, but
       possibly true for other apps.  */
    DFA_ANCHOR = 1 << 0,

    /* '\0' in data is end-of-line, instead of the traditional '\n'.  */
    DFA_EOL_NUL = 1 << 1
  };

/* Initialize or reinitialize a DFA.  The arguments are:
   1. The DFA to operate on.
   2. Information about the current locale.
   3. Syntax bits described in regex.h.
   4. Additional DFA options described above.  */
extern void dfasyntax (struct dfa *, struct localeinfo const *,
                       reg_syntax_t, int);

/* Initialize or reinitialize a DFA from an already-initialized DFA.  */
extern void dfacopysyntax (struct dfa *, struct dfa const *);

/* Parse the given string of given length into the given struct dfa.  */
extern void dfaparse (char const *, ptrdiff_t, struct dfa *);

/* Allocate and return a struct dfamust from a struct dfa that was
   initialized by dfaparse and not yet given to dfacomp.  */
extern struct dfamust *dfamust (struct dfa const *);

/* Free the storage held by the components of a struct dfamust.  */
extern void dfamustfree (struct dfamust *);

/* Compile the given string of the given length into the given struct dfa.
   The last argument says whether to build a searching or an exact matcher.
   A null first argument means the struct dfa has already been
   initialized by dfaparse; the second argument is ignored.  */
extern void dfacomp (char const *, ptrdiff_t, struct dfa *, bool);

/* Search through a buffer looking for a match to the given struct dfa.
   Find the first occurrence of a string matching the regexp in the
   buffer, and the shortest possible version thereof.  Return a pointer to
   the first character after the match, or NULL if none is found.  BEGIN
   points to the beginning of the buffer, and END points to the first byte
   after its end.  Note however that we store a sentinel byte (usually
   newline) in *END, so the actual buffer must be one byte longer.
   When ALLOW_NL is true, newlines may appear in the matching string.
   If COUNT is non-NULL, increment *COUNT once for each newline processed.
   Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
   encountered a back-reference.  The caller can use this to decide
   whether to fall back on a backtracking matcher.  */
extern char *dfaexec (struct dfa *d, char const *begin, char *end,
                      bool allow_nl, ptrdiff_t *count, bool *backref);

/* Return a superset for D.  The superset matches everything that D
   matches, along with some other strings (though the latter should be
   rare, for efficiency reasons).  Return a null pointer if no useful
   superset is available.  */
extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;

/* The DFA is likely to be fast.  */
extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;

/* Free the storage held by the components of a struct dfa.  */
extern void dfafree (struct dfa *);

/* Error handling.  */

/* dfawarn() is called by the regexp routines whenever a regex is compiled
   that likely doesn't do what the user wanted.  It takes a single
   argument, a NUL-terminated string describing the situation.  The user
   must supply a dfawarn.  */
extern void dfawarn (const char *);

/* dfaerror() is called by the regexp routines whenever an error occurs.  It
   takes a single argument, a NUL-terminated string describing the error.
   The user must supply a dfaerror.  */
extern _Noreturn void dfaerror (const char *);

#endif /* DFA_H_ */