xref: /dflybsd-src/contrib/grep/lib/dfa.h (revision 91b9ed38d3db6a8a8ac5b66da1d43e6e331e259a)
/* dfa.h - declarations for GNU deterministic regexp compiler
   Copyright (C) 1988, 1998, 2007, 2009-2020 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc.,
   51 Franklin Street - Fifth Floor, Boston, MA  02110-1301, USA */

/* Written June, 1988 by Mike Haertel */
20*09d4459fSDaniel Fojt 
21*09d4459fSDaniel Fojt #include <regex.h>
22*09d4459fSDaniel Fojt #include <stdbool.h>
23*09d4459fSDaniel Fojt #include <stddef.h>
24*09d4459fSDaniel Fojt 
/* Forward declaration only; this header uses the type solely through
   pointers.  See localeinfo.h for the definition.  */
struct localeinfo;
26*09d4459fSDaniel Fojt 
/* Needed when Gnulib is not used: with no prior definition, fall back
   to the C99 flexible-array syntax (an empty array bound).  */
#ifndef FLEXIBLE_ARRAY_MEMBER
# define FLEXIBLE_ARRAY_MEMBER /* empty */
#endif

/* Element of a list of strings, at least one of which is known to
   appear in any R.E. matching the DFA.  The string itself is stored
   inline, directly after the flags, so an object must be allocated
   with room for the string beyond sizeof (struct dfamust).  */
struct dfamust
{
  /* NOTE(review): presumably true when MUST is the whole match rather
     than merely a required substring -- confirm against dfa.c.  */
  bool exact;

  /* NOTE(review): presumably MUST is anchored at the beginning of a
     line -- confirm against dfa.c.  */
  bool begline;

  /* NOTE(review): presumably MUST is anchored at the end of a line --
     confirm against dfa.c.  */
  bool endline;

  /* The string, as a flexible array member.  */
  char must[FLEXIBLE_ARRAY_MEMBER];
};
36*09d4459fSDaniel Fojt 
/* The dfa structure.  It is completely opaque: this header only
   declares the tag, so callers handle it through pointers.  */
struct dfa;
39*09d4459fSDaniel Fojt 
/* Needed when Gnulib is not used: the declarations in this header are
   annotated with these attribute macros, so make each one expand to
   nothing unless it has already been defined.  */
#ifndef _GL_ATTRIBUTE_MALLOC
# define _GL_ATTRIBUTE_MALLOC
#endif
#ifndef _GL_ATTRIBUTE_PURE
# define _GL_ATTRIBUTE_PURE
#endif
44*09d4459fSDaniel Fojt 
/* Entry points. */
46*09d4459fSDaniel Fojt 
47*09d4459fSDaniel Fojt /* Allocate a struct dfa.  The struct dfa is completely opaque.
48*09d4459fSDaniel Fojt    It should be initialized via dfasyntax or dfacopysyntax before other use.
49*09d4459fSDaniel Fojt    The returned pointer should be passed directly to free() after
50*09d4459fSDaniel Fojt    calling dfafree() on it. */
51*09d4459fSDaniel Fojt extern struct dfa *dfaalloc (void) _GL_ATTRIBUTE_MALLOC;
52*09d4459fSDaniel Fojt 
/* DFA options that can be ORed together, for dfasyntax's 4th arg.
   Each option occupies its own bit.  */
enum
  {
    /* ^ and $ match only the start and end of data, and do not match
       end-of-line within data.  This is always false for grep, but
       possibly true for other apps.  */
    DFA_ANCHOR = 1 << 0,

    /* '\0' in data is end-of-line, instead of the traditional '\n'.  */
    DFA_EOL_NUL = 1 << 1
  };
64*09d4459fSDaniel Fojt 
/* Initialize or reinitialize a DFA.  The arguments are:
   DFA     - the DFA to operate on.
   LINFO   - information about the current locale.
   BITS    - syntax bits described in regex.h.
   DFAOPTS - additional DFA options (the DFA_* values described
             above), ORed together.  */
extern void dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
                       reg_syntax_t bits, int dfaopts);
72*09d4459fSDaniel Fojt 
/* Initialize or reinitialize the DFA from SRC, an already-initialized
   DFA.  SRC is not modified.  */
extern void dfacopysyntax (struct dfa *dfa, struct dfa const *src);
75*09d4459fSDaniel Fojt 
/* Parse the string S, of length LEN, into the struct dfa D.  */
extern void dfaparse (char const *s, ptrdiff_t len, struct dfa *d);
78*09d4459fSDaniel Fojt 
/* Allocate and return a struct dfamust derived from D.  D must have
   been initialized by dfaparse and not yet given to dfacomp.  */
extern struct dfamust *dfamust (struct dfa const *d);
82*09d4459fSDaniel Fojt 
/* Free the storage held by the components of the struct dfamust DM.  */
extern void dfamustfree (struct dfamust *dm);
85*09d4459fSDaniel Fojt 
/* Compile the string S, of length LEN, into the struct dfa D.
   SEARCHFLAG says whether to build a searching or an exact matcher.
   A null S means that D has already been initialized by dfaparse;
   LEN is then ignored.  */
extern void dfacomp (char const *s, ptrdiff_t len, struct dfa *d,
                     bool searchflag);
91*09d4459fSDaniel Fojt 
/* Search through a buffer looking for a match to the struct dfa D.
   Find the first occurrence of a string matching the regexp in the
   buffer, and the shortest possible version thereof.  Return a pointer to
   the first character after the match, or NULL if none is found.  BEGIN
   points to the beginning of the buffer, and END points to the first byte
   after its end.  Note however that we store a sentinel byte (usually
   newline) in *END, so the actual buffer must be one byte longer.
   When ALLOW_NL is true, newlines may appear in the matching string.
   If COUNT is non-NULL, increment *COUNT once for each newline processed.
   Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
   encountered a back-reference.  The caller can use this to decide
   whether to fall back on a backtracking matcher.  */
extern char *dfaexec (struct dfa *d, char const *begin, char *end,
                      bool allow_nl, ptrdiff_t *count, bool *backref);
106*09d4459fSDaniel Fojt 
107*09d4459fSDaniel Fojt /* Return a superset for D.  The superset matches everything that D
108*09d4459fSDaniel Fojt    matches, along with some other strings (though the latter should be
109*09d4459fSDaniel Fojt    rare, for efficiency reasons).  Return a null pointer if no useful
110*09d4459fSDaniel Fojt    superset is available.  */
111*09d4459fSDaniel Fojt extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;
112*09d4459fSDaniel Fojt 
113*09d4459fSDaniel Fojt /* The DFA is likely to be fast.  */
114*09d4459fSDaniel Fojt extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
115*09d4459fSDaniel Fojt 
/* Free the storage held by the components of the struct dfa D.  */
extern void dfafree (struct dfa *d);
118*09d4459fSDaniel Fojt 
/* Error handling. */
120*09d4459fSDaniel Fojt 
/* dfawarn() is called by the regexp routines whenever a regex is compiled
   that likely doesn't do what the user wanted.  It takes a single
   argument, a NUL-terminated string describing the situation.  The user
   must supply a dfawarn.  */
extern void dfawarn (char const *);
126*09d4459fSDaniel Fojt 
/* dfaerror() is called by the regexp routines whenever an error occurs.  It
   takes a single argument, a NUL-terminated string describing the error.
   The user must supply a dfaerror; it is declared _Noreturn, so it must
   not return to its caller.  */
extern _Noreturn void dfaerror (char const *);
131