xref: /dflybsd-src/lib/libc/tre-regex/regex.h (revision 4bda1dff0f39441d231fadbb539cdc220e3f9d06)
1 /*
2  * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
20  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  */
29 
30 #ifndef _REGEX_H_
31 #define	_REGEX_H_
32 
33 #include <sys/cdefs.h>
34 #include <sys/types.h>
35 #include <wchar.h>
36 
/*
 * Map the tre_-prefixed names used by the prototypes in this header onto
 * the plain regex names.  After these definitions the preprocessor
 * replaces every left-hand identifier with the right-hand one.
 */

/* NUL-terminated, narrow-character entry points. */
#define	tre_regcomp	regcomp
#define	tre_regexec	regexec
#define	tre_regerror	regerror
#define	tre_regfree	regfree

/* Length-bounded narrow-character entry points. */
#define	tre_regncomp	regncomp
#define	tre_regnexec	regnexec

/* Wide-character entry points. */
#define	tre_regwcomp	regwcomp
#define	tre_regwexec	regwexec

/*
 * NOTE(review): these two are spelled "nw", whereas the length-bounded
 * wide-character prototypes in this header are spelled tre_regwncomp and
 * tre_regwnexec ("wn"), so these aliases do not apply to them.  Confirm
 * which spelling is intended before changing either side; it decides the
 * symbol names callers link against.
 */
#define	tre_regnwcomp	regnwcomp
#define	tre_regnwexec	regnwexec
48 
/*
 * Status codes of the regex routines.  The values are written out
 * explicitly; they are exactly the ones implicit enumeration starting at
 * REG_OK = 0 would assign, so existing numeric codes are unchanged.
 */
typedef enum {
  REG_OK       =  0,	/* No error. */
  REG_NOMATCH  =  1,	/* No match. */
  REG_BADPAT   =  2,	/* Invalid regexp. */
  REG_ECOLLATE =  3,	/* Unknown collating element. */
  REG_ECTYPE   =  4,	/* Unknown character class name. */
  REG_EESCAPE  =  5,	/* Trailing backslash. */
  REG_ESUBREG  =  6,	/* Invalid back reference. */
  REG_EBRACK   =  7,	/* "[]" imbalance. */
  REG_EPAREN   =  8,	/* "\(\)" or "()" imbalance. */
  REG_EBRACE   =  9,	/* "\{\}" or "{}" imbalance. */
  REG_BADBR    = 10,	/* Invalid content of {}. */
  REG_ERANGE   = 11,	/* Invalid use of range operator. */
  REG_ESPACE   = 12,	/* Out of memory. */
  REG_BADRPT   = 13,	/* Invalid use of repetition operators. */
  REG_EMPTY    = 14,	/* Regexp was a zero-length string. */
  REG_INVARG   = 15,	/* Invalid argument to regex routine. */
  REG_ILLSEQ   = 16	/* Illegal byte sequence. */
} reg_errcode_t;
68 
/*
 * Configuration selectors.  Presumably these are the `query' values
 * accepted by tre_config() -- confirm against its implementation.
 * Values are written out explicitly and match the implicit numbering.
 */
enum {
  TRE_CONFIG_APPROX     = 0,
  TRE_CONFIG_WCHAR      = 1,
  TRE_CONFIG_MULTIBYTE  = 2,
  TRE_CONFIG_SYSTEM_ABI = 3,
  TRE_CONFIG_VERSION    = 4
};
76 
/*
 * Offset type used for the rm_so/rm_eo members of regmatch_t.
 * NOTE(review): POSIX describes regoff_t as a signed integer type able to
 * hold any ptrdiff_t or ssize_t value; plain int is narrower than that on
 * LP64 targets.  Widening it would change the layout of regmatch_t, so
 * confirm the ABI impact before touching it.
 */
typedef int regoff_t;
/* Character type written by the tre_str_source get_next_char callback. */
typedef wchar_t tre_char_t;
79 
/*
 * Compiled-pattern object.  The compile functions in this header take a
 * pointer to one, the exec functions read it through a const pointer and
 * tre_regfree() takes it again for release.
 */
typedef struct {
  int re_magic;		/* NOTE(review): not described in this header; presumably a validity marker -- confirm. */
  size_t re_nsub;  /* Number of parenthesized subexpressions. */
  const void *re_endp; /* regex string end pointer (REG_PEND) */
  void *value;	   /* For internal use only. */
} regex_t;
86 
/*
 * One match span.  The exec functions in this header take an array of
 * these as `pmatch'; regamatch_t also points at such an array.
 */
typedef struct {
  regoff_t rm_so;	/* Presumably the start offset of the match (POSIX naming) -- confirm. */
  regoff_t rm_eo;	/* Presumably the offset just past the end of the match -- confirm. */
} regmatch_t;
91 
/* Approximate matching parameter struct: per-edit default costs, followed
   by upper limits on the total cost and on the number of edits of each
   kind.
   NOTE(review): no prototype in this header takes this struct; the
   approximate-matching entry points are presumably declared elsewhere --
   confirm. */
typedef struct {
  int cost_ins;		/* Default cost of an inserted character. */
  int cost_del;		/* Default cost of a deleted character. */
  int cost_subst;	/* Default cost of a substituted character. */
  int max_cost;		/* Maximum allowed cost of a match. */

  int max_ins;		/* Maximum allowed number of inserts. */
  int max_del;		/* Maximum allowed number of deletes. */
  int max_subst;	/* Maximum allowed number of substitutes. */
  int max_err;		/* Maximum allowed number of errors total. */
} regaparams_t;
104 
/* Approximate matching result struct: a submatch array with its length,
   plus the cost of the match and the number of edits of each kind.
   NOTE(review): who allocates pmatch[] is not visible in this header;
   presumably the caller supplies it together with nmatch -- confirm. */
typedef struct {
  size_t nmatch;	/* Length of pmatch[] array. */
  regmatch_t *pmatch;	/* Submatch data. */
  int cost;		/* Cost of the match. */
  int num_ins;		/* Number of inserts in the match. */
  int num_del;		/* Number of deletes in the match. */
  int num_subst;	/* Number of substitutes in the match. */
} regamatch_t;
114 
/* Table of callbacks plus an opaque `context' pointer, describing a
   string source.  Every callback takes a `context' argument; presumably
   the matcher passes this struct's `context' member back unchanged --
   confirm.
   NOTE(review): the callback contracts (return-value meaning, how
   `pos_add' is used) are not visible in this header; the notes below are
   read off the signatures only. */
typedef struct {
  /* Writes a character through `c' and a value through `pos_add'. */
  int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
  /* Takes a position `pos'; presumably repositions the source there -- confirm. */
  void (*rewind)(size_t pos, void *context);
  /* Takes two positions and a length; presumably compares the two
     `len'-long spans starting at pos1 and pos2 -- confirm. */
  int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
  /* Opaque data for the callbacks above. */
  void *context;
} tre_str_source;
121 
/*
 * Compile-time (cflags) bit values for tre_regcomp().  Each flag is
 * written as a literal bit so its numeric value can be read directly;
 * the values are the same ones the chained-shift definitions produced.
 */

/* POSIX tre_regcomp() flags. */
#define	REG_EXTENDED	0x0001
#define	REG_ICASE	0x0002
#define	REG_NEWLINE	0x0004
#define	REG_NOSUB	0x0008

/* Extra tre_regcomp() flags. */
#define	REG_BASIC	0x0000	/* No bit set. */
#define	REG_LITERAL	0x0010
#define	REG_RIGHT_ASSOC	0x0020
#define	REG_UNGREEDY	0x0040
#define	REG_PEND	0x0080
#define	REG_ENHANCED	0x0100

/* Alias regcomp flags. */
#define	REG_NOSPEC	REG_LITERAL
#define	REG_MINIMAL	REG_UNGREEDY
139 
/*
 * tre_regexec() flags (eflags).  REG_NOTBOL and REG_NOTEOL are the POSIX
 * flags; REG_STARTEND and REG_BACKR follow them in the same bit sequence.
 * These four values are unchanged.
 */
#define REG_NOTBOL	1
#define REG_NOTEOL	(REG_NOTBOL << 1)
#define REG_STARTEND	(REG_NOTEOL << 1)
#define	REG_BACKR	(REG_STARTEND << 1)

/*
 * Extra tre_regexec() flags.
 *
 * REG_BACKTRACKING_MATCHER keeps its previous numeric value, which is the
 * same bit as REG_BACKR.  NOTE(review): that overlap is presumed to be
 * intentional (both names ask for the backtracking matcher) -- confirm.
 *
 * REG_APPROX_MATCHER was previously derived from REG_NOTEOL and so had
 * the same value as REG_STARTEND, which made the two requests
 * indistinguishable in eflags.  It now takes the next free bit after
 * REG_BACKR.
 */
#define REG_BACKTRACKING_MATCHER REG_BACKR
#define REG_APPROX_MATCHER	 (REG_BACKR << 1)
149 
/* The maximum number of iterations in a bound expression. */
#define RE_DUP_MAX 255

/* NOTE(review): purpose not stated in this header; presumably a marker
   callers can test with #ifdef to detect the regnexec() family --
   confirm. */
#define _REG_nexec 1
154 
__BEGIN_DECLS

/* The POSIX.2 regexp functions.  Through the alias macros at the top of
   this header these prototypes declare regcomp(), regexec(), regerror()
   and regfree(). */

/* Takes the pattern `regex' as a NUL-terminated string and a `cflags'
   mask built from the tre_regcomp() flags; fills in `preg'. */
int
tre_regcomp(regex_t *preg, const char *regex, int cflags);

/* Takes a compiled `preg' and a NUL-terminated `string'.  `pmatch' is an
   array of `nmatch' entries; `eflags' is built from the tre_regexec()
   flags. */
int
tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
	regmatch_t pmatch[], int eflags);

/* Takes an error code and an output buffer `errbuf' of `errbuf_size'
   bytes.  NOTE(review): presumably returns the size needed for the full
   message, as POSIX regerror() does -- confirm. */
size_t
tre_regerror(int errcode, const regex_t *preg, char *errbuf,
	 size_t errbuf_size);

/* Takes a compiled `preg' for release. */
void
tre_regfree(regex_t *preg);
171 
/* Wide character versions (not in POSIX.2).  Same parameter lists as
   tre_regcomp()/tre_regexec(), except that the pattern and the subject
   string are const wchar_t pointers.  Through the alias macros at the top
   of this header they declare regwcomp() and regwexec(). */
int
tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);

int
tre_regwexec(const regex_t *preg, const wchar_t *string,
	 size_t nmatch, regmatch_t pmatch[], int eflags);
179 
/* Versions with a maximum length argument and therefore the capability to
   handle null characters in the middle of the strings (not in POSIX.2).
   `len' is that maximum length.  NOTE(review): presumably counted in
   char units for the narrow variants and wchar_t units for the wide
   ones -- confirm. */
int
tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);

int
tre_regnexec(const regex_t *preg, const char *string, size_t len,
	 size_t nmatch, regmatch_t pmatch[], int eflags);

/* NOTE(review): the alias macros at the top of this header are spelled
   tre_regnwcomp/tre_regnwexec, so they do not rename the two "wn"
   prototypes below; these are declared under their tre_-prefixed names.
   Confirm which spelling is intended. */
int
tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);

int
tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
	  size_t nmatch, regmatch_t pmatch[], int eflags);
195 
/* Returns the version string.	The returned string is static.
   NOTE(review): the return type is a non-const char *; callers should
   presumably treat the string as read-only -- confirm. */
char *
tre_version(void);

/* Returns the value for a config parameter.  The type to which `result'
   must point to depends of the value of `query', see documentation for
   more details.  NOTE(review): `query' is presumably one of the
   TRE_CONFIG_* constants defined earlier in this header -- confirm. */
int
tre_config(int query, void *result);

/* Returns 1 if the compiled pattern has back references, 0 if not. */
int
tre_have_backrefs(const regex_t *preg);

/* Returns 1 if the compiled pattern uses approximate matching features,
   0 if not. */
int
tre_have_approx(const regex_t *preg);
/* Closes the linkage region opened by __BEGIN_DECLS above. */
__END_DECLS
215 
216 /* The POSIX.2 regexp functions, locale version */
217 int
218 tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t locale);
219 
220 int
221 tre_regncomp_l(regex_t *preg, const char *regex, size_t len, int cflags,
222     locale_t locale);
223 
224 int
225 tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags,
226     locale_t locale);
227 
228 int
229 tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t len, int cflags,
230     locale_t locale);
231 
232 #endif /* !_REGEX_H_ */
233