1 /*
2 tre.h - TRE public API definitions
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 #ifndef TRE_H
10 #define TRE_H 1
11
12 #include "tre-config.h"
13
14 #ifdef HAVE_SYS_TYPES_H
15 #include <sys/types.h>
16 #endif /* HAVE_SYS_TYPES_H */
17
18 #ifdef HAVE_LIBUTF8_H
19 #include <libutf8.h>
20 #endif /* HAVE_LIBUTF8_H */
21
#ifdef TRE_USE_SYSTEM_REGEX_H
/* Include the system regex.h to make TRE ABI compatible with the
   system regex.  TRE_SYSTEM_REGEX_H_PATH is not defined in this header;
   presumably it comes from tre-config.h -- confirm. */
#include TRE_SYSTEM_REGEX_H_PATH
#ifdef __weak_alias
/* __weak_alias is available: apply it to each POSIX entry point
   together with its tre_-prefixed counterpart. */
__weak_alias(regcomp, tre_regcomp)
__weak_alias(regexec, tre_regexec)
__weak_alias(regerror, tre_regerror)
__weak_alias(regfree, tre_regfree)
#else
/* No __weak_alias: rename at the preprocessor level instead, so every
   later use of one of these tre_-prefixed names in the translation unit
   is replaced by the corresponding POSIX name. */
#define tre_regcomp  regcomp
#define tre_regexec  regexec
#define tre_regerror regerror
#define tre_regfree  regfree
#endif
#endif /* TRE_USE_SYSTEM_REGEX_H */
38
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42
43 #ifdef TRE_USE_SYSTEM_REGEX_H
44
/* Definitions layered on top of the system regex.h.  Anything the
   system header already provides is left alone. */

#ifndef REG_OK
#define REG_OK 0
#endif /* !REG_OK */

#ifndef HAVE_REG_ERRCODE_T
typedef int reg_errcode_t;
#endif /* !HAVE_REG_ERRCODE_T */

/* Only pick a value for REG_LITERAL when the system header supplies
   neither REG_LITERAL nor REG_NOSPEC. */
#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
#define REG_LITERAL 0x1000
#endif

/* Extra tre_regcomp() flags. */
#ifndef REG_BASIC
#define REG_BASIC 0
#endif /* !REG_BASIC */
/* Each of these is the previous flag shifted left by one bit. */
#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
#define REG_UNGREEDY    (REG_RIGHT_ASSOC << 1)

/* Extra tre_regexec() flags. */
#define REG_APPROX_MATCHER       0x1000
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
67
68 #else /* !TRE_USE_SYSTEM_REGEX_H */
69
/* If we're not using the system regex.h, we need to define the
   structs and enums ourselves. */
72
/* Type of the two offsets stored in a regmatch_t. */
typedef int regoff_t;

/* Compiled pattern object passed to the tre_reg*() functions. */
typedef struct {
  size_t re_nsub;  /* Number of parenthesized subexpressions. */
  void *value;     /* For internal use only. */
} regex_t;

/* One entry of a pmatch[] array.
   NOTE(review): rm_so / rm_eo are presumably the start and end offsets
   of the match, as in POSIX regmatch_t -- confirm. */
typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;
83
84
/* Error codes.  Values are assigned consecutively starting from
   REG_OK == 0.  The last enumerator deliberately has no trailing comma:
   C90 and C++98/03 reject one, and this header is meant to be usable
   from both. */
typedef enum {
  REG_OK = 0,    /* No error. */
  /* POSIX tre_regcomp() return error codes.  (In the order listed in the
     standard.) */
  REG_NOMATCH,   /* No match. */
  REG_BADPAT,    /* Invalid regexp. */
  REG_ECOLLATE,  /* Unknown collating element. */
  REG_ECTYPE,    /* Unknown character class name. */
  REG_EESCAPE,   /* Trailing backslash. */
  REG_ESUBREG,   /* Invalid back reference. */
  REG_EBRACK,    /* "[]" imbalance */
  REG_EPAREN,    /* "\(\)" or "()" imbalance */
  REG_EBRACE,    /* "\{\}" or "{}" imbalance */
  REG_BADBR,     /* Invalid content of {} */
  REG_ERANGE,    /* Invalid use of range operator */
  REG_ESPACE,    /* Out of memory. */
  REG_BADRPT,    /* Invalid use of repetition operators. */
  REG_INVARG     /* Invalid arguments. */
} reg_errcode_t;
104
/* Flag bits.  Within each of the two groups below (compile-time flags
   and execution-time flags) every flag is defined as the previous one
   shifted left by one bit, so the values inside a group are distinct
   single bits.  The execution-time group starts again from 1. */

/* POSIX tre_regcomp() flags. */
#define REG_EXTENDED    1
#define REG_ICASE       (REG_EXTENDED << 1)
#define REG_NEWLINE     (REG_ICASE << 1)
#define REG_NOSUB       (REG_NEWLINE << 1)

/* Extra tre_regcomp() flags. */
#define REG_BASIC       0
#define REG_LITERAL     (REG_NOSUB << 1)
#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
#define REG_UNGREEDY    (REG_RIGHT_ASSOC << 1)

#define REG_USEBYTES    (REG_UNGREEDY << 1)

/* POSIX tre_regexec() flags. */
#define REG_NOTBOL      1
#define REG_NOTEOL      (REG_NOTBOL << 1)

/* Extra tre_regexec() flags. */
#define REG_APPROX_MATCHER       (REG_NOTEOL << 1)
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
#define REG_STARTEND             (REG_BACKTRACKING_MATCHER << 1)
127
128 #endif /* !TRE_USE_SYSTEM_REGEX_H */
129
/* REG_NOSPEC and REG_LITERAL mean the same thing.  If exactly one of
   them is defined at this point, define the other in terms of it; if
   both or neither are defined, nothing is changed. */
#if defined(REG_LITERAL) && !defined(REG_NOSPEC)
#define REG_NOSPEC REG_LITERAL
#elif defined(REG_NOSPEC) && !defined(REG_LITERAL)
#define REG_LITERAL REG_NOSPEC
#endif /* REG_NOSPEC / REG_LITERAL pairing */
136
/* The maximum number of iterations in a bound expression.  Any
   definition of RE_DUP_MAX already in effect is discarded first, so the
   value below is the one seen after this header. */
#undef RE_DUP_MAX
#define RE_DUP_MAX 255
140
/* The POSIX.2 regexp functions, under their tre_-prefixed names.  The
   comp/exec functions report their status through an int return value;
   tre_regerror() returns a size_t and tre_regfree() returns nothing. */
extern int
tre_regcomp(regex_t *preg, const char *regex, int cflags);

extern int
tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
            regmatch_t pmatch[], int eflags);

/* NOTE(review): the two *b functions below are not part of POSIX.2.
   They take the same arguments as tre_regcomp() / tre_regexec();
   presumably they are byte-oriented variants like the regn*b functions
   declared in this header -- confirm against the implementation. */
extern int
tre_regcompb(regex_t *preg, const char *regex, int cflags);

extern int
tre_regexecb(const regex_t *preg, const char *string, size_t nmatch,
             regmatch_t pmatch[], int eflags);

extern size_t
tre_regerror(int errcode, const regex_t *preg, char *errbuf,
             size_t errbuf_size);

extern void
tre_regfree(regex_t *preg);
162
#ifdef TRE_WCHAR
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif /* HAVE_WCHAR_H */

/* Wide character versions (not in POSIX.2).  Same argument lists as
   tre_regcomp() / tre_regexec(), except that the pattern and the
   subject string are const wchar_t pointers. */
extern int
tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);

extern int
tre_regwexec(const regex_t *preg, const wchar_t *string,
             size_t nmatch, regmatch_t pmatch[], int eflags);
#endif /* TRE_WCHAR */
176
/* Versions with a maximum length argument and therefore the capability to
   handle null characters in the middle of the strings (not in POSIX.2).
   `len' sits directly after the pattern or subject string argument. */
extern int
tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);

extern int
tre_regnexec(const regex_t *preg, const char *string, size_t len,
             size_t nmatch, regmatch_t pmatch[], int eflags);

/* The regn*b versions take each byte literally as an 8-bit value. */
extern int
tre_regncompb(regex_t *preg, const char *regex, size_t len, int cflags);

extern int
tre_regnexecb(const regex_t *preg, const char *string, size_t len,
              size_t nmatch, regmatch_t pmatch[], int eflags);
193
#ifdef TRE_WCHAR
/* Wide character versions that also take a length argument; only
   declared when TRE_WCHAR is defined. */
extern int
tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);

extern int
tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
              size_t nmatch, regmatch_t pmatch[], int eflags);
#endif /* TRE_WCHAR */
202
203 #ifdef TRE_APPROX
204
/* Approximate matching parameter struct.  The first group of fields
   holds costs, the second group holds limits on the number of edits. */
typedef struct {
  int cost_ins;    /* Default cost of an inserted character. */
  int cost_del;    /* Default cost of a deleted character. */
  int cost_subst;  /* Default cost of a substituted character. */
  int max_cost;    /* Maximum allowed cost of a match. */

  int max_ins;     /* Maximum allowed number of inserts. */
  int max_del;     /* Maximum allowed number of deletes. */
  int max_subst;   /* Maximum allowed number of substitutes. */
  int max_err;     /* Maximum allowed number of errors total. */
} regaparams_t;
217
/* Approximate matching result struct.  It carries both the submatch
   array (pmatch, with nmatch giving its length) and the cost and edit
   counts of the match. */
typedef struct {
  size_t nmatch;       /* Length of pmatch[] array. */
  regmatch_t *pmatch;  /* Submatch data. */
  int cost;            /* Cost of the match. */
  int num_ins;         /* Number of inserts in the match. */
  int num_del;         /* Number of deletes in the match. */
  int num_subst;       /* Number of substitutes in the match. */
} regamatch_t;
227
228
229 /* Approximate matching functions. */
230 extern int
231 tre_regaexec(const regex_t *preg, const char *string,
232 regamatch_t *match, regaparams_t params, int eflags);
233
234 extern int
235 tre_reganexec(const regex_t *preg, const char *string, size_t len,
236 regamatch_t *match, regaparams_t params, int eflags);
237
238 extern int
239 tre_regaexecb(const regex_t *preg, const char *string,
240 regamatch_t *match, regaparams_t params, int eflags);
241
242 #ifdef TRE_WCHAR
243 /* Wide character approximate matching. */
244 extern int
245 tre_regawexec(const regex_t *preg, const wchar_t *string,
246 regamatch_t *match, regaparams_t params, int eflags);
247
248 extern int
249 tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len,
250 regamatch_t *match, regaparams_t params, int eflags);
251 #endif /* TRE_WCHAR */
252
253 /* Sets the parameters to default values. */
254 extern void
255 tre_regaparams_default(regaparams_t *params);
256 #endif /* TRE_APPROX */
257
/* Character type used by the tre_str_source callbacks: wchar_t when
   TRE_WCHAR is defined, unsigned char otherwise. */
#ifdef TRE_WCHAR
typedef wchar_t tre_char_t;
#else /* !TRE_WCHAR */
typedef unsigned char tre_char_t;
#endif /* !TRE_WCHAR */

/* Callback-based description of an input string, consumed by
   tre_reguexec().  Every callback takes a `void *context' as its last
   parameter.
   NOTE(review): presumably the `context' member is what gets passed
   back to the callbacks, and the callbacks fetch the next character,
   reposition the input, and compare two ranges of it respectively --
   confirm the exact contract against the documentation. */
typedef struct {
  int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
  void (*rewind)(size_t pos, void *context);
  int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
  void *context;
} tre_str_source;

/* Matching function that takes its input from a tre_str_source
   instead of a character pointer. */
extern int
tre_reguexec(const regex_t *preg, const tre_str_source *string,
             size_t nmatch, regmatch_t pmatch[], int eflags);
274
/* Returns the version string.  The returned string is static, so it is
   not the caller's to free. */
extern char *
tre_version(void);
278
/* Returns the value for a config parameter.  The type that `result'
   must point to depends on the value of `query'; see the documentation
   for more details. */
extern int
tre_config(int query, void *result);

/* Configuration parameter identifiers, numbered consecutively from 0.
   NOTE(review): presumably these are the values accepted as the
   `query' argument of tre_config() -- confirm. */
enum {
  TRE_CONFIG_APPROX,
  TRE_CONFIG_WCHAR,
  TRE_CONFIG_MULTIBYTE,
  TRE_CONFIG_SYSTEM_ABI,
  TRE_CONFIG_VERSION
};
292
/* Queries on a compiled pattern.  Both take the pattern as a const
   pointer and answer with 1 or 0. */

/* Returns 1 if the compiled pattern has back references, 0 if not. */
extern int
tre_have_backrefs(const regex_t *preg);

/* Returns 1 if the compiled pattern uses approximate matching features,
   0 if not. */
extern int
tre_have_approx(const regex_t *preg);
301
302 #ifdef __cplusplus
303 }
304 #endif
305 #endif /* TRE_H */
306
307 /* EOF */
308