xref: /netbsd-src/external/bsd/tre/dist/lib/tre.h (revision b2085f3f9f585877f381ec15d12bc8bd30e63f22)
1 /*
2   tre.h - TRE public API definitions
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
9 #ifndef TRE_H
10 #define TRE_H 1
11 
12 #include "tre-config.h"
13 
14 #ifdef HAVE_SYS_TYPES_H
15 #include <sys/types.h>
16 #endif /* HAVE_SYS_TYPES_H */
17 
18 #ifdef HAVE_LIBUTF8_H
19 #include <libutf8.h>
20 #endif /* HAVE_LIBUTF8_H */
21 
#ifdef TRE_USE_SYSTEM_REGEX_H
/* Include the system regex.h to make TRE ABI compatible with the
   system regex. */
#include TRE_SYSTEM_REGEX_H_PATH
#ifdef __weak_alias
/* The platform supplies a __weak_alias macro: use it to tie each POSIX
   name to the corresponding tre_* function.  One alias per function. */
__weak_alias(regcomp, tre_regcomp)
__weak_alias(regexec, tre_regexec)
__weak_alias(regerror, tre_regerror)
__weak_alias(regfree, tre_regfree)
#else
/* No __weak_alias available: rename the tre_* entry points to the POSIX
   names at the preprocessor level instead. */
#define tre_regcomp  regcomp
#define tre_regexec  regexec
#define tre_regerror regerror
#define tre_regfree  regfree
#endif
#endif /* TRE_USE_SYSTEM_REGEX_H */
38 
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42 
43 #ifdef TRE_USE_SYSTEM_REGEX_H
44 
/* Fill in the pieces the system regex.h may have left undefined. */
#if !defined(REG_OK)
#define REG_OK 0
#endif

#if !defined(HAVE_REG_ERRCODE_T)
typedef int reg_errcode_t;
#endif

/* Choose a value for REG_LITERAL only when the system header offers
   neither REG_LITERAL nor REG_NOSPEC. */
#if !(defined(REG_NOSPEC) || defined(REG_LITERAL))
#define REG_LITERAL 0x1000
#endif

/* Additional tre_regcomp() flags; the TRE-only ones occupy the bits
   directly above REG_LITERAL. */
#if !defined(REG_BASIC)
#define REG_BASIC 0
#endif
#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
#define REG_UNGREEDY (REG_LITERAL << 2)

/* Additional tre_regexec() flags. */
#define REG_APPROX_MATCHER 0x1000
#define REG_BACKTRACKING_MATCHER 0x2000
67 
68 #else /* !TRE_USE_SYSTEM_REGEX_H */
69 
/* If we're not using the system regex.h, we need to define the
   structs and enums ourselves. */
72 
/* Offset type used by the members of regmatch_t. */
typedef int regoff_t;

/* Compiled pattern object. */
typedef struct {
  /* Number of parenthesized subexpressions. */
  size_t re_nsub;
  /* For internal use only. */
  void *value;
} regex_t;

/* One match record; member names are those of the POSIX regmatch_t. */
typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;
83 
84 
/* Status codes of the regex API.  Enumerators take consecutive values
   starting from REG_OK == 0. */
typedef enum {
  REG_OK = 0,		/* No error. */
  /* POSIX tre_regcomp() return error codes.  (In the order listed in the
     standard.)	 */
  REG_NOMATCH,		/* No match. */
  REG_BADPAT,		/* Invalid regexp. */
  REG_ECOLLATE,		/* Unknown collating element. */
  REG_ECTYPE,		/* Unknown character class name. */
  REG_EESCAPE,		/* Trailing backslash. */
  REG_ESUBREG,		/* Invalid back reference. */
  REG_EBRACK,		/* "[]" imbalance */
  REG_EPAREN,		/* "\(\)" or "()" imbalance */
  REG_EBRACE,		/* "\{\}" or "{}" imbalance */
  REG_BADBR,		/* Invalid content of {} */
  REG_ERANGE,		/* Invalid use of range operator */
  REG_ESPACE,		/* Out of memory.  */
  REG_BADRPT,		/* Invalid use of repetition operators. */
  /* Last enumerator: deliberately no trailing comma, because C89 and
     C++98 compilers reject one and this header is usable from C++. */
  REG_INVARG		/* Invalid arguments. */
} reg_errcode_t;
104 
/* tre_regcomp() flags defined by POSIX (one bit each). */
#define REG_EXTENDED 0x01
#define REG_ICASE 0x02
#define REG_NEWLINE 0x04
#define REG_NOSUB 0x08

/* tre_regcomp() flags added by TRE; they continue the bit sequence
   started by the POSIX flags.  REG_BASIC is the absence of any flag. */
#define REG_BASIC 0
#define REG_LITERAL 0x10
#define REG_RIGHT_ASSOC 0x20
#define REG_UNGREEDY 0x40
#define REG_USEBYTES 0x80

/* tre_regexec() flags defined by POSIX. */
#define REG_NOTBOL 0x01
#define REG_NOTEOL 0x02

/* tre_regexec() flags added by TRE, in the bits following REG_NOTEOL. */
#define REG_APPROX_MATCHER 0x04
#define REG_BACKTRACKING_MATCHER 0x08
#define REG_STARTEND 0x10
127 
128 #endif /* !TRE_USE_SYSTEM_REGEX_H */
129 
/* REG_NOSPEC and REG_LITERAL are two names for the same flag: when only
   one of them exists, define the other in terms of it.  When both or
   neither exist, nothing is defined here. */
#ifndef REG_NOSPEC
#ifdef REG_LITERAL
#define REG_NOSPEC REG_LITERAL
#endif /* REG_LITERAL */
#elif !defined(REG_LITERAL)
#define REG_LITERAL REG_NOSPEC
#endif /* !REG_NOSPEC */
136 
/* Upper limit on the iteration count in a bound expression.  Any
   definition seen earlier is dropped in favour of this value. */
#ifdef RE_DUP_MAX
#undef RE_DUP_MAX
#endif
#define RE_DUP_MAX 255
140 
141 /* The POSIX.2 regexp functions */
142 extern int
143 tre_regcomp(regex_t *preg, const char *regex, int cflags);
144 
145 extern int
146 tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
147 	regmatch_t pmatch[], int eflags);
148 
149 extern int
150 tre_regcompb(regex_t *preg, const char *regex, int cflags);
151 
152 extern int
153 tre_regexecb(const regex_t *preg, const char *string, size_t nmatch,
154 	regmatch_t pmatch[], int eflags);
155 
156 extern size_t
157 tre_regerror(int errcode, const regex_t *preg, char *errbuf,
158 	 size_t errbuf_size);
159 
160 extern void
161 tre_regfree(regex_t *preg);
162 
#if defined(TRE_WCHAR)
#if defined(HAVE_WCHAR_H)
#include <wchar.h>
#endif /* HAVE_WCHAR_H */

/* wchar_t counterparts of the compile/match functions; these are not
   part of POSIX.2. */
int tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);

int tre_regwexec(const regex_t *preg, const wchar_t *string, size_t nmatch,
		 regmatch_t pmatch[], int eflags);
#endif /* TRE_WCHAR */
176 
177 /* Versions with a maximum length argument and therefore the capability to
178    handle null characters in the middle of the strings (not in POSIX.2). */
179 extern int
180 tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);
181 
182 extern int
183 tre_regnexec(const regex_t *preg, const char *string, size_t len,
184 	 size_t nmatch, regmatch_t pmatch[], int eflags);
185 
186 /* regn*b versions take byte literally as 8-bit values */
187 extern int
188 tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags);
189 
190 extern int
191 tre_regnexecb(const regex_t *preg, const char *str, size_t len,
192 	  size_t nmatch, regmatch_t pmatch[], int eflags);
193 
#if defined(TRE_WCHAR)
/* Length-limited wchar_t variants of the compile/match functions. */
int tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len,
		  int cflags);

int tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
		  size_t nmatch, regmatch_t pmatch[], int eflags);
#endif /* TRE_WCHAR */
202 
203 #ifdef TRE_APPROX
204 
/* Parameters controlling an approximate match: per-edit default costs
   followed by the limits a match must stay within. */
typedef struct {
  /* Default cost of an inserted character. */
  int cost_ins;
  /* Default cost of a deleted character. */
  int cost_del;
  /* Default cost of a substituted character. */
  int cost_subst;
  /* Maximum allowed cost of a match. */
  int max_cost;

  /* Maximum allowed number of inserts. */
  int max_ins;
  /* Maximum allowed number of deletes. */
  int max_del;
  /* Maximum allowed number of substitutes. */
  int max_subst;
  /* Maximum allowed number of errors total. */
  int max_err;
} regaparams_t;
217 
218 /* Approximate matching result struct. */
219 typedef struct {
220   size_t nmatch;       /* Length of pmatch[] array. */
221   regmatch_t *pmatch;  /* Submatch data. */
222   int cost;	       /* Cost of the match. */
223   int num_ins;	       /* Number of inserts in the match. */
224   int num_del;	       /* Number of deletes in the match. */
225   int num_subst;       /* Number of substitutes in the match. */
226 } regamatch_t;
227 
228 
229 /* Approximate matching functions. */
230 extern int
231 tre_regaexec(const regex_t *preg, const char *string,
232 	 regamatch_t *match, regaparams_t params, int eflags);
233 
234 extern int
235 tre_reganexec(const regex_t *preg, const char *string, size_t len,
236 	  regamatch_t *match, regaparams_t params, int eflags);
237 
238 extern int
239 tre_regaexecb(const regex_t *preg, const char *string,
240 	  regamatch_t *match, regaparams_t params, int eflags);
241 
242 #ifdef TRE_WCHAR
243 /* Wide character approximate matching. */
244 extern int
245 tre_regawexec(const regex_t *preg, const wchar_t *string,
246 	  regamatch_t *match, regaparams_t params, int eflags);
247 
248 extern int
249 tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len,
250 	   regamatch_t *match, regaparams_t params, int eflags);
251 #endif /* TRE_WCHAR */
252 
253 /* Sets the parameters to default values. */
254 extern void
255 tre_regaparams_default(regaparams_t *params);
256 #endif /* TRE_APPROX */
257 
/* Character type seen by the tre_str_source callbacks: wchar_t in builds
   with TRE_WCHAR, a plain unsigned byte otherwise. */
#ifndef TRE_WCHAR
typedef unsigned char tre_char_t;
#else /* TRE_WCHAR */
typedef wchar_t tre_char_t;
#endif /* TRE_WCHAR */
263 
264 typedef struct {
265   int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
266   void (*rewind)(size_t pos, void *context);
267   int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
268   void *context;
269 } tre_str_source;
270 
271 extern int
272 tre_reguexec(const regex_t *preg, const tre_str_source *string,
273 	 size_t nmatch, regmatch_t pmatch[], int eflags);
274 
/* Returns the version string.  The string is static, so the caller must
   not free it. */
char *tre_version(void);
278 
/* Values accepted as the `query' argument of tre_config(). */
enum {
  TRE_CONFIG_APPROX = 0,
  TRE_CONFIG_WCHAR = 1,
  TRE_CONFIG_MULTIBYTE = 2,
  TRE_CONFIG_SYSTEM_ABI = 3,
  TRE_CONFIG_VERSION = 4
};

/* Returns the value for a config parameter.  The type that `result' must
   point to depends on the value of `query'; see the documentation for
   more details. */
int tre_config(int query, void *result);
292 
293 /* Returns 1 if the compiled pattern has back references, 0 if not. */
294 extern int
295 tre_have_backrefs(const regex_t *preg);
296 
297 /* Returns 1 if the compiled pattern uses approximate matching features,
298    0 if not. */
299 extern int
300 tre_have_approx(const regex_t *preg);
301 
302 #ifdef __cplusplus
303 }
304 #endif
305 #endif				/* TRE_H */
306 
307 /* EOF */
308