xref: /netbsd-src/external/bsd/tre/dist/lib/tre.h (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*
2   tre.h - TRE public API definitions
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
9 #ifndef TRE_H
10 #define TRE_H 1
11 
12 #include "tre-config.h"
13 
14 #ifdef HAVE_SYS_TYPES_H
15 #include <sys/types.h>
16 #endif /* HAVE_SYS_TYPES_H */
17 
18 #ifdef HAVE_LIBUTF8_H
19 #include <libutf8.h>
20 #endif /* HAVE_LIBUTF8_H */
21 
22 #ifdef TRE_USE_SYSTEM_REGEX_H
23 /* Include the system regex.h to make TRE ABI compatible with the
24    system regex. */
25 #include TRE_SYSTEM_REGEX_H_PATH
/* Tie the plain POSIX names to the tre_-prefixed functions declared further
   down.  If the platform supplies a __weak_alias macro it is invoked once
   per (POSIX name, tre_ name) pair; otherwise the tre_ names are simply
   macro-renamed to the POSIX names.
   NOTE(review): the exact linkage effect of __weak_alias is defined by the
   platform headers, not by this file -- confirm against sys/cdefs.h. */
#if defined(__weak_alias)
__weak_alias(regcomp, tre_regcomp)
__weak_alias(regexec, tre_regexec)
__weak_alias(regerror, tre_regerror)
__weak_alias(regfree, tre_regfree)
#else /* !defined(__weak_alias) */
# define tre_regcomp  regcomp
# define tre_regexec  regexec
# define tre_regerror regerror
# define tre_regfree  regfree
#endif /* defined(__weak_alias) */
37 #endif /* TRE_USE_SYSTEM_REGEX_H */
38 
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42 
43 #ifdef TRE_USE_SYSTEM_REGEX_H
44 
/* Fill in whatever the system regex.h did not provide. */

/* Success code. */
#if !defined(REG_OK)
# define REG_OK 0
#endif /* !defined(REG_OK) */

/* Error code type, for systems that do not declare one. */
#if !defined(HAVE_REG_ERRCODE_T)
typedef int reg_errcode_t;
#endif /* !defined(HAVE_REG_ERRCODE_T) */

/* Only invent REG_LITERAL when the system offers neither spelling of the
   flag. */
#if !(defined(REG_NOSPEC) || defined(REG_LITERAL))
# define REG_LITERAL 0x1000
#endif

/* Extra tre_regcomp() flags.  REG_RIGHT_ASSOC and REG_UNGREEDY are derived
   from REG_LITERAL, whatever value that ends up having. */
#if !defined(REG_BASIC)
# define REG_BASIC 0
#endif /* !defined(REG_BASIC) */
#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
#define REG_UNGREEDY    (REG_RIGHT_ASSOC << 1)

/* Extra tre_regexec() flags. */
#define REG_APPROX_MATCHER       0x1000
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
67 
68 #else /* !TRE_USE_SYSTEM_REGEX_H */
69 
/* If we're not using the system regex.h, we need to define the
   structs and enums ourselves. */
72 
/* Type of the offsets stored in regmatch_t. */
typedef int regoff_t;

/* Compiled pattern object. */
typedef struct {
  /* Number of parenthesized subexpressions. */
  size_t re_nsub;
  /* For internal use only. */
  void *value;
} regex_t;

/* One match record: the rm_so / rm_eo offset pair (named as in the POSIX
   regmatch_t). */
typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;
83 
84 
/* Error codes.  REG_OK is zero; the POSIX tre_regcomp() error codes follow
   in the order the standard lists them.  Values are written out explicitly
   but are the same ones the compiler would assign implicitly. */
typedef enum {
  REG_OK       = 0,   /* No error. */
  REG_NOMATCH  = 1,   /* No match. */
  REG_BADPAT   = 2,   /* Invalid regexp. */
  REG_ECOLLATE = 3,   /* Unknown collating element. */
  REG_ECTYPE   = 4,   /* Unknown character class name. */
  REG_EESCAPE  = 5,   /* Trailing backslash. */
  REG_ESUBREG  = 6,   /* Invalid back reference. */
  REG_EBRACK   = 7,   /* "[]" imbalance */
  REG_EPAREN   = 8,   /* "\(\)" or "()" imbalance */
  REG_EBRACE   = 9,   /* "\{\}" or "{}" imbalance */
  REG_BADBR    = 10,  /* Invalid content of {} */
  REG_ERANGE   = 11,  /* Invalid use of range operator */
  REG_ESPACE   = 12,  /* Out of memory.  */
  REG_BADRPT   = 13   /* Invalid use of repetition operators. */
} reg_errcode_t;
103 
/* POSIX tre_regcomp() flags: one bit each, starting at bit 0. */
#define REG_EXTENDED    (1 << 0)
#define REG_ICASE       (1 << 1)
#define REG_NEWLINE     (1 << 2)
#define REG_NOSUB       (1 << 3)

/* Extra tre_regcomp() flags.  REG_BASIC is the absence of any flag; the
   others continue the bit sequence above. */
#define REG_BASIC       0
#define REG_LITERAL     (1 << 4)
#define REG_RIGHT_ASSOC (1 << 5)
#define REG_UNGREEDY    (1 << 6)
115 
/* POSIX tre_regexec() flags: one bit each, starting at bit 0. */
#define REG_NOTBOL               (1 << 0)
#define REG_NOTEOL               (1 << 1)

/* Extra tre_regexec() flags, continuing the bit sequence above. */
#define REG_APPROX_MATCHER       (1 << 2)
#define REG_BACKTRACKING_MATCHER (1 << 3)
123 
124 #endif /* !TRE_USE_SYSTEM_REGEX_H */
125 
/* REG_NOSPEC and REG_LITERAL mean the same thing: if exactly one of the two
   names is defined, define the other one in terms of it. */
#if !defined(REG_NOSPEC)
# if defined(REG_LITERAL)
#  define REG_NOSPEC  REG_LITERAL
# endif
#elif !defined(REG_LITERAL)
# define REG_LITERAL  REG_NOSPEC
#endif /* REG_NOSPEC / REG_LITERAL aliasing */

/* The maximum number of iterations in a bound expression.  Any earlier
   definition of RE_DUP_MAX is discarded first. */
#undef RE_DUP_MAX
#define RE_DUP_MAX 255
136 
/* The POSIX.2 regexp functions.  The signatures follow the POSIX
   regcomp() / regexec() / regerror() / regfree() interface. */

/* Takes the regex_t to fill in, the pattern string and the compile flags. */
extern int tre_regcomp(regex_t *preg, const char *regex, int cflags);

/* Takes a compiled pattern, the subject string, the pmatch[] array with its
   length nmatch, and the execution flags. */
extern int tre_regexec(const regex_t *preg,
                       const char *string,
                       size_t nmatch,
                       regmatch_t pmatch[],
                       int eflags);

/* Takes an error code, the related pattern, and an output buffer with its
   size. */
extern size_t tre_regerror(int errcode,
                           const regex_t *preg,
                           char *errbuf,
                           size_t errbuf_size);

/* Takes the compiled pattern to release. */
extern void tre_regfree(regex_t *preg);
151 
152 #ifdef TRE_WCHAR
153 #ifdef HAVE_WCHAR_H
154 #include <wchar.h>
155 #endif /* HAVE_WCHAR_H */
156 
/* Wide character versions (not in POSIX.2).  Same parameter lists as
   tre_regcomp() / tre_regexec(), with wchar_t strings instead of char. */
extern int tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);

extern int tre_regwexec(const regex_t *preg,
                        const wchar_t *string,
                        size_t nmatch,
                        regmatch_t pmatch[],
                        int eflags);
164 #endif /* TRE_WCHAR */
165 
/* Length-bounded versions (not in POSIX.2).  The extra `len' argument gives
   the maximum length, which is what lets these handle null characters in
   the middle of the strings. */
extern int tre_regncomp(regex_t *preg,
                        const char *regex,
                        size_t len,
                        int cflags);

extern int tre_regnexec(const regex_t *preg,
                        const char *string,
                        size_t len,
                        size_t nmatch,
                        regmatch_t pmatch[],
                        int eflags);
174 
175 #ifdef TRE_WCHAR
/* Wide character variants that also take a `len' argument. */
extern int tre_regwncomp(regex_t *preg,
                         const wchar_t *regex,
                         size_t len,
                         int cflags);

extern int tre_regwnexec(const regex_t *preg,
                         const wchar_t *string,
                         size_t len,
                         size_t nmatch,
                         regmatch_t pmatch[],
                         int eflags);
182 #endif /* TRE_WCHAR */
183 
184 #ifdef TRE_APPROX
185 
/* Approximate matching parameter struct: four cost settings followed by
   four count limits. */
typedef struct {
  /* Default cost of an inserted character. */
  int cost_ins;
  /* Default cost of a deleted character. */
  int cost_del;
  /* Default cost of a substituted character. */
  int cost_subst;
  /* Maximum allowed cost of a match. */
  int max_cost;

  /* Maximum allowed number of inserts. */
  int max_ins;
  /* Maximum allowed number of deletes. */
  int max_del;
  /* Maximum allowed number of substitutes. */
  int max_subst;
  /* Maximum allowed number of errors total. */
  int max_err;
} regaparams_t;
198 
/* Approximate matching result struct. */
typedef struct {
  /* Length of pmatch[] array. */
  size_t nmatch;
  /* Submatch data. */
  regmatch_t *pmatch;
  /* Cost of the match. */
  int cost;
  /* Number of inserts in the match. */
  int num_ins;
  /* Number of deletes in the match. */
  int num_del;
  /* Number of substitutes in the match. */
  int num_subst;
} regamatch_t;
208 
209 
210 /* Approximate matching functions. */
211 extern int
212 tre_regaexec(const regex_t *preg, const char *string,
213 	 regamatch_t *match, regaparams_t params, int eflags);
214 
215 extern int
216 tre_reganexec(const regex_t *preg, const char *string, size_t len,
217 	  regamatch_t *match, regaparams_t params, int eflags);
218 #ifdef TRE_WCHAR
219 /* Wide character approximate matching. */
220 extern int
221 tre_regawexec(const regex_t *preg, const wchar_t *string,
222 	  regamatch_t *match, regaparams_t params, int eflags);
223 
224 extern int
225 tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len,
226 	   regamatch_t *match, regaparams_t params, int eflags);
227 #endif /* TRE_WCHAR */
228 
229 /* Sets the parameters to default values. */
230 extern void
231 tre_regaparams_default(regaparams_t *params);
232 #endif /* TRE_APPROX */
233 
/* Character type handed to tre_str_source callbacks: wchar_t when TRE_WCHAR
   is defined, unsigned char otherwise. */
#if defined(TRE_WCHAR)
typedef wchar_t tre_char_t;
#else /* !defined(TRE_WCHAR) */
typedef unsigned char tre_char_t;
#endif /* defined(TRE_WCHAR) */

/* Caller-supplied string source: three callbacks plus an opaque `context'
   pointer, which each callback also receives as its last argument.
   NOTE(review): the callback contracts (return values, meaning of
   `pos_add') are not spelled out in this header -- confirm against the
   library documentation before relying on them. */
typedef struct {
  /* Receives a tre_char_t slot, a position-increment slot and the context. */
  int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
  /* Receives a position and the context. */
  void (*rewind)(size_t pos, void *context);
  /* Receives two positions, a length and the context. */
  int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
  /* Opaque pointer owned by the caller. */
  void *context;
} tre_str_source;

/* Like tre_regexec(), but takes a tre_str_source in place of a string. */
extern int tre_reguexec(const regex_t *preg,
                        const tre_str_source *string,
                        size_t nmatch,
                        regmatch_t pmatch[],
                        int eflags);
250 
/* Returns the version string.  The returned string is static. */
extern char *tre_version(void);
254 
/* Returns the value for a config parameter.  The type that `result' must
   point to depends on the value of `query'; see the documentation for more
   details. */
extern int tre_config(int query, void *result);

/* Values accepted as the `query' argument of tre_config(). */
enum {
  TRE_CONFIG_APPROX     = 0,
  TRE_CONFIG_WCHAR      = 1,
  TRE_CONFIG_MULTIBYTE  = 2,
  TRE_CONFIG_SYSTEM_ABI = 3,
  TRE_CONFIG_VERSION    = 4
};
268 
/* Returns 1 if the compiled pattern has back references, 0 if not. */
extern int tre_have_backrefs(const regex_t *preg);
272 
/* Returns 1 if the compiled pattern uses approximate matching features,
   0 if not. */
extern int tre_have_approx(const regex_t *preg);
277 
278 #ifdef __cplusplus
279 }
280 #endif
281 #endif				/* TRE_H */
282 
283 /* EOF */
284