xref: /dflybsd-src/lib/libc/tre-regex/regex.h (revision e4bdac6bd0bece3ae6b3233ad260e8e82d21ba76)
1 /*
2  * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
20  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  */
29 
30 #ifndef _REGEX_H_
31 #define	_REGEX_H_
32 
33 #include <sys/cdefs.h>
34 #include <sys/types.h>
35 #include <wchar.h>
36 #include <xlocale.h>
37 
/*
 * Map the tre_-prefixed names used by the prototypes in this header onto
 * the unprefixed regex names.
 */

/* Narrow-string compile/execute/error/free. */
#define	tre_regcomp	regcomp
#define	tre_regexec	regexec
#define	tre_regerror	regerror
#define	tre_regfree	regfree

/* Wide-character and length-limited variants. */
#define	tre_regwcomp	regwcomp
#define	tre_regwexec	regwexec
#define	tre_regncomp	regncomp
#define	tre_regnexec	regnexec

/*
 * NOTE(review): the prototypes in this header are spelled tre_regwncomp and
 * tre_regwnexec ("wn"), while the two aliases below are spelled "nw", so
 * they never apply to those declarations.  Confirm which spelling (and
 * which exported symbol name) is intended before changing either side.
 */
#define	tre_regnwcomp	regnwcomp
#define	tre_regnwexec	regnwexec
49 
/*
 * Status and error codes of the regex API.  The numeric values are written
 * out explicitly; they are the same values the implicit numbering yields.
 */
typedef enum {
  REG_OK       = 0,	/* No error. */
  REG_NOMATCH  = 1,	/* No match. */
  REG_BADPAT   = 2,	/* Invalid regexp. */
  REG_ECOLLATE = 3,	/* Unknown collating element. */
  REG_ECTYPE   = 4,	/* Unknown character class name. */
  REG_EESCAPE  = 5,	/* Trailing backslash. */
  REG_ESUBREG  = 6,	/* Invalid back reference. */
  REG_EBRACK   = 7,	/* "[]" imbalance. */
  REG_EPAREN   = 8,	/* "\(\)" or "()" imbalance. */
  REG_EBRACE   = 9,	/* "\{\}" or "{}" imbalance. */
  REG_BADBR    = 10,	/* Invalid content of {}. */
  REG_ERANGE   = 11,	/* Invalid use of range operator. */
  REG_ESPACE   = 12,	/* Out of memory. */
  REG_BADRPT   = 13,	/* Invalid use of repetition operators. */
  REG_EMPTY    = 14,	/* Regexp was a zero-length string. */
  REG_INVARG   = 15,	/* Invalid argument to regex routine. */
  REG_ILLSEQ   = 16	/* Illegal byte sequence. */
} reg_errcode_t;
69 
/* Query selectors accepted as the `query' argument of tre_config(). */
enum {
  TRE_CONFIG_APPROX     = 0,
  TRE_CONFIG_WCHAR      = 1,
  TRE_CONFIG_MULTIBYTE  = 2,
  TRE_CONFIG_SYSTEM_ABI = 3,
  TRE_CONFIG_VERSION    = 4
};
77 
/* Character type handed to the tre_str_source get_next_char callback. */
typedef wchar_t tre_char_t;

/* Offset type used for the rm_so/rm_eo members of regmatch_t. */
typedef int regoff_t;
80 
/*
 * Compiled pattern handle passed to the regex functions.
 * Member order and types are part of the ABI; do not rearrange.
 */
typedef struct {
  int re_magic;

  /* Number of parenthesized subexpressions. */
  size_t re_nsub;

  /* Regex string end pointer (REG_PEND). */
  const void *re_endp;

  /* For internal use only. */
  void *value;
} regex_t;
87 
88 typedef struct {
89   regoff_t rm_so;
90   regoff_t rm_eo;
91 } regmatch_t;
92 
/*
 * Parameters for approximate matching: per-edit default costs followed by
 * the upper limits a match has to stay within.
 */
typedef struct {
  /* Default costs. */
  int cost_ins;		/* Inserted character. */
  int cost_del;		/* Deleted character. */
  int cost_subst;	/* Substituted character. */

  /* Limits. */
  int max_cost;		/* Maximum allowed cost of a match. */
  int max_ins;		/* Maximum allowed number of inserts. */
  int max_del;		/* Maximum allowed number of deletes. */
  int max_subst;	/* Maximum allowed number of substitutes. */
  int max_err;		/* Maximum allowed number of errors total. */
} regaparams_t;
105 
106 /* Approximate matching result struct. */
107 typedef struct {
108   size_t nmatch;	/* Length of pmatch[] array. */
109   regmatch_t *pmatch;	/* Submatch data. */
110   int cost;		/* Cost of the match. */
111   int num_ins;		/* Number of inserts in the match. */
112   int num_del;		/* Number of deletes in the match. */
113   int num_subst;	/* Number of substitutes in the match. */
114 } regamatch_t;
115 
116 typedef struct {
117   int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
118   void (*rewind)(size_t pos, void *context);
119   int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
120   void *context;
121 } tre_str_source;
122 
/*
 * tre_regcomp() flags.  Each flag occupies its own bit; the values are
 * written as literals and are identical to the former shift chain.
 */

/* POSIX flags. */
#define	REG_EXTENDED	0x0001
#define	REG_ICASE	0x0002
#define	REG_NEWLINE	0x0004
#define	REG_NOSUB	0x0008

/* Extra (non-POSIX) flags. */
#define	REG_BASIC	0
#define	REG_LITERAL	0x0010
#define	REG_RIGHT_ASSOC	0x0020
#define	REG_UNGREEDY	0x0040
#define	REG_PEND	0x0080
#define	REG_ENHANCED	0x0100

/* Alternative names for two of the flags above. */
#define	REG_NOSPEC	REG_LITERAL
#define	REG_MINIMAL	REG_UNGREEDY
140 
/*
 * tre_regexec() flags, written as literals (same values as the former
 * shift chain).
 */
#define	REG_NOTBOL	0x0001
#define	REG_NOTEOL	0x0002
#define	REG_STARTEND	0x0004
#define	REG_BACKR	0x0008

/*
 * Extra tre_regexec() flags.
 *
 * NOTE(review): REG_APPROX_MATCHER has the same value as REG_STARTEND and
 * REG_BACKTRACKING_MATCHER the same value as REG_BACKR, so a caller cannot
 * set one member of either pair without the other.  Confirm that the
 * overlap is intended; the values are left untouched here because they are
 * part of the ABI.
 */
#define	REG_APPROX_MATCHER		0x0004
#define	REG_BACKTRACKING_MATCHER	0x0008
150 
/* Upper limit on the iteration count in a bound expression. */
#define	RE_DUP_MAX	255

/*
 * Defined to 1 by this header.  Presumably a marker that lets other code
 * detect the length-limited exec entry points -- confirm with its users.
 */
#define	_REG_nexec	1
155 
156 __BEGIN_DECLS
157 
158 /* The POSIX.2 regexp functions */
159 int
160 tre_regcomp(regex_t *preg, const char *regex, int cflags);
161 
162 int
163 tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
164 	regmatch_t pmatch[], int eflags);
165 
166 size_t
167 tre_regerror(int errcode, const regex_t *preg, char *errbuf,
168 	 size_t errbuf_size);
169 
170 void
171 tre_regfree(regex_t *preg);
172 
173 /* Wide character versions (not in POSIX.2). */
174 int
175 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);
176 
177 int
178 tre_regwexec(const regex_t *preg, const wchar_t *string,
179 	 size_t nmatch, regmatch_t pmatch[], int eflags);
180 
181 /* Versions with a maximum length argument and therefore the capability to
182    handle null characters in the middle of the strings (not in POSIX.2). */
183 int
184 tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);
185 
186 int
187 tre_regnexec(const regex_t *preg, const char *string, size_t len,
188 	 size_t nmatch, regmatch_t pmatch[], int eflags);
189 
190 int
191 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);
192 
193 int
194 tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
195 	  size_t nmatch, regmatch_t pmatch[], int eflags);
196 
197 /* Returns the version string.	The returned string is static. */
198 char *
199 tre_version(void);
200 
201 /* Returns the value for a config parameter.  The type to which `result'
202    must point to depends of the value of `query', see documentation for
203    more details. */
204 int
205 tre_config(int query, void *result);
206 
207 /* Returns 1 if the compiled pattern has back references, 0 if not. */
208 int
209 tre_have_backrefs(const regex_t *preg);
210 
211 /* Returns 1 if the compiled pattern uses approximate matching features,
212    0 if not. */
213 int
214 tre_have_approx(const regex_t *preg);
215 __END_DECLS
216 
217 /* The POSIX.2 regexp functions, locale version */
218 int
219 tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t locale);
220 
221 int
222 tre_regncomp_l(regex_t *preg, const char *regex, size_t len, int cflags,
223     locale_t locale);
224 
225 int
226 tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags,
227     locale_t locale);
228 
229 int
230 tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t len, int cflags,
231     locale_t locale);
232 
233 #endif /* !_REGEX_H_ */
234