1 /* 2 tre.h - TRE public API definitions 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 9 #ifndef TRE_H 10 #define TRE_H 1 11 12 #include "tre-config.h" 13 14 #ifdef HAVE_SYS_TYPES_H 15 #include <sys/types.h> 16 #endif /* HAVE_SYS_TYPES_H */ 17 18 #ifdef HAVE_LIBUTF8_H 19 #include <libutf8.h> 20 #endif /* HAVE_LIBUTF8_H */ 21 22 #ifdef TRE_USE_SYSTEM_REGEX_H 23 /* Include the system regex.h to make TRE ABI compatible with the 24 system regex. */ 25 #include TRE_SYSTEM_REGEX_H_PATH 26 #ifdef __weak_alias 27 __weak_alias(regcomp, tre_regcomp) 28 __weak_alias(regexec, tre_regexec) 29 __weak_alias(regerror, tre_regerror) 30 __weak_alias(regfree, tre_regfree) 31 #else 32 #define tre_regcomp regcomp 33 #define tre_regexec regexec 34 #define tre_regerror regerror 35 #define tre_regfree regfree 36 #endif 37 #endif /* TRE_USE_SYSTEM_REGEX_H */ 38 39 #ifdef __cplusplus 40 extern "C" { 41 #endif 42 43 #ifdef TRE_USE_SYSTEM_REGEX_H 44 45 #ifndef REG_OK 46 #define REG_OK 0 47 #endif /* !REG_OK */ 48 49 #ifndef HAVE_REG_ERRCODE_T 50 typedef int reg_errcode_t; 51 #endif /* !HAVE_REG_ERRCODE_T */ 52 53 #if !defined(REG_NOSPEC) && !defined(REG_LITERAL) 54 #define REG_LITERAL 0x1000 55 #endif 56 57 /* Extra tre_regcomp() flags. */ 58 #ifndef REG_BASIC 59 #define REG_BASIC 0 60 #endif /* !REG_BASIC */ 61 #define REG_RIGHT_ASSOC (REG_LITERAL << 1) 62 #define REG_UNGREEDY (REG_RIGHT_ASSOC << 1) 63 64 /* Extra tre_regexec() flags. */ 65 #define REG_APPROX_MATCHER 0x1000 66 #define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1) 67 68 #else /* !TRE_USE_SYSTEM_REGEX_H */ 69 70 /* If the we're not using system regex.h, we need to define the 71 structs and enums ourselves. */ 72 73 typedef int regoff_t; 74 typedef struct { 75 size_t re_nsub; /* Number of parenthesized subexpressions. */ 76 void *value; /* For internal use only. */ 77 } regex_t; 78 79 typedef struct { 80 regoff_t rm_so; 81 regoff_t rm_eo; 82 } regmatch_t; 83 84 85 typedef enum { 86 REG_OK = 0, /* No error. */ 87 /* POSIX tre_regcomp() return error codes. (In the order listed in the 88 standard.) */ 89 REG_NOMATCH, /* No match. */ 90 REG_BADPAT, /* Invalid regexp. */ 91 REG_ECOLLATE, /* Unknown collating element. */ 92 REG_ECTYPE, /* Unknown character class name. */ 93 REG_EESCAPE, /* Trailing backslash. */ 94 REG_ESUBREG, /* Invalid back reference. */ 95 REG_EBRACK, /* "[]" imbalance */ 96 REG_EPAREN, /* "\(\)" or "()" imbalance */ 97 REG_EBRACE, /* "\{\}" or "{}" imbalance */ 98 REG_BADBR, /* Invalid content of {} */ 99 REG_ERANGE, /* Invalid use of range operator */ 100 REG_ESPACE, /* Out of memory. */ 101 REG_BADRPT, /* Invalid use of repetition operators. */ 102 REG_INVARG, /* Invalid arguments. */ 103 } reg_errcode_t; 104 105 /* POSIX tre_regcomp() flags. */ 106 #define REG_EXTENDED 1 107 #define REG_ICASE (REG_EXTENDED << 1) 108 #define REG_NEWLINE (REG_ICASE << 1) 109 #define REG_NOSUB (REG_NEWLINE << 1) 110 111 /* Extra tre_regcomp() flags. */ 112 #define REG_BASIC 0 113 #define REG_LITERAL (REG_NOSUB << 1) 114 #define REG_RIGHT_ASSOC (REG_LITERAL << 1) 115 #define REG_UNGREEDY (REG_RIGHT_ASSOC << 1) 116 117 #define REG_USEBYTES (REG_UNGREEDY << 1) 118 119 /* POSIX tre_regexec() flags. */ 120 #define REG_NOTBOL 1 121 #define REG_NOTEOL (REG_NOTBOL << 1) 122 123 /* Extra tre_regexec() flags. */ 124 #define REG_APPROX_MATCHER (REG_NOTEOL << 1) 125 #define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1) 126 #define REG_STARTEND (REG_BACKTRACKING_MATCHER << 1) 127 128 #endif /* !TRE_USE_SYSTEM_REGEX_H */ 129 130 /* REG_NOSPEC and REG_LITERAL mean the same thing. */ 131 #if defined(REG_LITERAL) && !defined(REG_NOSPEC) 132 #define REG_NOSPEC REG_LITERAL 133 #elif defined(REG_NOSPEC) && !defined(REG_LITERAL) 134 #define REG_LITERAL REG_NOSPEC 135 #endif /* defined(REG_NOSPEC) */ 136 137 /* The maximum number of iterations in a bound expression. */ 138 #undef RE_DUP_MAX 139 #define RE_DUP_MAX 255 140 141 /* The POSIX.2 regexp functions */ 142 extern int 143 tre_regcomp(regex_t *preg, const char *regex, int cflags); 144 145 extern int 146 tre_regexec(const regex_t *preg, const char *string, size_t nmatch, 147 regmatch_t pmatch[], int eflags); 148 149 extern int 150 tre_regcompb(regex_t *preg, const char *regex, int cflags); 151 152 extern int 153 tre_regexecb(const regex_t *preg, const char *string, size_t nmatch, 154 regmatch_t pmatch[], int eflags); 155 156 extern size_t 157 tre_regerror(int errcode, const regex_t *preg, char *errbuf, 158 size_t errbuf_size); 159 160 extern void 161 tre_regfree(regex_t *preg); 162 163 #ifdef TRE_WCHAR 164 #ifdef HAVE_WCHAR_H 165 #include <wchar.h> 166 #endif /* HAVE_WCHAR_H */ 167 168 /* Wide character versions (not in POSIX.2). */ 169 extern int 170 tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags); 171 172 extern int 173 tre_regwexec(const regex_t *preg, const wchar_t *string, 174 size_t nmatch, regmatch_t pmatch[], int eflags); 175 #endif /* TRE_WCHAR */ 176 177 /* Versions with a maximum length argument and therefore the capability to 178 handle null characters in the middle of the strings (not in POSIX.2). */ 179 extern int 180 tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags); 181 182 extern int 183 tre_regnexec(const regex_t *preg, const char *string, size_t len, 184 size_t nmatch, regmatch_t pmatch[], int eflags); 185 186 /* regn*b versions take byte literally as 8-bit values */ 187 extern int 188 tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags); 189 190 extern int 191 tre_regnexecb(const regex_t *preg, const char *str, size_t len, 192 size_t nmatch, regmatch_t pmatch[], int eflags); 193 194 #ifdef TRE_WCHAR 195 extern int 196 tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags); 197 198 extern int 199 tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len, 200 size_t nmatch, regmatch_t pmatch[], int eflags); 201 #endif /* TRE_WCHAR */ 202 203 #ifdef TRE_APPROX 204 205 /* Approximate matching parameter struct. */ 206 typedef struct { 207 int cost_ins; /* Default cost of an inserted character. */ 208 int cost_del; /* Default cost of a deleted character. */ 209 int cost_subst; /* Default cost of a substituted character. */ 210 int max_cost; /* Maximum allowed cost of a match. */ 211 212 int max_ins; /* Maximum allowed number of inserts. */ 213 int max_del; /* Maximum allowed number of deletes. */ 214 int max_subst; /* Maximum allowed number of substitutes. */ 215 int max_err; /* Maximum allowed number of errors total. */ 216 } regaparams_t; 217 218 /* Approximate matching result struct. */ 219 typedef struct { 220 size_t nmatch; /* Length of pmatch[] array. */ 221 regmatch_t *pmatch; /* Submatch data. */ 222 int cost; /* Cost of the match. */ 223 int num_ins; /* Number of inserts in the match. */ 224 int num_del; /* Number of deletes in the match. */ 225 int num_subst; /* Number of substitutes in the match. */ 226 } regamatch_t; 227 228 229 /* Approximate matching functions. */ 230 extern int 231 tre_regaexec(const regex_t *preg, const char *string, 232 regamatch_t *match, regaparams_t params, int eflags); 233 234 extern int 235 tre_reganexec(const regex_t *preg, const char *string, size_t len, 236 regamatch_t *match, regaparams_t params, int eflags); 237 238 extern int 239 tre_regaexecb(const regex_t *preg, const char *string, 240 regamatch_t *match, regaparams_t params, int eflags); 241 242 #ifdef TRE_WCHAR 243 /* Wide character approximate matching. */ 244 extern int 245 tre_regawexec(const regex_t *preg, const wchar_t *string, 246 regamatch_t *match, regaparams_t params, int eflags); 247 248 extern int 249 tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len, 250 regamatch_t *match, regaparams_t params, int eflags); 251 #endif /* TRE_WCHAR */ 252 253 /* Sets the parameters to default values. */ 254 extern void 255 tre_regaparams_default(regaparams_t *params); 256 #endif /* TRE_APPROX */ 257 258 #ifdef TRE_WCHAR 259 typedef wchar_t tre_char_t; 260 #else /* !TRE_WCHAR */ 261 typedef unsigned char tre_char_t; 262 #endif /* !TRE_WCHAR */ 263 264 typedef struct { 265 int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context); 266 void (*rewind)(size_t pos, void *context); 267 int (*compare)(size_t pos1, size_t pos2, size_t len, void *context); 268 void *context; 269 } tre_str_source; 270 271 extern int 272 tre_reguexec(const regex_t *preg, const tre_str_source *string, 273 size_t nmatch, regmatch_t pmatch[], int eflags); 274 275 /* Returns the version string. The returned string is static. */ 276 extern char * 277 tre_version(void); 278 279 /* Returns the value for a config parameter. The type to which `result' 280 must point to depends of the value of `query', see documentation for 281 more details. */ 282 extern int 283 tre_config(int query, void *result); 284 285 enum { 286 TRE_CONFIG_APPROX, 287 TRE_CONFIG_WCHAR, 288 TRE_CONFIG_MULTIBYTE, 289 TRE_CONFIG_SYSTEM_ABI, 290 TRE_CONFIG_VERSION 291 }; 292 293 /* Returns 1 if the compiled pattern has back references, 0 if not. */ 294 extern int 295 tre_have_backrefs(const regex_t *preg); 296 297 /* Returns 1 if the compiled pattern uses approximate matching features, 298 0 if not. */ 299 extern int 300 tre_have_approx(const regex_t *preg); 301 302 #ifdef __cplusplus 303 } 304 #endif 305 #endif /* TRE_H */ 306 307 /* EOF */ 308