163d4abf0Sagc /*
263d4abf0Sagc tre.h - TRE public API definitions
363d4abf0Sagc
463d4abf0Sagc This software is released under a BSD-style license.
563d4abf0Sagc See the file LICENSE for details and copyright.
663d4abf0Sagc
763d4abf0Sagc */
863d4abf0Sagc
963d4abf0Sagc #ifndef TRE_H
1063d4abf0Sagc #define TRE_H 1
1163d4abf0Sagc
1282e82fcaSahoka #include "tre-config.h"
1363d4abf0Sagc
1463d4abf0Sagc #ifdef HAVE_SYS_TYPES_H
1563d4abf0Sagc #include <sys/types.h>
1663d4abf0Sagc #endif /* HAVE_SYS_TYPES_H */
1763d4abf0Sagc
1863d4abf0Sagc #ifdef HAVE_LIBUTF8_H
1963d4abf0Sagc #include <libutf8.h>
2063d4abf0Sagc #endif /* HAVE_LIBUTF8_H */
2163d4abf0Sagc
#ifdef TRE_USE_SYSTEM_REGEX_H
/* Include the system regex.h to make TRE ABI compatible with the
   system regex. */
#include TRE_SYSTEM_REGEX_H_PATH
#ifdef __weak_alias
/* Exactly one alias per POSIX entry point.
   NOTE(review): __weak_alias is not defined in this file; presumably it
   follows the (alias, symbol) convention of the platform's
   <sys/cdefs.h> -- confirm. */
__weak_alias(regcomp, tre_regcomp)
__weak_alias(regexec, tre_regexec)
__weak_alias(regerror, tre_regerror)
__weak_alias(regfree, tre_regfree)
#else
/* Without __weak_alias, rename the tre_-prefixed identifiers to the
   POSIX names at the preprocessor level, so every later use of
   tre_regcomp etc. in this header refers to regcomp etc. */
#define tre_regcomp  regcomp
#define tre_regexec  regexec
#define tre_regerror regerror
#define tre_regfree  regfree
#endif /* __weak_alias */
#endif /* TRE_USE_SYSTEM_REGEX_H */
3863d4abf0Sagc
3963d4abf0Sagc #ifdef __cplusplus
4063d4abf0Sagc extern "C" {
4163d4abf0Sagc #endif
4263d4abf0Sagc
4363d4abf0Sagc #ifdef TRE_USE_SYSTEM_REGEX_H
4463d4abf0Sagc
/* Definitions used when the system regex.h is in effect.  The guarded
   ones are supplied only if nothing has defined them already. */
#ifndef REG_OK
#define REG_OK 0
#endif /* !REG_OK */

#ifndef HAVE_REG_ERRCODE_T
typedef int reg_errcode_t;
#endif /* !HAVE_REG_ERRCODE_T */

/* Provide a private REG_LITERAL bit only when neither spelling
   (REG_NOSPEC or REG_LITERAL) is already defined. */
#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
#define REG_LITERAL 0x1000
#endif

/* Extra tre_regcomp() flags. */
#ifndef REG_BASIC
#define REG_BASIC 0
#endif /* !REG_BASIC */
#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
#define REG_UNGREEDY    (REG_RIGHT_ASSOC << 1)

/* Extra tre_regexec() flags. */
#define REG_APPROX_MATCHER       0x1000
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
6763d4abf0Sagc
6863d4abf0Sagc #else /* !TRE_USE_SYSTEM_REGEX_H */
6963d4abf0Sagc
/* If we're not using the system regex.h, we need to define the
   structs and enums ourselves. */

/* Type of the rm_so / rm_eo members of regmatch_t. */
typedef int regoff_t;

/* Compiled regular expression object. */
typedef struct {
  size_t re_nsub;  /* Number of parenthesized subexpressions. */
  void *value;     /* For internal use only. */
} regex_t;

/* One (sub)match record.
   NOTE(review): presumably rm_so / rm_eo are the start and end offsets
   as in POSIX <regex.h> -- this header does not say; confirm. */
typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;
8363d4abf0Sagc
8463d4abf0Sagc
/* Error codes.  The enumerators are numbered implicitly from
   REG_OK == 0, so their order must not change. */
typedef enum {
  REG_OK = 0,    /* No error. */
  /* POSIX tre_regcomp() return error codes.  (In the order listed in the
     standard.) */
  REG_NOMATCH,   /* No match. */
  REG_BADPAT,    /* Invalid regexp. */
  REG_ECOLLATE,  /* Unknown collating element. */
  REG_ECTYPE,    /* Unknown character class name. */
  REG_EESCAPE,   /* Trailing backslash. */
  REG_ESUBREG,   /* Invalid back reference. */
  REG_EBRACK,    /* "[]" imbalance */
  REG_EPAREN,    /* "\(\)" or "()" imbalance */
  REG_EBRACE,    /* "\{\}" or "{}" imbalance */
  REG_BADBR,     /* Invalid content of {} */
  REG_ERANGE,    /* Invalid use of range operator */
  REG_ESPACE,    /* Out of memory. */
  REG_BADRPT,    /* Invalid use of repetition operators. */
  REG_INVARG     /* Invalid arguments. */
} reg_errcode_t;
10463d4abf0Sagc
/* Flag bits.  Each flag is the previous one shifted left by one, so the
   compile-time flags and the execution-time flags each form their own
   run of consecutive bits.  These stay as macros because later code in
   this header tests REG_LITERAL with defined(). */

/* POSIX tre_regcomp() flags. */
#define REG_EXTENDED    1
#define REG_ICASE       (REG_EXTENDED << 1)
#define REG_NEWLINE     (REG_ICASE << 1)
#define REG_NOSUB       (REG_NEWLINE << 1)

/* Extra tre_regcomp() flags. */
#define REG_BASIC       0
#define REG_LITERAL     (REG_NOSUB << 1)
#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
#define REG_UNGREEDY    (REG_RIGHT_ASSOC << 1)

#define REG_USEBYTES    (REG_UNGREEDY << 1)

/* POSIX tre_regexec() flags. */
#define REG_NOTBOL 1
#define REG_NOTEOL (REG_NOTBOL << 1)

/* Extra tre_regexec() flags. */
#define REG_APPROX_MATCHER       (REG_NOTEOL << 1)
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
#define REG_STARTEND             (REG_BACKTRACKING_MATCHER << 1)
12763d4abf0Sagc
12863d4abf0Sagc #endif /* !TRE_USE_SYSTEM_REGEX_H */
12963d4abf0Sagc
/* REG_NOSPEC and REG_LITERAL mean the same thing.  Whichever one is
   defined is used to define the other; if both or neither are defined,
   nothing is changed. */
#if defined(REG_LITERAL) && !defined(REG_NOSPEC)
#define REG_NOSPEC REG_LITERAL
#elif defined(REG_NOSPEC) && !defined(REG_LITERAL)
#define REG_LITERAL REG_NOSPEC
#endif /* REG_LITERAL / REG_NOSPEC aliasing */

/* The maximum number of iterations in a bound expression.  Any earlier
   definition is discarded first. */
#undef RE_DUP_MAX
#define RE_DUP_MAX 255
14063d4abf0Sagc
14163d4abf0Sagc /* The POSIX.2 regexp functions */
14263d4abf0Sagc extern int
14382e82fcaSahoka tre_regcomp(regex_t *preg, const char *regex, int cflags);
14463d4abf0Sagc
14563d4abf0Sagc extern int
14682e82fcaSahoka tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
14782e82fcaSahoka regmatch_t pmatch[], int eflags);
14863d4abf0Sagc
14913498f30Srin extern int
15013498f30Srin tre_regcompb(regex_t *preg, const char *regex, int cflags);
15113498f30Srin
15213498f30Srin extern int
15313498f30Srin tre_regexecb(const regex_t *preg, const char *string, size_t nmatch,
15413498f30Srin regmatch_t pmatch[], int eflags);
15513498f30Srin
15663d4abf0Sagc extern size_t
15782e82fcaSahoka tre_regerror(int errcode, const regex_t *preg, char *errbuf,
15882e82fcaSahoka size_t errbuf_size);
15963d4abf0Sagc
16063d4abf0Sagc extern void
16182e82fcaSahoka tre_regfree(regex_t *preg);
16263d4abf0Sagc
#ifdef TRE_WCHAR
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif /* HAVE_WCHAR_H */

/* Wide character versions (not in POSIX.2).  Declared only when
   TRE_WCHAR is defined. */
extern int
tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);

extern int
tre_regwexec(const regex_t *preg, const wchar_t *string,
	     size_t nmatch, regmatch_t pmatch[], int eflags);
#endif /* TRE_WCHAR */
17663d4abf0Sagc
17763d4abf0Sagc /* Versions with a maximum length argument and therefore the capability to
17863d4abf0Sagc handle null characters in the middle of the strings (not in POSIX.2). */
17963d4abf0Sagc extern int
18082e82fcaSahoka tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);
18163d4abf0Sagc
18263d4abf0Sagc extern int
18382e82fcaSahoka tre_regnexec(const regex_t *preg, const char *string, size_t len,
18482e82fcaSahoka size_t nmatch, regmatch_t pmatch[], int eflags);
18563d4abf0Sagc
18613498f30Srin /* regn*b versions take byte literally as 8-bit values */
18713498f30Srin extern int
18813498f30Srin tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags);
18913498f30Srin
19013498f30Srin extern int
19113498f30Srin tre_regnexecb(const regex_t *preg, const char *str, size_t len,
19213498f30Srin size_t nmatch, regmatch_t pmatch[], int eflags);
19313498f30Srin
#ifdef TRE_WCHAR
/* Wide character versions taking an explicit length argument.
   Declared only when TRE_WCHAR is defined. */
extern int
tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);

extern int
tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
	      size_t nmatch, regmatch_t pmatch[], int eflags);
#endif /* TRE_WCHAR */
20263d4abf0Sagc
20363d4abf0Sagc #ifdef TRE_APPROX
20463d4abf0Sagc
/* Approximate matching parameter struct.  The first group holds costs,
   the second group holds limits on the number of edits. */
typedef struct {
  int cost_ins;    /* Default cost of an inserted character. */
  int cost_del;    /* Default cost of a deleted character. */
  int cost_subst;  /* Default cost of a substituted character. */
  int max_cost;    /* Maximum allowed cost of a match. */

  int max_ins;     /* Maximum allowed number of inserts. */
  int max_del;     /* Maximum allowed number of deletes. */
  int max_subst;   /* Maximum allowed number of substitutes. */
  int max_err;     /* Maximum allowed number of errors total. */
} regaparams_t;
21763d4abf0Sagc
21863d4abf0Sagc /* Approximate matching result struct. */
21963d4abf0Sagc typedef struct {
22063d4abf0Sagc size_t nmatch; /* Length of pmatch[] array. */
22163d4abf0Sagc regmatch_t *pmatch; /* Submatch data. */
22263d4abf0Sagc int cost; /* Cost of the match. */
22363d4abf0Sagc int num_ins; /* Number of inserts in the match. */
22463d4abf0Sagc int num_del; /* Number of deletes in the match. */
22563d4abf0Sagc int num_subst; /* Number of substitutes in the match. */
22663d4abf0Sagc } regamatch_t;
22763d4abf0Sagc
22863d4abf0Sagc
22963d4abf0Sagc /* Approximate matching functions. */
23063d4abf0Sagc extern int
23182e82fcaSahoka tre_regaexec(const regex_t *preg, const char *string,
23282e82fcaSahoka regamatch_t *match, regaparams_t params, int eflags);
23363d4abf0Sagc
23463d4abf0Sagc extern int
23582e82fcaSahoka tre_reganexec(const regex_t *preg, const char *string, size_t len,
23682e82fcaSahoka regamatch_t *match, regaparams_t params, int eflags);
23713498f30Srin
23813498f30Srin extern int
23913498f30Srin tre_regaexecb(const regex_t *preg, const char *string,
24013498f30Srin regamatch_t *match, regaparams_t params, int eflags);
24113498f30Srin
24263d4abf0Sagc #ifdef TRE_WCHAR
24363d4abf0Sagc /* Wide character approximate matching. */
24463d4abf0Sagc extern int
24582e82fcaSahoka tre_regawexec(const regex_t *preg, const wchar_t *string,
24682e82fcaSahoka regamatch_t *match, regaparams_t params, int eflags);
24763d4abf0Sagc
24863d4abf0Sagc extern int
24982e82fcaSahoka tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len,
25082e82fcaSahoka regamatch_t *match, regaparams_t params, int eflags);
25163d4abf0Sagc #endif /* TRE_WCHAR */
25263d4abf0Sagc
25363d4abf0Sagc /* Sets the parameters to default values. */
25463d4abf0Sagc extern void
25582e82fcaSahoka tre_regaparams_default(regaparams_t *params);
25663d4abf0Sagc #endif /* TRE_APPROX */
25763d4abf0Sagc
/* Character type handed to tre_str_source callbacks: wchar_t when
   TRE_WCHAR is defined, unsigned char otherwise. */
#ifdef TRE_WCHAR
typedef wchar_t tre_char_t;
#else /* !TRE_WCHAR */
typedef unsigned char tre_char_t;
#endif /* !TRE_WCHAR */

/* Callback table describing a caller-supplied string source.  Every
   callback receives `context' as its last argument.
   NOTE(review): the exact contract of each callback (return values,
   meaning of pos_add) is not stated in this header -- see the TRE
   documentation. */
typedef struct {
  int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
  void (*rewind)(size_t pos, void *context);
  int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
  void *context;
} tre_str_source;
27063d4abf0Sagc
27163d4abf0Sagc extern int
27282e82fcaSahoka tre_reguexec(const regex_t *preg, const tre_str_source *string,
27382e82fcaSahoka size_t nmatch, regmatch_t pmatch[], int eflags);
27463d4abf0Sagc
/* Returns the version string.  The returned string is static. */
extern char *
tre_version(void);

/* Returns the value for a config parameter.  The type that `result'
   must point to depends on the value of `query'; see the documentation
   for more details. */
extern int
tre_config(int query, void *result);

/* Values for the `query' argument of tre_config().  Numbered implicitly
   from 0, so the order must not change. */
enum {
  TRE_CONFIG_APPROX,
  TRE_CONFIG_WCHAR,
  TRE_CONFIG_MULTIBYTE,
  TRE_CONFIG_SYSTEM_ABI,
  TRE_CONFIG_VERSION
};
29263d4abf0Sagc
29363d4abf0Sagc /* Returns 1 if the compiled pattern has back references, 0 if not. */
29463d4abf0Sagc extern int
29582e82fcaSahoka tre_have_backrefs(const regex_t *preg);
29663d4abf0Sagc
29763d4abf0Sagc /* Returns 1 if the compiled pattern uses approximate matching features,
29863d4abf0Sagc 0 if not. */
29963d4abf0Sagc extern int
30082e82fcaSahoka tre_have_approx(const regex_t *preg);
30163d4abf0Sagc
30263d4abf0Sagc #ifdef __cplusplus
30363d4abf0Sagc }
30463d4abf0Sagc #endif
30563d4abf0Sagc #endif /* TRE_H */
30663d4abf0Sagc
30763d4abf0Sagc /* EOF */
308