/* xref: /netbsd-src/external/bsd/tre/dist/lib/tre.h (revision b2085f3f9f585877f381ec15d12bc8bd30e63f22) */
/*
  tre.h - TRE public API definitions

  This software is released under a BSD-style license.
  See the file LICENSE for details and copyright.

*/
863d4abf0Sagc 
963d4abf0Sagc #ifndef TRE_H
1063d4abf0Sagc #define TRE_H 1
1163d4abf0Sagc 
1282e82fcaSahoka #include "tre-config.h"
1363d4abf0Sagc 
1463d4abf0Sagc #ifdef HAVE_SYS_TYPES_H
1563d4abf0Sagc #include <sys/types.h>
1663d4abf0Sagc #endif /* HAVE_SYS_TYPES_H */
1763d4abf0Sagc 
1863d4abf0Sagc #ifdef HAVE_LIBUTF8_H
1963d4abf0Sagc #include <libutf8.h>
2063d4abf0Sagc #endif /* HAVE_LIBUTF8_H */
2163d4abf0Sagc 
#ifdef TRE_USE_SYSTEM_REGEX_H
/* Include the system regex.h to make TRE ABI compatible with the
   system regex. */
#include TRE_SYSTEM_REGEX_H_PATH
#ifdef __weak_alias
/* The toolchain provides __weak_alias(alias, symbol): export each POSIX
   name as a weak alias of the corresponding tre_* function. */
__weak_alias(regcomp, tre_regcomp)
__weak_alias(regexec, tre_regexec)
__weak_alias(regerror, tre_regerror)
__weak_alias(regfree, tre_regfree)
#else
/* No __weak_alias available: map the tre_* identifiers onto the POSIX
   names at the preprocessor level instead. */
#define tre_regcomp  regcomp
#define tre_regexec  regexec
#define tre_regerror regerror
#define tre_regfree  regfree
#endif /* __weak_alias */
#endif /* TRE_USE_SYSTEM_REGEX_H */
3863d4abf0Sagc 
3963d4abf0Sagc #ifdef __cplusplus
4063d4abf0Sagc extern "C" {
4163d4abf0Sagc #endif
4263d4abf0Sagc 
4363d4abf0Sagc #ifdef TRE_USE_SYSTEM_REGEX_H
4463d4abf0Sagc 
/* The system regex.h may not provide a "no error" code; supply it. */
#ifndef REG_OK
#define REG_OK 0
#endif /* !REG_OK */

/* Fall back to a plain int when the system header has no reg_errcode_t. */
#ifndef HAVE_REG_ERRCODE_T
typedef int reg_errcode_t;
#endif /* !HAVE_REG_ERRCODE_T */

/* Only invent a REG_LITERAL value when the system header offers neither
   REG_LITERAL nor its synonym REG_NOSPEC. */
#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
#define REG_LITERAL 0x1000
#endif

/* Extra tre_regcomp() flags. */
#ifndef REG_BASIC
#define REG_BASIC	0
#endif /* !REG_BASIC */
/* Each extra flag takes the next bit above REG_LITERAL. */
#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
#define REG_UNGREEDY    (REG_RIGHT_ASSOC << 1)

/* Extra tre_regexec() flags. */
#define REG_APPROX_MATCHER	 0x1000
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
6763d4abf0Sagc 
6863d4abf0Sagc #else /* !TRE_USE_SYSTEM_REGEX_H */
6963d4abf0Sagc 
/* If we're not using the system regex.h, we need to define the
   structs and enums ourselves. */
7263d4abf0Sagc 
/* Offset type used in regmatch_t. */
typedef int regoff_t;

/* Compiled regular expression object. */
typedef struct {
  size_t re_nsub;  /* Number of parenthesized subexpressions. */
  void *value;	   /* For internal use only. */
} regex_t;

/* Extent of one (sub)match.  NOTE(review): field meanings are taken from
   the POSIX regmatch_t this struct mirrors -- confirm. */
typedef struct {
  regoff_t rm_so;  /* Start offset of the match. */
  regoff_t rm_eo;  /* Offset just past the end of the match. */
} regmatch_t;
8363d4abf0Sagc 
8463d4abf0Sagc 
/* Error codes returned by the tre_reg*() functions.  Values are assigned
   sequentially starting from REG_OK == 0. */
typedef enum {
  REG_OK = 0,		/* No error. */
  /* POSIX tre_regcomp() return error codes.  (In the order listed in the
     standard.)	 */
  REG_NOMATCH,		/* No match. */
  REG_BADPAT,		/* Invalid regexp. */
  REG_ECOLLATE,		/* Unknown collating element. */
  REG_ECTYPE,		/* Unknown character class name. */
  REG_EESCAPE,		/* Trailing backslash. */
  REG_ESUBREG,		/* Invalid back reference. */
  REG_EBRACK,		/* "[]" imbalance */
  REG_EPAREN,		/* "\(\)" or "()" imbalance */
  REG_EBRACE,		/* "\{\}" or "{}" imbalance */
  REG_BADBR,		/* Invalid content of {} */
  REG_ERANGE,		/* Invalid use of range operator */
  REG_ESPACE,		/* Out of memory.  */
  REG_BADRPT,		/* Invalid use of repetition operators. */
  REG_INVARG		/* Invalid arguments. */
  /* No trailing comma after the last enumerator: C89 and C++98 reject it,
     and this header is meant to be usable from both. */
} reg_errcode_t;
10463d4abf0Sagc 
/* POSIX tre_regcomp() flags.  Each flag is the next bit above the
   previous one, so they can be OR'ed together. */
#define REG_EXTENDED	1
#define REG_ICASE	(REG_EXTENDED << 1)
#define REG_NEWLINE	(REG_ICASE << 1)
#define REG_NOSUB	(REG_NEWLINE << 1)

/* Extra tre_regcomp() flags. */
#define REG_BASIC	0
#define REG_LITERAL	(REG_NOSUB << 1)
#define REG_RIGHT_ASSOC (REG_LITERAL << 1)
#define REG_UNGREEDY    (REG_RIGHT_ASSOC << 1)

/* NOTE(review): not documented in this header; presumably requests
   byte-wise handling of the input, cf. the tre_reg*b() entry points --
   confirm against the implementation. */
#define REG_USEBYTES    (REG_UNGREEDY << 1)
11813498f30Srin 
/* POSIX tre_regexec() flags. */
#define REG_NOTBOL 1
#define REG_NOTEOL (REG_NOTBOL << 1)

/* Extra tre_regexec() flags; each takes the next free bit above the
   POSIX execution flags. */
#define REG_APPROX_MATCHER	 (REG_NOTEOL << 1)
#define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1)
#define REG_STARTEND		 (REG_BACKTRACKING_MATCHER << 1)
12763d4abf0Sagc 
12863d4abf0Sagc #endif /* !TRE_USE_SYSTEM_REGEX_H */
12963d4abf0Sagc 
/* REG_NOSPEC and REG_LITERAL mean the same thing.  Whichever one is
   defined at this point, define the other as its synonym; if neither or
   both are defined, nothing happens. */
#if defined(REG_LITERAL) && !defined(REG_NOSPEC)
#define REG_NOSPEC	REG_LITERAL
#elif defined(REG_NOSPEC) && !defined(REG_LITERAL)
#define REG_LITERAL	REG_NOSPEC
#endif /* REG_LITERAL / REG_NOSPEC aliasing */

/* The maximum number of iterations in a bound expression.  Any earlier
   definition is discarded so that this value always applies. */
#undef RE_DUP_MAX
#define RE_DUP_MAX 255
14063d4abf0Sagc 
14163d4abf0Sagc /* The POSIX.2 regexp functions */
14263d4abf0Sagc extern int
14382e82fcaSahoka tre_regcomp(regex_t *preg, const char *regex, int cflags);
14463d4abf0Sagc 
14563d4abf0Sagc extern int
14682e82fcaSahoka tre_regexec(const regex_t *preg, const char *string, size_t nmatch,
14782e82fcaSahoka 	regmatch_t pmatch[], int eflags);
14863d4abf0Sagc 
14913498f30Srin extern int
15013498f30Srin tre_regcompb(regex_t *preg, const char *regex, int cflags);
15113498f30Srin 
15213498f30Srin extern int
15313498f30Srin tre_regexecb(const regex_t *preg, const char *string, size_t nmatch,
15413498f30Srin 	regmatch_t pmatch[], int eflags);
15513498f30Srin 
15663d4abf0Sagc extern size_t
15782e82fcaSahoka tre_regerror(int errcode, const regex_t *preg, char *errbuf,
15882e82fcaSahoka 	 size_t errbuf_size);
15963d4abf0Sagc 
16063d4abf0Sagc extern void
16182e82fcaSahoka tre_regfree(regex_t *preg);
16263d4abf0Sagc 
#ifdef TRE_WCHAR
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif /* HAVE_WCHAR_H */

/* Wide character versions (not in POSIX.2).  Same parameters as
   tre_regcomp() and tre_regexec(), except that the pattern and the
   subject string are wchar_t strings. */
extern int
tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);

extern int
tre_regwexec(const regex_t *preg, const wchar_t *string,
	 size_t nmatch, regmatch_t pmatch[], int eflags);
#endif /* TRE_WCHAR */
17663d4abf0Sagc 
17763d4abf0Sagc /* Versions with a maximum length argument and therefore the capability to
17863d4abf0Sagc    handle null characters in the middle of the strings (not in POSIX.2). */
17963d4abf0Sagc extern int
18082e82fcaSahoka tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);
18163d4abf0Sagc 
18263d4abf0Sagc extern int
18382e82fcaSahoka tre_regnexec(const regex_t *preg, const char *string, size_t len,
18482e82fcaSahoka 	 size_t nmatch, regmatch_t pmatch[], int eflags);
18563d4abf0Sagc 
18613498f30Srin /* regn*b versions take byte literally as 8-bit values */
18713498f30Srin extern int
18813498f30Srin tre_regncompb(regex_t *preg, const char *regex, size_t n, int cflags);
18913498f30Srin 
19013498f30Srin extern int
19113498f30Srin tre_regnexecb(const regex_t *preg, const char *str, size_t len,
19213498f30Srin 	  size_t nmatch, regmatch_t pmatch[], int eflags);
19313498f30Srin 
#ifdef TRE_WCHAR
/* Wide character versions that take an explicit length `len' for the
   pattern or subject string. */
extern int
tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);

extern int
tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
	  size_t nmatch, regmatch_t pmatch[], int eflags);
#endif /* TRE_WCHAR */
20263d4abf0Sagc 
20363d4abf0Sagc #ifdef TRE_APPROX
20463d4abf0Sagc 
/* Approximate matching parameter struct.  The first group holds costs,
   the second group holds upper limits on the number of edits. */
typedef struct {
  int cost_ins;	       /* Default cost of an inserted character. */
  int cost_del;	       /* Default cost of a deleted character. */
  int cost_subst;      /* Default cost of a substituted character. */
  int max_cost;	       /* Maximum allowed cost of a match. */

  int max_ins;	       /* Maximum allowed number of inserts. */
  int max_del;	       /* Maximum allowed number of deletes. */
  int max_subst;       /* Maximum allowed number of substitutes. */
  int max_err;	       /* Maximum allowed number of errors total. */
} regaparams_t;
21763d4abf0Sagc 
21863d4abf0Sagc /* Approximate matching result struct. */
21963d4abf0Sagc typedef struct {
22063d4abf0Sagc   size_t nmatch;       /* Length of pmatch[] array. */
22163d4abf0Sagc   regmatch_t *pmatch;  /* Submatch data. */
22263d4abf0Sagc   int cost;	       /* Cost of the match. */
22363d4abf0Sagc   int num_ins;	       /* Number of inserts in the match. */
22463d4abf0Sagc   int num_del;	       /* Number of deletes in the match. */
22563d4abf0Sagc   int num_subst;       /* Number of substitutes in the match. */
22663d4abf0Sagc } regamatch_t;
22763d4abf0Sagc 
22863d4abf0Sagc 
22963d4abf0Sagc /* Approximate matching functions. */
23063d4abf0Sagc extern int
23182e82fcaSahoka tre_regaexec(const regex_t *preg, const char *string,
23282e82fcaSahoka 	 regamatch_t *match, regaparams_t params, int eflags);
23363d4abf0Sagc 
23463d4abf0Sagc extern int
23582e82fcaSahoka tre_reganexec(const regex_t *preg, const char *string, size_t len,
23682e82fcaSahoka 	  regamatch_t *match, regaparams_t params, int eflags);
23713498f30Srin 
23813498f30Srin extern int
23913498f30Srin tre_regaexecb(const regex_t *preg, const char *string,
24013498f30Srin 	  regamatch_t *match, regaparams_t params, int eflags);
24113498f30Srin 
24263d4abf0Sagc #ifdef TRE_WCHAR
24363d4abf0Sagc /* Wide character approximate matching. */
24463d4abf0Sagc extern int
24582e82fcaSahoka tre_regawexec(const regex_t *preg, const wchar_t *string,
24682e82fcaSahoka 	  regamatch_t *match, regaparams_t params, int eflags);
24763d4abf0Sagc 
24863d4abf0Sagc extern int
24982e82fcaSahoka tre_regawnexec(const regex_t *preg, const wchar_t *string, size_t len,
25082e82fcaSahoka 	   regamatch_t *match, regaparams_t params, int eflags);
25163d4abf0Sagc #endif /* TRE_WCHAR */
25263d4abf0Sagc 
25363d4abf0Sagc /* Sets the parameters to default values. */
25463d4abf0Sagc extern void
25582e82fcaSahoka tre_regaparams_default(regaparams_t *params);
25663d4abf0Sagc #endif /* TRE_APPROX */
25763d4abf0Sagc 
/* Character type handed to tre_str_source callbacks: wchar_t when TRE is
   built with wide character support, unsigned char otherwise. */
#ifdef TRE_WCHAR
typedef wchar_t tre_char_t;
#else /* !TRE_WCHAR */
typedef unsigned char tre_char_t;
#endif /* !TRE_WCHAR */

/* Caller-supplied string source: three callbacks plus an opaque
   `context' pointer that is passed as the last argument of each callback.
   NOTE(review): the callback contracts below are not spelled out in this
   header and are taken from the TRE documentation -- confirm. */
typedef struct {
  /* Presumably stores the next character in `*c' and the number of
     positions consumed in `*pos_add'. */
  int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
  /* Presumably repositions the source to `pos'. */
  void (*rewind)(size_t pos, void *context);
  /* Presumably compares the `len'-long substrings starting at `pos1' and
     `pos2'. */
  int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
  void *context;
} tre_str_source;
27063d4abf0Sagc 
27163d4abf0Sagc extern int
27282e82fcaSahoka tre_reguexec(const regex_t *preg, const tre_str_source *string,
27382e82fcaSahoka 	 size_t nmatch, regmatch_t pmatch[], int eflags);
27463d4abf0Sagc 
/* Returns the version string.  The returned string is static, so the
   caller must not free it. */
extern char *
tre_version(void);
27863d4abf0Sagc 
/* Returns the value for a config parameter.  The type that `result'
   must point to depends on the value of `query'; see the documentation
   for more details. */
extern int
tre_config(int query, void *result);
28463d4abf0Sagc 
/* TRE_CONFIG_* selectors, numbered sequentially from 0.
   NOTE(review): presumably the values accepted as the `query' argument
   of tre_config() -- confirm. */
enum {
  TRE_CONFIG_APPROX,
  TRE_CONFIG_WCHAR,
  TRE_CONFIG_MULTIBYTE,
  TRE_CONFIG_SYSTEM_ABI,
  TRE_CONFIG_VERSION
};
29263d4abf0Sagc 
29363d4abf0Sagc /* Returns 1 if the compiled pattern has back references, 0 if not. */
29463d4abf0Sagc extern int
29582e82fcaSahoka tre_have_backrefs(const regex_t *preg);
29663d4abf0Sagc 
29763d4abf0Sagc /* Returns 1 if the compiled pattern uses approximate matching features,
29863d4abf0Sagc    0 if not. */
29963d4abf0Sagc extern int
30082e82fcaSahoka tre_have_approx(const regex_t *preg);
30163d4abf0Sagc 
30263d4abf0Sagc #ifdef __cplusplus
30363d4abf0Sagc }
30463d4abf0Sagc #endif
30563d4abf0Sagc #endif				/* TRE_H */
30663d4abf0Sagc 
30763d4abf0Sagc /* EOF */
308