xref: /dflybsd-src/lib/libc/tre-regex/regex.h (revision f85b95dda56c5674337d123e2cfef09de2e15167)
/*
 * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
296af9a77bSJohn Marino 
306af9a77bSJohn Marino #ifndef _REGEX_H_
316af9a77bSJohn Marino #define	_REGEX_H_
326af9a77bSJohn Marino 
336af9a77bSJohn Marino #include <sys/cdefs.h>
346af9a77bSJohn Marino #include <sys/types.h>
356af9a77bSJohn Marino #include <wchar.h>
3631671741SJohn Marino #include <xlocale.h>
376af9a77bSJohn Marino 
/*
 * Name mapping: every tre_-prefixed identifier below is replaced by the
 * preprocessor with the corresponding plain regex name, so a prototype
 * written as tre_regcomp() in this header declares regcomp().
 */

/* Narrow-string entry points. */
#define	tre_regcomp	regcomp
#define	tre_regcomp_l	regcomp_l
#define	tre_regexec	regexec
#define	tre_regerror	regerror
#define	tre_regfree	regfree

/* Explicit-length and wide-character entry points. */
#define	tre_regncomp	regncomp
#define	tre_regncomp_l	regncomp_l
#define	tre_regnexec	regnexec
#define	tre_regwcomp	regwcomp
#define	tre_regwcomp_l	regwcomp_l
#define	tre_regwexec	regwexec
#define	tre_regwncomp	regwncomp
#define	tre_regwncomp_l	regwncomp_l
#define	tre_regwnexec	regwnexec
536af9a77bSJohn Marino 
/*
 * Error codes of the regex routines.
 *
 * REG_OK is pinned to 0 and the remaining codes count upwards from it, so
 * their numeric values depend on the order of the enumerators below; do not
 * reorder them.
 */
typedef enum {
  /*
   * REG_ENOSYS is only exposed when __BSD_VISIBLE is non-zero, or when
   * __POSIX_VISIBLE is non-zero and below 200809 (presumably because newer
   * POSIX revisions no longer define it -- confirm against the standard).
   */
#if __BSD_VISIBLE || (__POSIX_VISIBLE && __POSIX_VISIBLE < 200809)
  REG_ENOSYS = -1,	/* Reserved */
#endif
  REG_OK = 0,		/* No error. */
  REG_NOMATCH,		/* No match. */
  REG_BADPAT,		/* Invalid regexp. */
  REG_ECOLLATE,		/* Unknown collating element. */
  REG_ECTYPE,		/* Unknown character class name. */
  REG_EESCAPE,		/* Trailing backslash. */
  REG_ESUBREG,		/* Invalid back reference. */
  REG_EBRACK,		/* "[]" imbalance */
  REG_EPAREN,		/* "\(\)" or "()" imbalance */
  REG_EBRACE,		/* "\{\}" or "{}" imbalance */
  REG_BADBR,		/* Invalid content of {} */
  REG_ERANGE,		/* Invalid use of range operator */
  REG_ESPACE,		/* Out of memory.  */
  REG_BADRPT,		/* Invalid use of repetition operators. */
  REG_EMPTY,		/* regexp was zero-length string */
  REG_INVARG,		/* invalid argument to regex routine */
  REG_ILLSEQ		/* illegal byte sequence */
} reg_errcode_t;
766af9a77bSJohn Marino 
/*
 * TRE configuration selectors, numbered 0..4 in declaration order.
 * Presumably these are the `query' values accepted by tre_config() --
 * confirm against the TRE documentation.
 */
enum {
  TRE_CONFIG_APPROX,
  TRE_CONFIG_WCHAR,
  TRE_CONFIG_MULTIBYTE,
  TRE_CONFIG_SYSTEM_ABI,
  TRE_CONFIG_VERSION
};
846af9a77bSJohn Marino 
/* Type of the rm_so/rm_eo members of regmatch_t. */
typedef int regoff_t;

/* Character type written by the get_next_char callback of tre_str_source. */
typedef wchar_t tre_char_t;
876af9a77bSJohn Marino 
/*
 * Compiled-pattern handle that is passed to the regex functions declared
 * in this header.
 */
typedef struct {
  int re_magic;
  size_t re_nsub;	/* Number of parenthesized subexpressions. */
  const void *re_endp;	/* regex string end pointer (REG_PEND) */
  void *value;		/* For internal use only. */
} regex_t;
946af9a77bSJohn Marino 
956af9a77bSJohn Marino typedef struct {
966af9a77bSJohn Marino   regoff_t rm_so;
976af9a77bSJohn Marino   regoff_t rm_eo;
986af9a77bSJohn Marino } regmatch_t;
996af9a77bSJohn Marino 
/*
 * Approximate matching parameter struct.
 *
 * The first four members are costs, the last four are count limits.
 */
typedef struct {
  int cost_ins;		/* Default cost of an inserted character. */
  int cost_del;		/* Default cost of a deleted character. */
  int cost_subst;	/* Default cost of a substituted character. */
  int max_cost;		/* Maximum allowed cost of a match. */

  int max_ins;		/* Maximum allowed number of inserts. */
  int max_del;		/* Maximum allowed number of deletes. */
  int max_subst;	/* Maximum allowed number of substitutes. */
  int max_err;		/* Maximum allowed number of errors total. */
} regaparams_t;
1126af9a77bSJohn Marino 
1136af9a77bSJohn Marino /* Approximate matching result struct. */
1146af9a77bSJohn Marino typedef struct {
1156af9a77bSJohn Marino   size_t nmatch;	/* Length of pmatch[] array. */
1166af9a77bSJohn Marino   regmatch_t *pmatch;	/* Submatch data. */
1176af9a77bSJohn Marino   int cost;		/* Cost of the match. */
1186af9a77bSJohn Marino   int num_ins;		/* Number of inserts in the match. */
1196af9a77bSJohn Marino   int num_del;		/* Number of deletes in the match. */
1206af9a77bSJohn Marino   int num_subst;	/* Number of substitutes in the match. */
1216af9a77bSJohn Marino } regamatch_t;
1226af9a77bSJohn Marino 
1236af9a77bSJohn Marino typedef struct {
1246af9a77bSJohn Marino   int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context);
1256af9a77bSJohn Marino   void (*rewind)(size_t pos, void *context);
1266af9a77bSJohn Marino   int (*compare)(size_t pos1, size_t pos2, size_t len, void *context);
1276af9a77bSJohn Marino   void *context;
1286af9a77bSJohn Marino } tre_str_source;
1296af9a77bSJohn Marino 
/*
 * Compile-time (cflags) bits.  Each flag is defined as the previous one
 * shifted left by one, so the flags occupy consecutive single bits starting
 * at bit 0; REG_BASIC is 0, i.e. no bit set.
 */

/* POSIX tre_regcomp() flags. */
#define	REG_EXTENDED	1
#define	REG_ICASE	(REG_EXTENDED << 1)
#define	REG_NEWLINE	(REG_ICASE << 1)
#define	REG_NOSUB	(REG_NEWLINE << 1)

/* Extra tre_regcomp() flags. */
#define	REG_BASIC	0
#define	REG_LITERAL	(REG_NOSUB << 1)
#define	REG_RIGHT_ASSOC	(REG_LITERAL << 1)
#define	REG_UNGREEDY	(REG_RIGHT_ASSOC << 1)
#define	REG_PEND	(REG_UNGREEDY << 1)
#define	REG_ENHANCED	(REG_PEND << 1)

/* alias regcomp flags. */
#define	REG_NOSPEC	REG_LITERAL
#define	REG_MINIMAL	REG_UNGREEDY
1476af9a77bSJohn Marino 
/* POSIX tre_regexec() flags. */
#define	REG_NOTBOL	1
#define	REG_NOTEOL	(REG_NOTBOL << 1)
#define	REG_STARTEND	(REG_NOTEOL << 1)
#define	REG_BACKR	(REG_STARTEND << 1)

/*
 * Extra tre_regexec() flags.
 *
 * NOTE(review): these are derived from REG_NOTEOL rather than from
 * REG_BACKR, so REG_APPROX_MATCHER has the same value as REG_STARTEND (4)
 * and REG_BACKTRACKING_MATCHER the same value as REG_BACKR (8).  The values
 * are left untouched because already-compiled callers depend on them;
 * confirm whether the overlap is intended before relying on these flags
 * being distinguishable from the two above.
 */
#define	REG_APPROX_MATCHER		(REG_NOTEOL << 1)
#define	REG_BACKTRACKING_MATCHER	(REG_APPROX_MATCHER << 1)
1576af9a77bSJohn Marino 
/* The maximum number of iterations in a bound expression. */
#define	RE_DUP_MAX	255

/*
 * Presumably a feature-test macro announcing that the regn*exec functions
 * are available -- confirm against users of this header.
 */
#define	_REG_nexec	1
1626af9a77bSJohn Marino 
1636af9a77bSJohn Marino __BEGIN_DECLS
1646af9a77bSJohn Marino 
1656af9a77bSJohn Marino /* The POSIX.2 regexp functions */
1666af9a77bSJohn Marino int
167d33005aaSSascha Wildner tre_regcomp(regex_t * __restrict preg, const char * __restrict regex,
168d33005aaSSascha Wildner     int cflags);
1696af9a77bSJohn Marino 
1706af9a77bSJohn Marino int
171d33005aaSSascha Wildner tre_regexec(const regex_t * __restrict preg, const char * __restrict string,
172d33005aaSSascha Wildner     size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags);
1736af9a77bSJohn Marino 
1746af9a77bSJohn Marino size_t
175d33005aaSSascha Wildner tre_regerror(int errcode, const regex_t * __restrict preg,
176d33005aaSSascha Wildner     char * __restrict errbuf, size_t errbuf_size);
1776af9a77bSJohn Marino 
1786af9a77bSJohn Marino void
1796af9a77bSJohn Marino tre_regfree(regex_t *preg);
1806af9a77bSJohn Marino 
1816af9a77bSJohn Marino /* Wide character versions (not in POSIX.2). */
1826af9a77bSJohn Marino int
1836af9a77bSJohn Marino tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags);
1846af9a77bSJohn Marino 
1856af9a77bSJohn Marino int
1866af9a77bSJohn Marino tre_regwexec(const regex_t *preg, const wchar_t *string,
1876af9a77bSJohn Marino 	 size_t nmatch, regmatch_t pmatch[], int eflags);
1886af9a77bSJohn Marino 
1896af9a77bSJohn Marino /* Versions with a maximum length argument and therefore the capability to
1906af9a77bSJohn Marino    handle null characters in the middle of the strings (not in POSIX.2). */
1916af9a77bSJohn Marino int
1926af9a77bSJohn Marino tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags);
1936af9a77bSJohn Marino 
1946af9a77bSJohn Marino int
1956af9a77bSJohn Marino tre_regnexec(const regex_t *preg, const char *string, size_t len,
1966af9a77bSJohn Marino 	 size_t nmatch, regmatch_t pmatch[], int eflags);
1976af9a77bSJohn Marino 
1986af9a77bSJohn Marino int
1996af9a77bSJohn Marino tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags);
2006af9a77bSJohn Marino 
2016af9a77bSJohn Marino int
2026af9a77bSJohn Marino tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len,
2036af9a77bSJohn Marino 	  size_t nmatch, regmatch_t pmatch[], int eflags);
2046af9a77bSJohn Marino 
2056af9a77bSJohn Marino /* Returns the version string.	The returned string is static. */
2066af9a77bSJohn Marino char *
2076af9a77bSJohn Marino tre_version(void);
2086af9a77bSJohn Marino 
2096af9a77bSJohn Marino /* Returns the value for a config parameter.  The type to which `result'
2106af9a77bSJohn Marino    must point to depends of the value of `query', see documentation for
2116af9a77bSJohn Marino    more details. */
2126af9a77bSJohn Marino int
2136af9a77bSJohn Marino tre_config(int query, void *result);
2146af9a77bSJohn Marino 
2156af9a77bSJohn Marino /* Returns 1 if the compiled pattern has back references, 0 if not. */
2166af9a77bSJohn Marino int
2176af9a77bSJohn Marino tre_have_backrefs(const regex_t *preg);
2186af9a77bSJohn Marino 
2196af9a77bSJohn Marino /* Returns 1 if the compiled pattern uses approximate matching features,
2206af9a77bSJohn Marino    0 if not. */
2216af9a77bSJohn Marino int
2226af9a77bSJohn Marino tre_have_approx(const regex_t *preg);
2236af9a77bSJohn Marino __END_DECLS
2246af9a77bSJohn Marino 
2256af9a77bSJohn Marino /* The POSIX.2 regexp functions, locale version */
2266af9a77bSJohn Marino int
2276af9a77bSJohn Marino tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t locale);
2286af9a77bSJohn Marino 
2296af9a77bSJohn Marino int
2306af9a77bSJohn Marino tre_regncomp_l(regex_t *preg, const char *regex, size_t len, int cflags,
2316af9a77bSJohn Marino     locale_t locale);
2326af9a77bSJohn Marino 
2336af9a77bSJohn Marino int
2346af9a77bSJohn Marino tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags,
2356af9a77bSJohn Marino     locale_t locale);
2366af9a77bSJohn Marino 
2376af9a77bSJohn Marino int
2386af9a77bSJohn Marino tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t len, int cflags,
2396af9a77bSJohn Marino     locale_t locale);
2406af9a77bSJohn Marino 
2416af9a77bSJohn Marino #endif /* !_REGEX_H_ */
242