16af9a77bSJohn Marino /* 26af9a77bSJohn Marino * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> 36af9a77bSJohn Marino * All rights reserved. 46af9a77bSJohn Marino * 56af9a77bSJohn Marino * Redistribution and use in source and binary forms, with or without 66af9a77bSJohn Marino * modification, are permitted provided that the following conditions 76af9a77bSJohn Marino * are met: 86af9a77bSJohn Marino * 96af9a77bSJohn Marino * 1. Redistributions of source code must retain the above copyright 106af9a77bSJohn Marino * notice, this list of conditions and the following disclaimer. 116af9a77bSJohn Marino * 126af9a77bSJohn Marino * 2. Redistributions in binary form must reproduce the above copyright 136af9a77bSJohn Marino * notice, this list of conditions and the following disclaimer in the 146af9a77bSJohn Marino * documentation and/or other materials provided with the distribution. 156af9a77bSJohn Marino * 166af9a77bSJohn Marino * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 176af9a77bSJohn Marino * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 186af9a77bSJohn Marino * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 196af9a77bSJohn Marino * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 206af9a77bSJohn Marino * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 216af9a77bSJohn Marino * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 226af9a77bSJohn Marino * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 236af9a77bSJohn Marino * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 246af9a77bSJohn Marino * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 256af9a77bSJohn Marino * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 266af9a77bSJohn Marino * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 276af9a77bSJohn Marino * 286af9a77bSJohn Marino */ 296af9a77bSJohn Marino 306af9a77bSJohn Marino #ifndef _REGEX_H_ 316af9a77bSJohn Marino #define _REGEX_H_ 326af9a77bSJohn Marino 336af9a77bSJohn Marino #include <sys/cdefs.h> 346af9a77bSJohn Marino #include <sys/types.h> 356af9a77bSJohn Marino #include <wchar.h> 3631671741SJohn Marino #include <xlocale.h> 376af9a77bSJohn Marino 386af9a77bSJohn Marino #define tre_regcomp regcomp 39012b8ed8SSascha Wildner #define tre_regcomp_l regcomp_l 406af9a77bSJohn Marino #define tre_regexec regexec 416af9a77bSJohn Marino #define tre_regerror regerror 426af9a77bSJohn Marino #define tre_regfree regfree 436af9a77bSJohn Marino 446af9a77bSJohn Marino #define tre_regncomp regncomp 45012b8ed8SSascha Wildner #define tre_regncomp_l regncomp_l 466af9a77bSJohn Marino #define tre_regnexec regnexec 47012b8ed8SSascha Wildner #define tre_regwcomp regwcomp 48012b8ed8SSascha Wildner #define tre_regwcomp_l regwcomp_l 49012b8ed8SSascha Wildner #define tre_regwexec regwexec 50012b8ed8SSascha Wildner #define tre_regwncomp regwncomp 51012b8ed8SSascha Wildner #define tre_regwncomp_l regwncomp_l 52012b8ed8SSascha Wildner #define tre_regwnexec regwnexec 536af9a77bSJohn Marino 546af9a77bSJohn Marino typedef enum { 55*f85b95ddSSascha Wildner #if __BSD_VISIBLE || (__POSIX_VISIBLE && __POSIX_VISIBLE < 200809) 56c9e62bc0SSascha Wildner REG_ENOSYS = -1, /* Reserved */ 57c9e62bc0SSascha Wildner #endif 586af9a77bSJohn Marino REG_OK = 0, /* No error. */ 596af9a77bSJohn Marino REG_NOMATCH, /* No match. */ 606af9a77bSJohn Marino REG_BADPAT, /* Invalid regexp. */ 616af9a77bSJohn Marino REG_ECOLLATE, /* Unknown collating element. */ 626af9a77bSJohn Marino REG_ECTYPE, /* Unknown character class name. */ 636af9a77bSJohn Marino REG_EESCAPE, /* Trailing backslash. */ 646af9a77bSJohn Marino REG_ESUBREG, /* Invalid back reference. */ 656af9a77bSJohn Marino REG_EBRACK, /* "[]" imbalance */ 666af9a77bSJohn Marino REG_EPAREN, /* "\(\)" or "()" imbalance */ 676af9a77bSJohn Marino REG_EBRACE, /* "\{\}" or "{}" imbalance */ 686af9a77bSJohn Marino REG_BADBR, /* Invalid content of {} */ 696af9a77bSJohn Marino REG_ERANGE, /* Invalid use of range operator */ 706af9a77bSJohn Marino REG_ESPACE, /* Out of memory. */ 716af9a77bSJohn Marino REG_BADRPT, /* Invalid use of repetition operators. */ 726af9a77bSJohn Marino REG_EMPTY, /* rexexp was zero-length string */ 736af9a77bSJohn Marino REG_INVARG, /* invalid argument to regex routine */ 746af9a77bSJohn Marino REG_ILLSEQ /* illegal byte sequence */ 756af9a77bSJohn Marino } reg_errcode_t; 766af9a77bSJohn Marino 776af9a77bSJohn Marino enum { 786af9a77bSJohn Marino TRE_CONFIG_APPROX, 796af9a77bSJohn Marino TRE_CONFIG_WCHAR, 806af9a77bSJohn Marino TRE_CONFIG_MULTIBYTE, 816af9a77bSJohn Marino TRE_CONFIG_SYSTEM_ABI, 826af9a77bSJohn Marino TRE_CONFIG_VERSION 836af9a77bSJohn Marino }; 846af9a77bSJohn Marino 856af9a77bSJohn Marino typedef int regoff_t; 866af9a77bSJohn Marino typedef wchar_t tre_char_t; 876af9a77bSJohn Marino 886af9a77bSJohn Marino typedef struct { 896af9a77bSJohn Marino int re_magic; 906af9a77bSJohn Marino size_t re_nsub; /* Number of parenthesized subexpressions. */ 916af9a77bSJohn Marino const void *re_endp; /* regex string end pointer (REG_PEND) */ 926af9a77bSJohn Marino void *value; /* For internal use only. */ 936af9a77bSJohn Marino } regex_t; 946af9a77bSJohn Marino 956af9a77bSJohn Marino typedef struct { 966af9a77bSJohn Marino regoff_t rm_so; 976af9a77bSJohn Marino regoff_t rm_eo; 986af9a77bSJohn Marino } regmatch_t; 996af9a77bSJohn Marino 1006af9a77bSJohn Marino /* Approximate matching parameter struct. */ 1016af9a77bSJohn Marino typedef struct { 1026af9a77bSJohn Marino int cost_ins; /* Default cost of an inserted character. */ 1036af9a77bSJohn Marino int cost_del; /* Default cost of a deleted character. */ 1046af9a77bSJohn Marino int cost_subst; /* Default cost of a substituted character. */ 1056af9a77bSJohn Marino int max_cost; /* Maximum allowed cost of a match. */ 1066af9a77bSJohn Marino 1076af9a77bSJohn Marino int max_ins; /* Maximum allowed number of inserts. */ 1086af9a77bSJohn Marino int max_del; /* Maximum allowed number of deletes. */ 1096af9a77bSJohn Marino int max_subst; /* Maximum allowed number of substitutes. */ 1106af9a77bSJohn Marino int max_err; /* Maximum allowed number of errors total. */ 1116af9a77bSJohn Marino } regaparams_t; 1126af9a77bSJohn Marino 1136af9a77bSJohn Marino /* Approximate matching result struct. */ 1146af9a77bSJohn Marino typedef struct { 1156af9a77bSJohn Marino size_t nmatch; /* Length of pmatch[] array. */ 1166af9a77bSJohn Marino regmatch_t *pmatch; /* Submatch data. */ 1176af9a77bSJohn Marino int cost; /* Cost of the match. */ 1186af9a77bSJohn Marino int num_ins; /* Number of inserts in the match. */ 1196af9a77bSJohn Marino int num_del; /* Number of deletes in the match. */ 1206af9a77bSJohn Marino int num_subst; /* Number of substitutes in the match. */ 1216af9a77bSJohn Marino } regamatch_t; 1226af9a77bSJohn Marino 1236af9a77bSJohn Marino typedef struct { 1246af9a77bSJohn Marino int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context); 1256af9a77bSJohn Marino void (*rewind)(size_t pos, void *context); 1266af9a77bSJohn Marino int (*compare)(size_t pos1, size_t pos2, size_t len, void *context); 1276af9a77bSJohn Marino void *context; 1286af9a77bSJohn Marino } tre_str_source; 1296af9a77bSJohn Marino 1306af9a77bSJohn Marino /* POSIX tre_regcomp() flags. */ 1316af9a77bSJohn Marino #define REG_EXTENDED 1 1326af9a77bSJohn Marino #define REG_ICASE (REG_EXTENDED << 1) 1336af9a77bSJohn Marino #define REG_NEWLINE (REG_ICASE << 1) 1346af9a77bSJohn Marino #define REG_NOSUB (REG_NEWLINE << 1) 1356af9a77bSJohn Marino 1366af9a77bSJohn Marino /* Extra tre_regcomp() flags. */ 1376af9a77bSJohn Marino #define REG_BASIC 0 1386af9a77bSJohn Marino #define REG_LITERAL (REG_NOSUB << 1) 1396af9a77bSJohn Marino #define REG_RIGHT_ASSOC (REG_LITERAL << 1) 1406af9a77bSJohn Marino #define REG_UNGREEDY (REG_RIGHT_ASSOC << 1) 1416af9a77bSJohn Marino #define REG_PEND (REG_UNGREEDY << 1) 1426af9a77bSJohn Marino #define REG_ENHANCED (REG_PEND << 1) 1436af9a77bSJohn Marino 1446af9a77bSJohn Marino /* alias regcomp flags. */ 1456af9a77bSJohn Marino #define REG_NOSPEC REG_LITERAL 1466af9a77bSJohn Marino #define REG_MINIMAL REG_UNGREEDY 1476af9a77bSJohn Marino 1486af9a77bSJohn Marino /* POSIX tre_regexec() flags. */ 1496af9a77bSJohn Marino #define REG_NOTBOL 1 1506af9a77bSJohn Marino #define REG_NOTEOL (REG_NOTBOL << 1) 1516af9a77bSJohn Marino #define REG_STARTEND (REG_NOTEOL << 1) 1526af9a77bSJohn Marino #define REG_BACKR (REG_STARTEND << 1) 1536af9a77bSJohn Marino 1546af9a77bSJohn Marino /* Extra tre_regexec() flags. */ 1556af9a77bSJohn Marino #define REG_APPROX_MATCHER (REG_NOTEOL << 1) 1566af9a77bSJohn Marino #define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1) 1576af9a77bSJohn Marino 1586af9a77bSJohn Marino /* The maximum number of iterations in a bound expression. */ 1596af9a77bSJohn Marino #define RE_DUP_MAX 255 1606af9a77bSJohn Marino 1616af9a77bSJohn Marino #define _REG_nexec 1 1626af9a77bSJohn Marino 1636af9a77bSJohn Marino __BEGIN_DECLS 1646af9a77bSJohn Marino 1656af9a77bSJohn Marino /* The POSIX.2 regexp functions */ 1666af9a77bSJohn Marino int 167d33005aaSSascha Wildner tre_regcomp(regex_t * __restrict preg, const char * __restrict regex, 168d33005aaSSascha Wildner int cflags); 1696af9a77bSJohn Marino 1706af9a77bSJohn Marino int 171d33005aaSSascha Wildner tre_regexec(const regex_t * __restrict preg, const char * __restrict string, 172d33005aaSSascha Wildner size_t nmatch, regmatch_t pmatch[__restrict_arr], int eflags); 1736af9a77bSJohn Marino 1746af9a77bSJohn Marino size_t 175d33005aaSSascha Wildner tre_regerror(int errcode, const regex_t * __restrict preg, 176d33005aaSSascha Wildner char * __restrict errbuf, size_t errbuf_size); 1776af9a77bSJohn Marino 1786af9a77bSJohn Marino void 1796af9a77bSJohn Marino tre_regfree(regex_t *preg); 1806af9a77bSJohn Marino 1816af9a77bSJohn Marino /* Wide character versions (not in POSIX.2). */ 1826af9a77bSJohn Marino int 1836af9a77bSJohn Marino tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags); 1846af9a77bSJohn Marino 1856af9a77bSJohn Marino int 1866af9a77bSJohn Marino tre_regwexec(const regex_t *preg, const wchar_t *string, 1876af9a77bSJohn Marino size_t nmatch, regmatch_t pmatch[], int eflags); 1886af9a77bSJohn Marino 1896af9a77bSJohn Marino /* Versions with a maximum length argument and therefore the capability to 1906af9a77bSJohn Marino handle null characters in the middle of the strings (not in POSIX.2). */ 1916af9a77bSJohn Marino int 1926af9a77bSJohn Marino tre_regncomp(regex_t *preg, const char *regex, size_t len, int cflags); 1936af9a77bSJohn Marino 1946af9a77bSJohn Marino int 1956af9a77bSJohn Marino tre_regnexec(const regex_t *preg, const char *string, size_t len, 1966af9a77bSJohn Marino size_t nmatch, regmatch_t pmatch[], int eflags); 1976af9a77bSJohn Marino 1986af9a77bSJohn Marino int 1996af9a77bSJohn Marino tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t len, int cflags); 2006af9a77bSJohn Marino 2016af9a77bSJohn Marino int 2026af9a77bSJohn Marino tre_regwnexec(const regex_t *preg, const wchar_t *string, size_t len, 2036af9a77bSJohn Marino size_t nmatch, regmatch_t pmatch[], int eflags); 2046af9a77bSJohn Marino 2056af9a77bSJohn Marino /* Returns the version string. The returned string is static. */ 2066af9a77bSJohn Marino char * 2076af9a77bSJohn Marino tre_version(void); 2086af9a77bSJohn Marino 2096af9a77bSJohn Marino /* Returns the value for a config parameter. The type to which `result' 2106af9a77bSJohn Marino must point to depends of the value of `query', see documentation for 2116af9a77bSJohn Marino more details. */ 2126af9a77bSJohn Marino int 2136af9a77bSJohn Marino tre_config(int query, void *result); 2146af9a77bSJohn Marino 2156af9a77bSJohn Marino /* Returns 1 if the compiled pattern has back references, 0 if not. */ 2166af9a77bSJohn Marino int 2176af9a77bSJohn Marino tre_have_backrefs(const regex_t *preg); 2186af9a77bSJohn Marino 2196af9a77bSJohn Marino /* Returns 1 if the compiled pattern uses approximate matching features, 2206af9a77bSJohn Marino 0 if not. */ 2216af9a77bSJohn Marino int 2226af9a77bSJohn Marino tre_have_approx(const regex_t *preg); 2236af9a77bSJohn Marino __END_DECLS 2246af9a77bSJohn Marino 2256af9a77bSJohn Marino /* The POSIX.2 regexp functions, locale version */ 2266af9a77bSJohn Marino int 2276af9a77bSJohn Marino tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t locale); 2286af9a77bSJohn Marino 2296af9a77bSJohn Marino int 2306af9a77bSJohn Marino tre_regncomp_l(regex_t *preg, const char *regex, size_t len, int cflags, 2316af9a77bSJohn Marino locale_t locale); 2326af9a77bSJohn Marino 2336af9a77bSJohn Marino int 2346af9a77bSJohn Marino tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags, 2356af9a77bSJohn Marino locale_t locale); 2366af9a77bSJohn Marino 2376af9a77bSJohn Marino int 2386af9a77bSJohn Marino tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t len, int cflags, 2396af9a77bSJohn Marino locale_t locale); 2406af9a77bSJohn Marino 2416af9a77bSJohn Marino #endif /* !_REGEX_H_ */ 242