14887Schin /*********************************************************************** 24887Schin * * 34887Schin * This software is part of the ast package * 4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1985-2010 AT&T Intellectual Property * 54887Schin * and is licensed under the * 64887Schin * Common Public License, Version 1.0 * 78462SApril.Chin@Sun.COM * by AT&T Intellectual Property * 84887Schin * * 94887Schin * A copy of the License is available at * 104887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 124887Schin * * 134887Schin * Information and Software Systems Research * 144887Schin * AT&T Research * 154887Schin * Florham Park NJ * 164887Schin * * 174887Schin * Glenn Fowler <gsf@research.att.com> * 184887Schin * David Korn <dgk@research.att.com> * 194887Schin * Phong Vo <kpv@research.att.com> * 204887Schin * * 214887Schin ***********************************************************************/ 224887Schin #pragma prototyped 234887Schin 244887Schin /* 254887Schin * regex library interface 264887Schin */ 274887Schin 284887Schin #ifdef _AST_STD_I 294887Schin #define _REGEX_H -1 304887Schin #define regex_t int 314887Schin #define regmatch_t int 324887Schin #endif 334887Schin #ifndef _REGEX_H 344887Schin #define _REGEX_H 1 354887Schin #undef regex_t 364887Schin #undef regmatch_t 374887Schin 384887Schin #include <ast_common.h> 394887Schin 404887Schin #define REG_VERSION 20030916L 414887Schin 424887Schin /* regcomp flags */ 434887Schin 444887Schin #define REG_AUGMENTED 0x00000001 /* enable ! & < > */ 454887Schin #define REG_EXTENDED 0x00000002 /* enable ( | ) */ 464887Schin #define REG_ICASE 0x00000004 /* ignore case in match */ 474887Schin #define REG_NEWLINE 0x00000008 /* ^/$ match embedded \n */ 484887Schin #define REG_NOSUB 0x00000010 /* don't report subexp matches */ 494887Schin #define REG_SHELL 0x00000020 /* shell pattern syntax */ 504887Schin 514887Schin /* nonstandard regcomp flags */ 524887Schin 534887Schin #define REG_LEFT 0x00000100 /* implicit ^... */ 544887Schin #define REG_LITERAL 0x00000200 /* no operators */ 554887Schin #define REG_MINIMAL 0x00000400 /* minimal match */ 564887Schin #define REG_NULL 0x00000800 /* allow null patterns */ 574887Schin #define REG_RIGHT 0x00001000 /* implicit ...$ */ 584887Schin #define REG_LENIENT 0x00002000 /* look the other way */ 594887Schin #define REG_ESCAPE 0x00004000 /* \ escapes delimiter in [...] */ 604887Schin #define REG_FIRST 0x00008000 /* first match found will do */ 614887Schin #define REG_MULTIPLE 0x00010000 /* multiple \n sep patterns */ 624887Schin #define REG_DISCIPLINE 0x00020000 /* regex_t.re_disc is valid */ 634887Schin #define REG_SPAN 0x00040000 /* . matches \n */ 644887Schin #define REG_COMMENT 0x00080000 /* ignore pattern space & #...\n*/ 654887Schin #define REG_MULTIREF 0x00100000 /* multiple digit backrefs */ 664887Schin #define REG_MUSTDELIM 0x08000000 /* all delimiters required */ 674887Schin #define REG_DELIMITED 0x10000000 /* pattern[0] is delimiter */ 68*12068SRoger.Faulkner@Oracle.COM #define REG_CLASS_ESCAPE 0x80000000 /* \ escapes in [...] */ 694887Schin 704887Schin #define REG_SHELL_DOT 0x00200000 /* explicit leading . match */ 714887Schin #define REG_SHELL_ESCAPED 0x00400000 /* \ not special */ 72*12068SRoger.Faulkner@Oracle.COM #define REG_SHELL_GROUP 0x20000000 /* (|&) inside [@|&](...) only */ 734887Schin #define REG_SHELL_PATH 0x00800000 /* explicit / match */ 744887Schin 7510898Sroland.mainz@nrubsig.org #define REG_REGEXP 0x40000000 /* <regexp.h> compatibility */ 7610898Sroland.mainz@nrubsig.org 774887Schin /* regexec flags */ 784887Schin 794887Schin #define REG_NOTBOL 0x00000040 /* ^ is not a special char */ 804887Schin #define REG_NOTEOL 0x00000080 /* $ is not a special char */ 814887Schin 824887Schin /* nonstandard regexec flags */ 834887Schin 844887Schin #define REG_INVERT 0x01000000 /* invert regrexec match sense */ 854887Schin #define REG_STARTEND 0x02000000 /* subject==match[0].rm_{so,eo} */ 864887Schin #define REG_ADVANCE 0x04000000 /* advance match[0].rm_{so,eo} */ 874887Schin 884887Schin /* regalloc flags */ 894887Schin 904887Schin #define REG_NOFREE 0x00000001 /* don't free */ 914887Schin 924887Schin /* regsub flags */ 934887Schin 944887Schin #define REG_SUB_ALL 0x00000001 /* substitute all occurrences */ 954887Schin #define REG_SUB_LOWER 0x00000002 /* substitute to lower case */ 964887Schin #define REG_SUB_UPPER 0x00000004 /* substitute to upper case */ 974887Schin #define REG_SUB_PRINT 0x00000010 /* internal no-op */ 984887Schin #define REG_SUB_NUMBER 0x00000020 /* internal no-op */ 994887Schin #define REG_SUB_STOP 0x00000040 /* internal no-op */ 1004887Schin #define REG_SUB_WRITE 0x00000080 /* internal no-op */ 1014887Schin #define REG_SUB_LAST 0x00000100 /* last substitution option */ 1024887Schin #define REG_SUB_FULL 0x00000200 /* fully delimited */ 1034887Schin #define REG_SUB_USER 0x00001000 /* first user flag bit */ 1044887Schin 1054887Schin /* regex error codes */ 1064887Schin 1074887Schin #define REG_ENOSYS (-1) /* not supported */ 1084887Schin #define REG_NOMATCH 1 /* regexec didn't match */ 1094887Schin #define REG_BADPAT 2 /* invalid regular expression */ 1104887Schin #define REG_ECOLLATE 3 /* invalid collation element */ 1114887Schin #define REG_ECTYPE 4 /* invalid character class */ 1124887Schin #define REG_EESCAPE 5 /* trailing \ in pattern */ 1134887Schin #define REG_ESUBREG 6 /* invalid \digit backreference */ 1144887Schin #define REG_EBRACK 7 /* [...] imbalance */ 1154887Schin #define REG_EPAREN 8 /* \(...\) or (...) imbalance */ 1164887Schin #define REG_EBRACE 9 /* \{...\} or {...} imbalance */ 1174887Schin #define REG_BADBR 10 /* invalid {...} digits */ 1184887Schin #define REG_ERANGE 11 /* invalid [...] range endpoint */ 1194887Schin #define REG_ESPACE 12 /* out of space */ 120*12068SRoger.Faulkner@Oracle.COM #define REG_BADRPT 13 /* unary op not preceded by re */ 1214887Schin #define REG_ENULL 14 /* empty subexpr in pattern */ 1224887Schin #define REG_ECOUNT 15 /* re component count overflow */ 1234887Schin #define REG_BADESC 16 /* invalid \char escape */ 1244887Schin #define REG_VERSIONID 17 /* version id (pseudo error) */ 1254887Schin #define REG_EFLAGS 18 /* flags conflict */ 1264887Schin #define REG_EDELIM 19 /* invalid or omitted delimiter */ 1274887Schin #define REG_PANIC 20 /* unrecoverable internal error */ 1284887Schin 1294887Schin struct regex_s; typedef struct regex_s regex_t; 1304887Schin struct regdisc_s; typedef struct regdisc_s regdisc_t; 1314887Schin 1324887Schin typedef int (*regclass_t)(int); 133*12068SRoger.Faulkner@Oracle.COM typedef uint32_t regflags_t; 1344887Schin typedef int regoff_t; 1354887Schin typedef int (*regerror_t)(const regex_t*, regdisc_t*, int, ...); 1364887Schin typedef void* (*regcomp_t)(const regex_t*, const char*, size_t, regdisc_t*); 1374887Schin typedef int (*regexec_t)(const regex_t*, void*, const char*, size_t, const char*, size_t, char**, regdisc_t*); 1384887Schin typedef void* (*regresize_t)(void*, void*, size_t); 1394887Schin typedef int (*regrecord_t)(void*, const char*, size_t); 1404887Schin 1414887Schin typedef struct regmatch_s 1424887Schin { 1434887Schin regoff_t rm_so; /* offset of start */ 1444887Schin regoff_t rm_eo; /* offset of end */ 1454887Schin } regmatch_t; 1464887Schin 1474887Schin typedef struct regsub_s 1484887Schin { 1494887Schin regflags_t re_flags; /* regsubcomp() flags */ 1504887Schin char* re_buf; /* regsubexec() output buffer */ 1514887Schin size_t re_len; /* re_buf length */ 1524887Schin int re_min; /* regsubcomp() min matches */ 1534887Schin #ifdef _REG_SUB_PRIVATE_ 1544887Schin _REG_SUB_PRIVATE_ 1554887Schin #endif 1564887Schin } regsub_t; 1574887Schin 1584887Schin struct regdisc_s 1594887Schin { 1604887Schin unsigned long re_version; /* discipline version */ 1614887Schin regflags_t re_flags; /* discipline flags */ 1624887Schin regerror_t re_errorf; /* error function */ 1634887Schin int re_errorlevel; /* errorf level */ 1644887Schin regresize_t re_resizef; /* alloc/free function */ 1654887Schin void* re_resizehandle;/* resizef handle */ 1664887Schin regcomp_t re_compf; /* (?{...}) compile function */ 1674887Schin regexec_t re_execf; /* (?{...}) execute function */ 1684887Schin unsigned char* re_map; /* external to native ccode map */ 1694887Schin }; 1704887Schin 1714887Schin typedef struct regstat_s 1724887Schin { 1734887Schin regflags_t re_flags; /* REG_LEFT|REG_RIGHT */ 1744887Schin ssize_t re_min; /* min anchored match length */ 1754887Schin ssize_t re_max; /* max anchored match length */ 1764887Schin ssize_t re_record; /* regrexec() match length */ 1774887Schin } regstat_t; 1784887Schin 1794887Schin struct regex_s 1804887Schin { 1814887Schin size_t re_nsub; /* number of subexpressions */ 1824887Schin struct reglib_s*re_info; /* library private info */ 1834887Schin size_t re_npat; /* number of pattern chars used */ 1844887Schin regdisc_t* re_disc; /* REG_DISCIPLINE discipline */ 1854887Schin regsub_t* re_sub; /* regsubcomp() data */ 1864887Schin }; 1874887Schin 1884887Schin #define reginit(disc) (memset(disc,0,sizeof(*(disc))),(disc)->re_version=REG_VERSION) 1894887Schin 1904887Schin #if _BLD_ast && defined(__EXPORT__) 1914887Schin #define extern __EXPORT__ 1924887Schin #endif 1934887Schin 1944887Schin extern int regcomp(regex_t*, const char*, regflags_t); 1954887Schin extern size_t regerror(int, const regex_t*, char*, size_t); 1964887Schin extern int regexec(const regex_t*, const char*, size_t, regmatch_t*, regflags_t); 1974887Schin extern void regfree(regex_t*); 1984887Schin 1994887Schin /* nonstandard hooks */ 2004887Schin 2014887Schin #define _REG_cache 1 /* have regcache() */ 2024887Schin #define _REG_class 1 /* have regclass() */ 2034887Schin #define _REG_collate 1 /* have regcollate(), regclass() */ 2044887Schin #define _REG_comb 1 /* have regcomb() */ 2054887Schin #define _REG_decomp 1 /* have regdecomp() */ 2064887Schin #define _REG_dup 1 /* have regdup() */ 2074887Schin #define _REG_fatal 1 /* have regfatal(), regfatalpat() */ 2084887Schin #define _REG_ncomp 1 /* have regncomp() */ 2094887Schin #define _REG_nexec 1 /* have regnexec() */ 2104887Schin #define _REG_rexec 1 /* have regrexec(), regrecord() */ 2114887Schin #define _REG_stat 1 /* have regstat() */ 2124887Schin #define _REG_subcomp 1 /* have regsubcomp(), regsubexec() */ 2134887Schin 2144887Schin extern regclass_t regclass(const char*, char**); 2154887Schin extern int regaddclass(const char*, regclass_t); 2164887Schin extern int regcollate(const char*, char**, char*, int); 2174887Schin extern int regcomb(regex_t*, regex_t*); 2184887Schin extern size_t regdecomp(regex_t*, regflags_t, char*, size_t); 2194887Schin extern int regdup(regex_t*, regex_t*); 2204887Schin extern int regncomp(regex_t*, const char*, size_t, regflags_t); 2214887Schin extern int regnexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t); 2224887Schin extern void regfatal(regex_t*, int, int); 2234887Schin extern void regfatalpat(regex_t*, int, int, const char*); 2244887Schin extern int regrecord(const regex_t*); 2254887Schin extern int regrexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t, int, void*, regrecord_t); 2264887Schin extern regstat_t* regstat(const regex_t*); 2274887Schin 2284887Schin extern regex_t* regcache(const char*, regflags_t, int*); 2294887Schin 2304887Schin extern int regsubcomp(regex_t*, const char*, const regflags_t*, int, regflags_t); 2314887Schin extern int regsubexec(const regex_t*, const char*, size_t, regmatch_t*); 2324887Schin extern int regsubflags(regex_t*, const char*, char**, int, const regflags_t*, int*, regflags_t*); 2334887Schin extern void regsubfree(regex_t*); 2344887Schin 2354887Schin /* obsolete hooks */ 2364887Schin 2374887Schin #ifndef _SFIO_H 2384887Schin struct _sfio_s; 2394887Schin #endif 2404887Schin 2414887Schin extern void regalloc(void*, regresize_t, regflags_t); 2424887Schin extern int regsub(const regex_t*, struct _sfio_s*, const char*, const char*, size_t, regmatch_t*, regflags_t); 2434887Schin 2444887Schin #undef extern 2454887Schin 2464887Schin #endif 247