/* xref: /onnv-gate/usr/src/lib/libast/common/include/regex.h (revision 12068:08a39a083754) */
/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
224887Schin #pragma prototyped
234887Schin 
244887Schin /*
254887Schin  * regex library interface
264887Schin  */
274887Schin 
284887Schin #ifdef	_AST_STD_I
294887Schin #define _REGEX_H	-1
304887Schin #define regex_t		int
314887Schin #define regmatch_t	int
324887Schin #endif
334887Schin #ifndef _REGEX_H
344887Schin #define _REGEX_H	1
354887Schin #undef	regex_t
364887Schin #undef	regmatch_t
374887Schin 
384887Schin #include <ast_common.h>
394887Schin 
404887Schin #define REG_VERSION	20030916L
414887Schin 
/*
 * regcomp flags
 *
 * Each flag is a single distinct bit so flags can be or'ed together
 * into a regflags_t.
 */

#define REG_AUGMENTED	0x00000001	/* enable ! & < >		*/
#define REG_EXTENDED	0x00000002	/* enable ( | )			*/
#define REG_ICASE	0x00000004	/* ignore case in match		*/
#define REG_NEWLINE	0x00000008	/* ^/$ match embedded \n	*/
#define REG_NOSUB	0x00000010	/* don't report subexp matches	*/
#define REG_SHELL	0x00000020	/* shell pattern syntax		*/

/*
 * nonstandard regcomp flags
 *
 * Single-bit values; note that the last three are not in numeric
 * sequence with the rest (REG_CLASS_ESCAPE uses the top bit, so its
 * constant has an unsigned type).
 */

#define REG_LEFT	0x00000100	/* implicit ^...		*/
#define REG_LITERAL	0x00000200	/* no operators			*/
#define REG_MINIMAL	0x00000400	/* minimal match		*/
#define REG_NULL	0x00000800	/* allow null patterns		*/
#define REG_RIGHT	0x00001000	/* implicit ...$		*/
#define REG_LENIENT	0x00002000	/* look the other way		*/
#define REG_ESCAPE	0x00004000	/* \ escapes delimiter in [...]	*/
#define REG_FIRST	0x00008000	/* first match found will do	*/
#define REG_MULTIPLE	0x00010000	/* multiple \n sep patterns	*/
#define REG_DISCIPLINE	0x00020000	/* regex_t.re_disc is valid	*/
#define REG_SPAN	0x00040000	/* . matches \n			*/
#define REG_COMMENT	0x00080000	/* ignore pattern space & #...\n*/
#define REG_MULTIREF	0x00100000	/* multiple digit backrefs	*/
#define REG_MUSTDELIM	0x08000000	/* all delimiters required	*/
#define REG_DELIMITED	0x10000000	/* pattern[0] is delimiter	*/
#define REG_CLASS_ESCAPE 0x80000000	/* \ escapes in [...]		*/

/* REG_SHELL_* modifier flags (single-bit values) */

#define REG_SHELL_DOT	0x00200000	/* explicit leading . match	*/
#define REG_SHELL_ESCAPED 0x00400000	/* \ not special		*/
#define REG_SHELL_GROUP	0x20000000	/* (|&) inside [@|&](...) only	*/
#define REG_SHELL_PATH	0x00800000	/* explicit / match		*/

#define REG_REGEXP	0x40000000	/* <regexp.h> compatibility	*/

/* regexec flags */

#define REG_NOTBOL	0x00000040	/* ^ is not a special char	*/
#define REG_NOTEOL	0x00000080	/* $ is not a special char	*/

/* nonstandard regexec flags */

#define REG_INVERT	0x01000000	/* invert regrexec match sense	*/
#define REG_STARTEND	0x02000000	/* subject==match[0].rm_{so,eo} */
#define REG_ADVANCE	0x04000000	/* advance match[0].rm_{so,eo}	*/

/* regalloc flags */

#define REG_NOFREE	0x00000001	/* don't free			*/

/*
 * regsub flags
 *
 * These values form their own flag space: several reuse the same bits
 * as the regcomp/regexec flags defined earlier in this header.
 */

#define REG_SUB_ALL	0x00000001	/* substitute all occurrences	*/
#define REG_SUB_LOWER	0x00000002	/* substitute to lower case	*/
#define REG_SUB_UPPER	0x00000004	/* substitute to upper case	*/
#define REG_SUB_PRINT	0x00000010	/* internal no-op		*/
#define REG_SUB_NUMBER	0x00000020	/* internal no-op		*/
#define REG_SUB_STOP	0x00000040	/* internal no-op		*/
#define REG_SUB_WRITE	0x00000080	/* internal no-op		*/
#define REG_SUB_LAST	0x00000100	/* last substitution option	*/
#define REG_SUB_FULL	0x00000200	/* fully delimited		*/
#define REG_SUB_USER	0x00001000	/* first user flag bit		*/

/*
 * regex error codes
 *
 * REG_ENOSYS is the only negative code; the remaining codes run
 * consecutively from 1 (REG_NOMATCH) to 20 (REG_PANIC).
 */

#define REG_ENOSYS	(-1)		/* not supported		*/
#define REG_NOMATCH	1		/* regexec didn't match		*/
#define REG_BADPAT	2		/* invalid regular expression	*/
#define REG_ECOLLATE	3		/* invalid collation element	*/
#define REG_ECTYPE	4		/* invalid character class	*/
#define REG_EESCAPE	5		/* trailing \ in pattern	*/
#define REG_ESUBREG	6		/* invalid \digit backreference	*/
#define REG_EBRACK	7		/* [...] imbalance		*/
#define REG_EPAREN	8		/* \(...\) or (...) imbalance	*/
#define REG_EBRACE	9		/* \{...\} or {...} imbalance	*/
#define REG_BADBR	10		/* invalid {...} digits		*/
#define REG_ERANGE	11		/* invalid [...] range endpoint	*/
#define REG_ESPACE	12		/* out of space			*/
#define REG_BADRPT	13		/* unary op not preceded by re	*/
#define REG_ENULL	14		/* empty subexpr in pattern	*/
#define REG_ECOUNT	15		/* re component count overflow	*/
#define REG_BADESC	16		/* invalid \char escape		*/
#define REG_VERSIONID	17		/* version id (pseudo error)	*/
#define REG_EFLAGS	18		/* flags conflict		*/
#define REG_EDELIM	19		/* invalid or omitted delimiter	*/
#define REG_PANIC	20		/* unrecoverable internal error	*/

/* forward declarations so the callback types below can name these structs */
struct regex_s; typedef struct regex_s regex_t;
struct regdisc_s; typedef struct regdisc_s regdisc_t;

/*
 * Scalar and callback types used by the declarations in this header:
 *   regclass_t   returned by regclass(), accepted by regaddclass()
 *   regflags_t   32-bit unsigned container for the REG_* flag bits
 *   regoff_t     type of regmatch_t.rm_so / rm_eo
 *   regerror_t   type of regdisc_t.re_errorf
 *   regcomp_t    type of regdisc_t.re_compf
 *   regexec_t    type of regdisc_t.re_execf
 *   regresize_t  type of regdisc_t.re_resizef, also taken by regalloc()
 *   regrecord_t  callback type taken by regrexec()
 */
typedef int (*regclass_t)(int);
typedef uint32_t regflags_t;
typedef int regoff_t;
typedef int (*regerror_t)(const regex_t*, regdisc_t*, int, ...);
typedef void* (*regcomp_t)(const regex_t*, const char*, size_t, regdisc_t*);
typedef int (*regexec_t)(const regex_t*, void*, const char*, size_t, const char*, size_t, char**, regdisc_t*);
typedef void* (*regresize_t)(void*, void*, size_t);
typedef int (*regrecord_t)(void*, const char*, size_t);

1414887Schin typedef struct regmatch_s
1424887Schin {
1434887Schin 	regoff_t	rm_so;		/* offset of start		*/
1444887Schin 	regoff_t	rm_eo;		/* offset of end		*/
1454887Schin } regmatch_t;
1464887Schin 
1474887Schin typedef struct regsub_s
1484887Schin {
1494887Schin 	regflags_t	re_flags;	/* regsubcomp() flags		*/
1504887Schin 	char*		re_buf;		/* regsubexec() output buffer	*/
1514887Schin 	size_t		re_len;		/* re_buf length		*/
1524887Schin 	int		re_min;		/* regsubcomp() min matches	*/
1534887Schin #ifdef _REG_SUB_PRIVATE_
1544887Schin 	_REG_SUB_PRIVATE_
1554887Schin #endif
1564887Schin } regsub_t;
1574887Schin 
1584887Schin struct regdisc_s
1594887Schin {
1604887Schin 	unsigned long	re_version;	/* discipline version		*/
1614887Schin 	regflags_t	re_flags;	/* discipline flags		*/
1624887Schin 	regerror_t	re_errorf;	/* error function		*/
1634887Schin 	int		re_errorlevel;	/* errorf level			*/
1644887Schin 	regresize_t	re_resizef;	/* alloc/free function		*/
1654887Schin 	void*		re_resizehandle;/* resizef handle		*/
1664887Schin 	regcomp_t	re_compf;	/* (?{...}) compile function	*/
1674887Schin 	regexec_t	re_execf;	/* (?{...}) execute function	*/
1684887Schin 	unsigned char*	re_map;		/* external to native ccode map	*/
1694887Schin };
1704887Schin 
1714887Schin typedef struct regstat_s
1724887Schin {
1734887Schin 	regflags_t	re_flags;	/* REG_LEFT|REG_RIGHT		*/
1744887Schin 	ssize_t		re_min;		/* min anchored match length	*/
1754887Schin 	ssize_t		re_max;		/* max anchored match length	*/
1764887Schin 	ssize_t		re_record;	/* regrexec() match length	*/
1774887Schin } regstat_t;
1784887Schin 
1794887Schin struct regex_s
1804887Schin {
1814887Schin 	size_t		re_nsub;	/* number of subexpressions	*/
1824887Schin 	struct reglib_s*re_info;	/* library private info		*/
1834887Schin 	size_t		re_npat;	/* number of pattern chars used	*/
1844887Schin 	regdisc_t*	re_disc;	/* REG_DISCIPLINE discipline	*/
1854887Schin 	regsub_t*	re_sub;		/* regsubcomp() data		*/
1864887Schin };
1874887Schin 
/*
 * reginit(disc): zero the object that disc points to, then set its
 * re_version member to REG_VERSION; the value of the expression is the
 * assigned version.  disc is evaluated more than once, so pass an
 * expression without side effects.
 * NOTE(review): memset() is not declared by this header itself --
 * presumably it comes from <ast_common.h> or the includer; confirm.
 */
#define reginit(disc)	(memset((disc),0,sizeof(*(disc))),(disc)->re_version=REG_VERSION)

/*
 * When _BLD_ast is nonzero and __EXPORT__ is defined, `extern' in the
 * declarations that follow expands to __EXPORT__; the remapping is
 * removed again by the `#undef extern' at the end of this header.
 */
#if _BLD_ast && defined(__EXPORT__)
#define extern		__EXPORT__
#endif

1944887Schin extern int	regcomp(regex_t*, const char*, regflags_t);
1954887Schin extern size_t	regerror(int, const regex_t*, char*, size_t);
1964887Schin extern int	regexec(const regex_t*, const char*, size_t, regmatch_t*, regflags_t);
1974887Schin extern void	regfree(regex_t*);
1984887Schin 
/*
 * nonstandard hooks
 *
 * Each _REG_* macro is defined as 1 to advertise that the named
 * extension function(s) are declared by this header, so client code
 * can test for them with #if/#ifdef.
 */

#define _REG_cache	1	/* have regcache()			*/
#define _REG_class	1	/* have regclass()			*/
#define _REG_collate	1	/* have regcollate(), regclass()	*/
#define _REG_comb	1	/* have regcomb()			*/
#define _REG_decomp	1	/* have regdecomp()			*/
#define _REG_dup	1	/* have regdup()			*/
#define _REG_fatal	1	/* have regfatal(), regfatalpat()	*/
#define _REG_ncomp	1	/* have regncomp()			*/
#define _REG_nexec	1	/* have regnexec()			*/
#define _REG_rexec	1	/* have regrexec(), regrecord()		*/
#define _REG_stat	1	/* have regstat()			*/
#define _REG_subcomp	1	/* have regsubcomp(), regsubexec()	*/

2144887Schin extern regclass_t regclass(const char*, char**);
2154887Schin extern int	regaddclass(const char*, regclass_t);
2164887Schin extern int	regcollate(const char*, char**, char*, int);
2174887Schin extern int	regcomb(regex_t*, regex_t*);
2184887Schin extern size_t	regdecomp(regex_t*, regflags_t, char*, size_t);
2194887Schin extern int	regdup(regex_t*, regex_t*);
2204887Schin extern int	regncomp(regex_t*, const char*, size_t, regflags_t);
2214887Schin extern int	regnexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t);
2224887Schin extern void	regfatal(regex_t*, int, int);
2234887Schin extern void	regfatalpat(regex_t*, int, int, const char*);
2244887Schin extern int	regrecord(const regex_t*);
2254887Schin extern int	regrexec(const regex_t*, const char*, size_t, size_t, regmatch_t*, regflags_t, int, void*, regrecord_t);
2264887Schin extern regstat_t* regstat(const regex_t*);
2274887Schin 
2284887Schin extern regex_t*	regcache(const char*, regflags_t, int*);
2294887Schin 
2304887Schin extern int	regsubcomp(regex_t*, const char*, const regflags_t*, int, regflags_t);
2314887Schin extern int	regsubexec(const regex_t*, const char*, size_t, regmatch_t*);
2324887Schin extern int	regsubflags(regex_t*, const char*, char**, int, const regflags_t*, int*, regflags_t*);
2334887Schin extern void	regsubfree(regex_t*);
2344887Schin 
2354887Schin /* obsolete hooks */
2364887Schin 
2374887Schin #ifndef _SFIO_H
2384887Schin struct _sfio_s;
2394887Schin #endif
2404887Schin 
2414887Schin extern void	regalloc(void*, regresize_t, regflags_t);
2424887Schin extern int	regsub(const regex_t*, struct _sfio_s*, const char*, const char*, size_t, regmatch_t*, regflags_t);
2434887Schin 
2444887Schin #undef	extern
2454887Schin 
2464887Schin #endif
247