14887Schin /***********************************************************************
24887Schin * *
34887Schin * This software is part of the ast package *
4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1985-2010 AT&T Intellectual Property *
54887Schin * and is licensed under the *
64887Schin * Common Public License, Version 1.0 *
78462SApril.Chin@Sun.COM * by AT&T Intellectual Property *
84887Schin * *
94887Schin * A copy of the License is available at *
104887Schin * http://www.opensource.org/licenses/cpl1.0.txt *
114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
124887Schin * *
134887Schin * Information and Software Systems Research *
144887Schin * AT&T Research *
154887Schin * Florham Park NJ *
164887Schin * *
174887Schin * Glenn Fowler <gsf@research.att.com> *
184887Schin * David Korn <dgk@research.att.com> *
194887Schin * Phong Vo <kpv@research.att.com> *
204887Schin * *
214887Schin ***********************************************************************/
224887Schin #pragma prototyped
234887Schin
244887Schin /*
254887Schin * D. G. Korn
264887Schin * G. S. Fowler
274887Schin * AT&T Research
284887Schin *
294887Schin * match shell file patterns
304887Schin * this interface is a wrapper on regex
314887Schin *
 *    sh pattern    egrep RE    description
 *    ----------    --------    -----------
 *    *             .*          0 or more chars
 *    ?             .           any single char
 *    [.]           [.]         char class
 *    [!.]          [^.]        negated char class
 *    [[:.:]]       [[:.:]]     ctype class
 *    [[=.=]]       [[=.=]]     equivalence class
 *    [[...]]       [[...]]     collation element
 *    *(.)          (.)*        0 or more of
 *    +(.)          (.)+        1 or more of
 *    ?(.)          (.)?        0 or 1 of
 *    (.)           (.)         1 of
 *    @(.)          (.)         1 of
 *    a|b           a|b         a or b
 *    \#                        () subgroup back reference [1-9]
 *    a&b                       a and b
 *    !(.)                      none of
504887Schin *
514887Schin * \ used to escape metacharacters
524887Schin *
534887Schin * *, ?, (, |, &, ), [, \ must be \'d outside of [...]
544887Schin * only ] must be \'d inside [...]
554887Schin *
564887Schin */
574887Schin
584887Schin #include <ast.h>
594887Schin #include <regex.h>
604887Schin
/*
 * match array handed to regexec() by strgrpmatch()
 * it is kept between calls and only ever grown
 */

static struct State_s
{
	regmatch_t*	match;		/* regexec() result slots	*/
	int		nmatch;		/* slots allocated in match	*/
} matchstate = { 0, 0 };
664887Schin
674887Schin /*
684887Schin * subgroup match
694887Schin * 0 returned if no match
704887Schin * otherwise number of subgroups matched returned
714887Schin * match group begin offsets are even elements of sub
724887Schin * match group end offsets are odd elements of sub
734887Schin * the matched string is from s+sub[0] up to but not
744887Schin * including s+sub[1]
754887Schin */
764887Schin
774887Schin int
strgrpmatch(const char * b,const char * p,int * sub,int n,register int flags)784887Schin strgrpmatch(const char* b, const char* p, int* sub, int n, register int flags)
794887Schin {
804887Schin register regex_t* re;
814887Schin register int* end;
824887Schin register int i;
834887Schin register regflags_t reflags;
844887Schin
854887Schin /*
864887Schin * 0 and empty patterns are special
874887Schin */
884887Schin
894887Schin if (!p || !b)
904887Schin {
914887Schin if (!p && !b)
924887Schin regcache(NiL, 0, NiL);
934887Schin return 0;
944887Schin }
954887Schin if (!*p)
968462SApril.Chin@Sun.COM {
978462SApril.Chin@Sun.COM if (sub && n > 0)
988462SApril.Chin@Sun.COM sub[0] = sub[1] = 0;
994887Schin return *b == 0;
1008462SApril.Chin@Sun.COM }
1014887Schin
1024887Schin /*
1034887Schin * convert flags
1044887Schin */
1054887Schin
1064887Schin if (flags & REG_ADVANCE)
1074887Schin reflags = flags & ~REG_ADVANCE;
1084887Schin else
1094887Schin {
1104887Schin reflags = REG_SHELL|REG_AUGMENTED;
1114887Schin if (!(flags & STR_MAXIMAL))
1124887Schin reflags |= REG_MINIMAL;
1134887Schin if (flags & STR_GROUP)
1144887Schin reflags |= REG_SHELL_GROUP;
1154887Schin if (flags & STR_LEFT)
1164887Schin reflags |= REG_LEFT;
1174887Schin if (flags & STR_RIGHT)
1184887Schin reflags |= REG_RIGHT;
1194887Schin if (flags & STR_ICASE)
1204887Schin reflags |= REG_ICASE;
1214887Schin }
1224887Schin if (!sub || n <= 0)
1234887Schin reflags |= REG_NOSUB;
1244887Schin if (!(re = regcache(p, reflags, NiL)))
1254887Schin return 0;
1264887Schin if (n > matchstate.nmatch)
1274887Schin {
1284887Schin if (!(matchstate.match = newof(matchstate.match, regmatch_t, n, 0)))
1294887Schin return 0;
1304887Schin matchstate.nmatch = n;
1314887Schin }
1324887Schin if (regexec(re, b, n, matchstate.match, reflags & ~(REG_MINIMAL|REG_SHELL_GROUP|REG_LEFT|REG_RIGHT|REG_ICASE)))
1334887Schin return 0;
1344887Schin if (!sub || n <= 0)
1354887Schin return 1;
1364887Schin i = re->re_nsub;
1374887Schin end = sub + n * 2;
1384887Schin for (n = 0; sub < end && n <= i; n++)
1394887Schin {
1404887Schin *sub++ = matchstate.match[n].rm_so;
1414887Schin *sub++ = matchstate.match[n].rm_eo;
1424887Schin }
1434887Schin return i + 1;
1444887Schin }
1454887Schin
1464887Schin /*
1474887Schin * compare the string s with the shell pattern p
1484887Schin * returns 1 for match 0 otherwise
1494887Schin */
1504887Schin
1514887Schin int
strmatch(const char * s,const char * p)1524887Schin strmatch(const char* s, const char* p)
1534887Schin {
1544887Schin return strgrpmatch(s, p, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT);
1554887Schin }
1564887Schin
1574887Schin /*
1584887Schin * leading substring match
1594887Schin * first char after end of substring returned
1604887Schin * 0 returned if no match
1614887Schin *
1624887Schin * OBSOLETE: use strgrpmatch()
1634887Schin */
1644887Schin
1654887Schin char*
strsubmatch(const char * s,const char * p,int flags)1664887Schin strsubmatch(const char* s, const char* p, int flags)
1674887Schin {
1684887Schin int match[2];
1694887Schin
1704887Schin return strgrpmatch(s, p, match, 1, (flags ? STR_MAXIMAL : 0)|STR_LEFT) ? (char*)s + match[1] : (char*)0;
1714887Schin }
172